From a14f791d8c4ec432f93f58eac9ce647226ed483e Mon Sep 17 00:00:00 2001
From: Marcin Wielgus <mwielgus@google.com>
Date: Mon, 1 Feb 2016 12:12:37 +0100
Subject: [PATCH] Revert "Merge pull request #20329 from
 kubernetes/revert-20323-bump-influxdb"

This reverts commit b4188ec459cd89b8cf2fe03daa6888ef9421b6c1, reversing
changes made to 28951bd66ad18098befc426bf0baef7019083af0.
---
 Godeps/Godeps.json                            |   45 +-
 Godeps/LICENSES.md                            |    4 +
 .../github.com/armon/go-metrics/.gitignore    |   22 +
 .../src/github.com/armon/go-metrics/LICENSE   |   20 +
 .../src/github.com/armon/go-metrics/README.md |   71 +
 .../github.com/armon/go-metrics/const_unix.go |   12 +
 .../armon/go-metrics/const_windows.go         |   13 +
 .../armon/go-metrics/datadog/dogstatsd.go     |  125 +
 .../src/github.com/armon/go-metrics/inmem.go  |  241 ++
 .../armon/go-metrics/inmem_signal.go          |  100 +
 .../github.com/armon/go-metrics/metrics.go    |  115 +
 .../armon/go-metrics/prometheus/prometheus.go |   88 +
 .../src/github.com/armon/go-metrics/sink.go   |   52 +
 .../src/github.com/armon/go-metrics/start.go  |   95 +
 .../src/github.com/armon/go-metrics/statsd.go |  154 +
 .../github.com/armon/go-metrics/statsite.go   |  142 +
 .../github.com/hashicorp/go-msgpack/LICENSE   |   25 +
 .../hashicorp/go-msgpack/codec/0doc.go        |  143 +
 .../hashicorp/go-msgpack/codec/README.md      |  174 +
 .../hashicorp/go-msgpack/codec/binc.go        |  786 +++++
 .../hashicorp/go-msgpack/codec/decode.go      | 1048 ++++++
 .../hashicorp/go-msgpack/codec/encode.go      | 1001 ++++++
 .../hashicorp/go-msgpack/codec/helper.go      |  589 ++++
 .../go-msgpack/codec/helper_internal.go       |  127 +
 .../hashicorp/go-msgpack/codec/msgpack.go     |  816 +++++
 .../go-msgpack/codec/msgpack_test.py          |  110 +
 .../hashicorp/go-msgpack/codec/rpc.go         |  152 +
 .../hashicorp/go-msgpack/codec/simple.go      |  461 +++
 .../hashicorp/go-msgpack/codec/time.go        |  193 ++
 .../github.com/hashicorp/raft-boltdb/LICENSE  |  362 ++
 .../hashicorp/raft-boltdb/README.md           |   11 +
 .../hashicorp/raft-boltdb/bolt_store.go       |  231 ++
 .../github.com/hashicorp/raft-boltdb/util.go  |   37 +
 .../src/github.com/hashicorp/raft/.gitignore  |   23 +
 .../src/github.com/hashicorp/raft/.travis.yml |   14 +
 .../src/github.com/hashicorp/raft/LICENSE     |  354 ++
 .../src/github.com/hashicorp/raft/Makefile    |   17 +
 .../src/github.com/hashicorp/raft/README.md   |   89 +
 .../github.com/hashicorp/raft/bench/bench.go  |  171 +
 .../src/github.com/hashicorp/raft/commands.go |   84 +
 .../src/github.com/hashicorp/raft/config.go   |  134 +
 .../hashicorp/raft/discard_snapshot.go        |   48 +
 .../hashicorp/raft/file_snapshot.go           |  470 +++
 .../src/github.com/hashicorp/raft/fsm.go      |   37 +
 .../src/github.com/hashicorp/raft/future.go   |  182 +
 .../src/github.com/hashicorp/raft/inflight.go |  213 ++
 .../github.com/hashicorp/raft/inmem_store.go  |  116 +
 .../hashicorp/raft/inmem_transport.go         |  315 ++
 .../src/github.com/hashicorp/raft/log.go      |   60 +
 .../github.com/hashicorp/raft/log_cache.go    |   79 +
 .../hashicorp/raft/net_transport.go           |  622 ++++
 .../src/github.com/hashicorp/raft/peer.go     |  122 +
 .../src/github.com/hashicorp/raft/raft.go     | 1887 +++++++++++
 .../github.com/hashicorp/raft/replication.go  |  517 +++
 .../src/github.com/hashicorp/raft/snapshot.go |   40 +
 .../src/github.com/hashicorp/raft/stable.go   |   15 +
 .../src/github.com/hashicorp/raft/state.go    |  169 +
 .../hashicorp/raft/tcp_transport.go           |  105 +
 .../github.com/hashicorp/raft/transport.go    |   85 +
 .../src/github.com/hashicorp/raft/util.go     |  200 ++
 .../src/github.com/influxdb/influxdb/LICENSE  |    2 +-
 .../influxdb/influxdb/_vendor/raft/LICENSE    |   20 -
 .../influxdb/influxdb/client/README.md        |  208 +-
 .../influxdb/client/examples/example.go       |  200 --
 .../influxdb/influxdb/client/influxdb.go      | 1073 +++---
 .../influxdb/influxdb/client/series.go        |   19 -
 .../influxdb/influxdb/client/shard_space.go   |   15 -
 .../influxdb/influxdb/influxql/INFLUXQL.md    |  650 ++++
 .../influxdb/influxdb/influxql/NOTES          |  682 ++++
 .../influxdb/influxdb/influxql/ast.go         | 3016 +++++++++++++++++
 .../influxdb/influxdb/influxql/doc.go         |   64 +
 .../influxdb/influxdb/influxql/functions.go   | 1115 ++++++
 .../influxdb/influxdb/influxql/parser.go      | 2238 ++++++++++++
 .../influxdb/influxdb/influxql/result.go      |  223 ++
 .../influxdb/influxdb/influxql/scanner.go     |  561 +++
 .../influxdb/influxdb/influxql/token.go       |  296 ++
 .../influxdb/influxdb/meta/config.go          |   52 +
 .../github.com/influxdb/influxdb/meta/data.go | 1055 ++++++
 .../influxdb/influxdb/meta/errors.go          |  116 +
 .../influxdb/meta/internal/meta.pb.go         | 1167 +++++++
 .../influxdb/meta/internal/meta.proto         |  257 ++
 .../influxdb/meta/statement_executor.go       |  280 ++
 .../influxdb/influxdb/meta/store.go           | 1876 ++++++++++
 .../influxdb/influxdb/snapshot/snapshot.go    |  529 +++
 .../github.com/influxdb/influxdb/toml/toml.go |   72 +
 .../influxdb/influxdb/tsdb/README.md          |   85 +
 .../influxdb/influxdb/tsdb/batcher.go         |  142 +
 .../influxdb/influxdb/tsdb/config.go          |   34 +
 .../github.com/influxdb/influxdb/tsdb/doc.go  |    5 +
 .../influxdb/influxdb/tsdb/engine.go          |  961 ++++++
 .../influxdb/tsdb/internal/meta.pb.go         |  123 +
 .../influxdb/tsdb/internal/meta.proto         |   27 +
 .../influxdb/influxdb/tsdb/mapper.go          |  751 ++++
 .../github.com/influxdb/influxdb/tsdb/meta.go | 1279 +++++++
 .../influxdb/influxdb/tsdb/monitor.go         |   83 +
 .../influxdb/influxdb/tsdb/points.go          | 1135 +++++++
 .../influxdb/influxdb/tsdb/query_executor.go  | 1031 ++++++
 .../influxdb/influxdb/tsdb/shard.go           | 1218 +++++++
 .../influxdb/influxdb/tsdb/snapshot_writer.go |  124 +
 .../influxdb/influxdb/tsdb/store.go           |  343 ++
 .../google/heapster-controller.yaml           |   30 +-
 .../heapster-controller-combined.yaml         |   32 +-
 .../influxdb/heapster-controller.yaml         |   26 +-
 .../influxdb/influxdb-grafana-controller.yaml |   11 +-
 .../standalone/heapster-controller.yaml       |   10 +-
 hack/jenkins/e2e.sh                           |    2 -
 .../admission/initialresources/influxdb.go    |   99 +-
 test/e2e/initial_resources.go                 |   10 +-
 test/e2e/monitoring.go                        |   89 +-
 109 files changed, 36182 insertions(+), 987 deletions(-)
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/.gitignore
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/LICENSE
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/README.md
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/const_unix.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/const_windows.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/datadog/dogstatsd.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/inmem.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/inmem_signal.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/metrics.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/prometheus/prometheus.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/sink.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/start.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/statsd.go
 create mode 100644 Godeps/_workspace/src/github.com/armon/go-metrics/statsite.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/LICENSE
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/0doc.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/README.md
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/binc.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/decode.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/encode.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/helper.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/helper_internal.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/msgpack.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/msgpack_test.py
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/rpc.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/simple.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/time.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/LICENSE
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/README.md
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/bolt_store.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/util.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/.gitignore
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/.travis.yml
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/LICENSE
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/Makefile
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/README.md
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/bench/bench.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/commands.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/config.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/discard_snapshot.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/file_snapshot.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/fsm.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/future.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/inflight.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/inmem_store.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/inmem_transport.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/log.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/log_cache.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/net_transport.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/peer.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/raft.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/replication.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/snapshot.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/stable.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/state.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/tcp_transport.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/transport.go
 create mode 100644 Godeps/_workspace/src/github.com/hashicorp/raft/util.go
 delete mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/_vendor/raft/LICENSE
 delete mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/client/examples/example.go
 delete mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/client/series.go
 delete mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/client/shard_space.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/INFLUXQL.md
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/NOTES
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/doc.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/functions.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/result.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/token.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/config.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/errors.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.pb.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.proto
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/snapshot/snapshot.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/toml/toml.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/README.md
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/config.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/doc.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.pb.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.proto
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/monitor.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/snapshot_writer.go
 create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store.go

diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json
index 987910fe027..5847fcde6da 100644
--- a/Godeps/Godeps.json
+++ b/Godeps/Godeps.json
@@ -47,6 +47,10 @@
 			"Comment": "v0.7.4-6-g5d54e27",
 			"Rev": "5d54e27f1764a0309eafe12c9df7bac03f241646"
 		},
+		{
+			"ImportPath": "github.com/armon/go-metrics",
+			"Rev": "345426c77237ece5dab0e1605c3e4b35c3f54757"
+		},
 		{
 			"ImportPath": "github.com/aws/aws-sdk-go/aws",
 			"Comment": "v1.0.7",
@@ -679,6 +683,18 @@
 			"ImportPath": "github.com/gorilla/mux",
 			"Rev": "8096f47503459bcc74d1f4c487b7e6e42e5746b5"
 		},
+		{
+			"ImportPath": "github.com/hashicorp/go-msgpack/codec",
+			"Rev": "fa3f63826f7c23912c15263591e65d54d080b458"
+		},
+		{
+			"ImportPath": "github.com/hashicorp/raft",
+			"Rev": "057b893fd996696719e98b6c44649ea14968c811"
+		},
+		{
+			"ImportPath": "github.com/hashicorp/raft-boltdb",
+			"Rev": "d1e82c1ec3f15ee991f7cc7ffd5b67ff6f5bbaee"
+		},
 		{
 			"ImportPath": "github.com/imdario/mergo",
 			"Comment": "0.1.3-8-g6633656",
@@ -690,8 +706,33 @@
 		},
 		{
 			"ImportPath": "github.com/influxdb/influxdb/client",
-			"Comment": "v0.8.8",
-			"Rev": "afde71eb1740fd763ab9450e1f700ba0e53c36d0"
+			"Comment": "v0.9.2.1",
+			"Rev": "b237c68bab4756507baf6840023be103853e77db"
+		},
+		{
+			"ImportPath": "github.com/influxdb/influxdb/influxql",
+			"Comment": "v0.9.2.1",
+			"Rev": "b237c68bab4756507baf6840023be103853e77db"
+		},
+		{
+			"ImportPath": "github.com/influxdb/influxdb/meta",
+			"Comment": "v0.9.2.1",
+			"Rev": "b237c68bab4756507baf6840023be103853e77db"
+		},
+		{
+			"ImportPath": "github.com/influxdb/influxdb/snapshot",
+			"Comment": "v0.9.2.1",
+			"Rev": "b237c68bab4756507baf6840023be103853e77db"
+		},
+		{
+			"ImportPath": "github.com/influxdb/influxdb/toml",
+			"Comment": "v0.9.2.1",
+			"Rev": "b237c68bab4756507baf6840023be103853e77db"
+		},
+		{
+			"ImportPath": "github.com/influxdb/influxdb/tsdb",
+			"Comment": "v0.9.2.1",
+			"Rev": "b237c68bab4756507baf6840023be103853e77db"
 		},
 		{
 			"ImportPath": "github.com/jmespath/go-jmespath",
diff --git a/Godeps/LICENSES.md b/Godeps/LICENSES.md
index 652d803eae6..cf5170dfe9c 100644
--- a/Godeps/LICENSES.md
+++ b/Godeps/LICENSES.md
@@ -8,6 +8,7 @@ bitbucket.org/ww/goautoneg | spdxBSD3
 github.com/abbot/go-http-auth | Apache-2
 github.com/appc/cni | Apache-2
 github.com/appc/spec | Apache-2
+github.com/armon/go-metrics | MITname
 github.com/aws/aws-sdk-go | Apache-2
 github.com/beorn7/perks/quantile | MIT?
 github.com/blang/semver | MITname
@@ -50,6 +51,9 @@ github.com/google/cadvisor | Apache-2
 github.com/google/gofuzz | Apache-2
 github.com/gorilla/context | spdxBSD3
 github.com/gorilla/mux | spdxBSD3
+github.com/hashicorp/go-msgpack | spdxBSD3
+github.com/hashicorp/raft | IntelPart08
+github.com/hashicorp/raft-boltdb | IntelPart08
 github.com/imdario/mergo | spdxBSD3
 github.com/inconshreveable/mousetrap | Apache-2
 github.com/influxdb/influxdb | MITname
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/.gitignore b/Godeps/_workspace/src/github.com/armon/go-metrics/.gitignore
new file mode 100644
index 00000000000..00268614f04
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/.gitignore
@@ -0,0 +1,22 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/LICENSE b/Godeps/_workspace/src/github.com/armon/go-metrics/LICENSE
new file mode 100644
index 00000000000..106569e542b
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/LICENSE
@@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2013 Armon Dadgar
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/README.md b/Godeps/_workspace/src/github.com/armon/go-metrics/README.md
new file mode 100644
index 00000000000..7b6f23e29f8
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/README.md
@@ -0,0 +1,71 @@
+go-metrics
+==========
+
+This library provides a `metrics` package which can be used to instrument code,
+expose application metrics, and profile runtime performance in a flexible manner.
+
+Current API: [![GoDoc](https://godoc.org/github.com/armon/go-metrics?status.svg)](https://godoc.org/github.com/armon/go-metrics)
+
+Sinks
+=====
+
+The `metrics` package makes use of a `MetricSink` interface to support delivery
+to any type of backend. Currently the following sinks are provided:
+
+* StatsiteSink : Sinks to a [statsite](https://github.com/armon/statsite/) instance (TCP)
+* StatsdSink: Sinks to a [StatsD](https://github.com/etsy/statsd/) / statsite instance (UDP)
+* PrometheusSink: Sinks to a [Prometheus](http://prometheus.io/) metrics endpoint (exposed via HTTP for scrapes)
+* InmemSink : Provides in-memory aggregation, can be used to export stats
+* FanoutSink : Sinks to multiple sinks. Enables writing to multiple statsite instances for example.
+* BlackholeSink : Sinks to nowhere
+
+In addition to the sinks, the `InmemSignal` can be used to catch a signal,
+and dump a formatted output of recent metrics. For example, when a process gets
+a SIGUSR1, it can dump to stderr recent performance metrics for debugging.
+
+Examples
+========
+
+Here is an example of using the package:
+
+    func SlowMethod() {
+        // Profiling the runtime of a method
+        defer metrics.MeasureSince([]string{"SlowMethod"}, time.Now())
+    }
+
+    // Configure a statsite sink as the global metrics sink
+    sink, _ := metrics.NewStatsiteSink("statsite:8125")
+    metrics.NewGlobal(metrics.DefaultConfig("service-name"), sink)
+
+    // Emit a Key/Value pair
+    metrics.EmitKey([]string{"questions", "meaning of life"}, 42)
+
+
+Here is an example of setting up a signal handler:
+
+    // Setup the inmem sink and signal handler
+    inm := metrics.NewInmemSink(10*time.Second, time.Minute)
+    sig := metrics.DefaultInmemSignal(inm)
+    metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm)
+
+    // Run some code
+    inm.SetGauge([]string{"foo"}, 42)
+    inm.EmitKey([]string{"bar"}, 30)
+
+    inm.IncrCounter([]string{"baz"}, 42)
+    inm.IncrCounter([]string{"baz"}, 1)
+    inm.IncrCounter([]string{"baz"}, 80)
+
+    inm.AddSample([]string{"method", "wow"}, 42)
+    inm.AddSample([]string{"method", "wow"}, 100)
+    inm.AddSample([]string{"method", "wow"}, 22)
+
+    ....
+
+When a signal comes in, output like the following will be dumped to stderr:
+
+    [2014-01-28 14:57:33.04 -0800 PST][G] 'foo': 42.000
+    [2014-01-28 14:57:33.04 -0800 PST][P] 'bar': 30.000
+    [2014-01-28 14:57:33.04 -0800 PST][C] 'baz': Count: 3 Min: 1.000 Mean: 41.000 Max: 80.000 Stddev: 39.509
+    [2014-01-28 14:57:33.04 -0800 PST][S] 'method.wow': Count: 3 Min: 22.000 Mean: 54.667 Max: 100.000 Stddev: 40.513
+
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/const_unix.go b/Godeps/_workspace/src/github.com/armon/go-metrics/const_unix.go
new file mode 100644
index 00000000000..31098dd57e5
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/const_unix.go
@@ -0,0 +1,12 @@
+// +build !windows
+
+package metrics
+
+import (
+	"syscall"
+)
+
+const (
+	// DefaultSignal is used with DefaultInmemSignal
+	DefaultSignal = syscall.SIGUSR1
+)
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/const_windows.go b/Godeps/_workspace/src/github.com/armon/go-metrics/const_windows.go
new file mode 100644
index 00000000000..38136af3e42
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/const_windows.go
@@ -0,0 +1,13 @@
+// +build windows
+
+package metrics
+
+import (
+	"syscall"
+)
+
+const (
+	// DefaultSignal is used with DefaultInmemSignal
+	// Windows has no SIGUSR1, use SIGBREAK
+	DefaultSignal = syscall.Signal(21)
+)
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/datadog/dogstatsd.go b/Godeps/_workspace/src/github.com/armon/go-metrics/datadog/dogstatsd.go
new file mode 100644
index 00000000000..aaba9fe0e22
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/datadog/dogstatsd.go
@@ -0,0 +1,125 @@
+package datadog
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/DataDog/datadog-go/statsd"
+)
+
+// DogStatsdSink provides a MetricSink that can be used
+// with a dogstatsd server. It utilizes the Dogstatsd client at github.com/DataDog/datadog-go/statsd
+type DogStatsdSink struct {
+	client            *statsd.Client
+	hostName          string
+	propagateHostname bool
+}
+
+// NewDogStatsdSink is used to create a new DogStatsdSink with sane defaults
+func NewDogStatsdSink(addr string, hostName string) (*DogStatsdSink, error) {
+	client, err := statsd.New(addr)
+	if err != nil {
+		return nil, err
+	}
+	sink := &DogStatsdSink{
+		client:            client,
+		hostName:          hostName,
+		propagateHostname: false,
+	}
+	return sink, nil
+}
+
+// SetTags sets common tags on the Dogstatsd Client that will be sent
+// along with all dogstatsd packets.
+// Ref: http://docs.datadoghq.com/guides/dogstatsd/#tags
+func (s *DogStatsdSink) SetTags(tags []string) {
+	s.client.Tags = tags
+}
+
+// EnableHostNamePropagation forces a Dogstatsd `host` tag with the value specified by `s.hostName`.
+// Since the go-metrics package has its own mechanism for attaching a hostname to metrics,
+// setting the `propagateHostname` flag ensures that `s.hostName` overrides the host tag naively set by the DogStatsd server.
+func (s *DogStatsdSink) EnableHostNamePropagation() {
+	s.propagateHostname = true
+}
+
+func (s *DogStatsdSink) flattenKey(parts []string) string {
+	joined := strings.Join(parts, ".")
+	return strings.Map(func(r rune) rune {
+		switch r {
+		case ':':
+			fallthrough
+		case ' ':
+			return '_'
+		default:
+			return r
+		}
+	}, joined)
+}
+
+// parseKey returns key with every element equal to s.hostName removed, plus a `host:` tag when propagation is enabled.
+func (s *DogStatsdSink) parseKey(key []string) ([]string, []string) {
+	// Since DogStatsd supports dimensionality via tags on metric keys, this sink's approach is to splice the hostname out of the key in favor of a `host` tag
+	// The `host` tag is either forced here, or set downstream by the DogStatsd server
+	var tags []string
+	hostName := s.hostName
+
+	// Filter into a fresh slice: splicing key in place while ranging over it can panic and clobbers the caller's slice
+	parts := make([]string, 0, len(key))
+	for _, el := range key {
+		if el != hostName {
+			parts = append(parts, el)
+		}
+	}
+	if s.propagateHostname {
+		tags = append(tags, fmt.Sprintf("host:%s", hostName))
+	}
+	return parts, tags
+}
+
+// Implementation of methods in the MetricSink interface
+
+func (s *DogStatsdSink) SetGauge(key []string, val float32) {
+	s.SetGaugeWithTags(key, val, []string{})
+}
+
+func (s *DogStatsdSink) IncrCounter(key []string, val float32) {
+	s.IncrCounterWithTags(key, val, []string{})
+}
+
+// EmitKey is not implemented since DogStatsd does not provide a metric type that holds an
+// arbitrary number of values
+func (s *DogStatsdSink) EmitKey(key []string, val float32) {
+}
+
+func (s *DogStatsdSink) AddSample(key []string, val float32) {
+	s.AddSampleWithTags(key, val, []string{})
+}
+
+// The following ...WithTags methods correspond to Datadog's Tag extension to Statsd.
+// http://docs.datadoghq.com/guides/dogstatsd/#tags
+
+func (s *DogStatsdSink) SetGaugeWithTags(key []string, val float32, tags []string) {
+	flatKey, tags := s.getFlatkeyAndCombinedTags(key, tags)
+	rate := 1.0
+	s.client.Gauge(flatKey, float64(val), tags, rate)
+}
+
+func (s *DogStatsdSink) IncrCounterWithTags(key []string, val float32, tags []string) {
+	flatKey, tags := s.getFlatkeyAndCombinedTags(key, tags)
+	rate := 1.0
+	s.client.Count(flatKey, int64(val), tags, rate)
+}
+
+func (s *DogStatsdSink) AddSampleWithTags(key []string, val float32, tags []string) {
+	flatKey, tags := s.getFlatkeyAndCombinedTags(key, tags)
+	rate := 1.0
+	s.client.TimeInMilliseconds(flatKey, float64(val), tags, rate)
+}
+
+func (s *DogStatsdSink) getFlatkeyAndCombinedTags(key []string, tags []string) (flattenedKey string, combinedTags []string) {
+	key, hostTags := s.parseKey(key)
+	flatKey := s.flattenKey(key)
+	tags = append(tags, hostTags...)
+	return flatKey, tags
+}
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/inmem.go b/Godeps/_workspace/src/github.com/armon/go-metrics/inmem.go
new file mode 100644
index 00000000000..da503296060
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/inmem.go
@@ -0,0 +1,241 @@
+package metrics
+
+import (
+	"fmt"
+	"math"
+	"strings"
+	"sync"
+	"time"
+)
+
+// InmemSink provides a MetricSink that does in-memory aggregation
+// without sending metrics over a network. It can be embedded within
+// an application to provide profiling information.
+type InmemSink struct {
+	// interval is the length of each aggregation interval
+	interval time.Duration
+
+	// retain is the total duration of history kept; it bounds how many intervals are retained
+	retain time.Duration
+
+	// maxIntervals is the maximum length of intervals.
+	// It is retain / interval.
+	maxIntervals int
+
+	// intervals is a slice of the retained intervals, guarded by intervalLock
+	intervals    []*IntervalMetrics
+	intervalLock sync.RWMutex
+}
+
+// IntervalMetrics stores the aggregated metrics
+// for a specific interval
+type IntervalMetrics struct {
+	sync.RWMutex
+
+	// The start time of the interval
+	Interval time.Time
+
+	// Gauges maps the key to the last set value
+	Gauges map[string]float32
+
+	// Points maps the string to the list of emitted values
+	// from EmitKey
+	Points map[string][]float32
+
+	// Counters maps the string key to a sum of the counter
+	// values
+	Counters map[string]*AggregateSample
+
+	// Samples maps the key to an AggregateSample,
+	// which has the rolled up view of a sample
+	Samples map[string]*AggregateSample
+}
+
+// NewIntervalMetrics creates a new IntervalMetrics for a given interval
+func NewIntervalMetrics(intv time.Time) *IntervalMetrics {
+	return &IntervalMetrics{
+		Interval: intv,
+		Gauges:   make(map[string]float32),
+		Points:   make(map[string][]float32),
+		Counters: make(map[string]*AggregateSample),
+		Samples:  make(map[string]*AggregateSample),
+	}
+}
+
+// AggregateSample is used to hold aggregate metrics
+// about a sample
+type AggregateSample struct {
+	Count       int       // The count of emitted pairs
+	Sum         float64   // The sum of values
+	SumSq       float64   // The sum of squared values
+	Min         float64   // Minimum value
+	Max         float64   // Maximum value
+	LastUpdated time.Time // When value was last updated
+}
+
+// Stddev returns the sample standard deviation of the ingested values, or 0 when it is undefined (fewer than two samples).
+func (a *AggregateSample) Stddev() float64 {
+	num := (float64(a.Count) * a.SumSq) - math.Pow(a.Sum, 2)
+	div := float64(a.Count * (a.Count - 1))
+	if div == 0 || num < 0 { // rounding can push num just below zero; Sqrt would then yield NaN
+		return 0
+	}
+	return math.Sqrt(num / div)
+}
+
+// Mean returns the arithmetic mean of the ingested values, or 0 if no values have been ingested.
+func (a *AggregateSample) Mean() float64 {
+	if a.Count == 0 {
+		return 0
+	}
+	return a.Sum / float64(a.Count)
+}
+
+// Ingest is used to update a sample
+func (a *AggregateSample) Ingest(v float64) {
+	a.Count++
+	a.Sum += v
+	a.SumSq += (v * v)
+	if v < a.Min || a.Count == 1 {
+		a.Min = v
+	}
+	if v > a.Max || a.Count == 1 {
+		a.Max = v
+	}
+	a.LastUpdated = time.Now()
+}
+
+func (a *AggregateSample) String() string {
+	if a.Count == 0 {
+		return "Count: 0"
+	} else if a.Stddev() == 0 {
+		return fmt.Sprintf("Count: %d Sum: %0.3f LastUpdated: %s", a.Count, a.Sum, a.LastUpdated)
+	} else {
+		return fmt.Sprintf("Count: %d Min: %0.3f Mean: %0.3f Max: %0.3f Stddev: %0.3f Sum: %0.3f LastUpdated: %s",
+			a.Count, a.Min, a.Mean(), a.Max, a.Stddev(), a.Sum, a.LastUpdated)
+	}
+}
+
+// NewInmemSink is used to construct a new in-memory sink.
+// Uses an aggregation interval and maximum retention period.
+func NewInmemSink(interval, retain time.Duration) *InmemSink {
+	i := &InmemSink{
+		interval:     interval,
+		retain:       retain,
+		maxIntervals: int(retain / interval),
+	}
+	i.intervals = make([]*IntervalMetrics, 0, i.maxIntervals)
+	return i
+}
+
+func (i *InmemSink) SetGauge(key []string, val float32) {
+	k := i.flattenKey(key)
+	intv := i.getInterval()
+
+	intv.Lock()
+	defer intv.Unlock()
+	intv.Gauges[k] = val
+}
+
+func (i *InmemSink) EmitKey(key []string, val float32) {
+	k := i.flattenKey(key)
+	intv := i.getInterval()
+
+	intv.Lock()
+	defer intv.Unlock()
+	vals := intv.Points[k]
+	intv.Points[k] = append(vals, val)
+}
+
+func (i *InmemSink) IncrCounter(key []string, val float32) {
+	k := i.flattenKey(key)
+	intv := i.getInterval()
+
+	intv.Lock()
+	defer intv.Unlock()
+
+	agg := intv.Counters[k]
+	if agg == nil {
+		agg = &AggregateSample{}
+		intv.Counters[k] = agg
+	}
+	agg.Ingest(float64(val))
+}
+
+func (i *InmemSink) AddSample(key []string, val float32) {
+	k := i.flattenKey(key)
+	intv := i.getInterval()
+
+	intv.Lock()
+	defer intv.Unlock()
+
+	agg := intv.Samples[k]
+	if agg == nil {
+		agg = &AggregateSample{}
+		intv.Samples[k] = agg
+	}
+	agg.Ingest(float64(val))
+}
+
+// Data is used to retrieve all the aggregated metrics
+// Intervals may be in use, and a read lock should be acquired
+func (i *InmemSink) Data() []*IntervalMetrics {
+	// Get the current interval, forces creation
+	i.getInterval()
+
+	i.intervalLock.RLock()
+	defer i.intervalLock.RUnlock()
+
+	intervals := make([]*IntervalMetrics, len(i.intervals))
+	copy(intervals, i.intervals)
+	return intervals
+}
+
+// getExistingInterval returns the newest retained interval if it starts at the instant intv (compared via Equal), else nil.
+func (i *InmemSink) getExistingInterval(intv time.Time) *IntervalMetrics {
+	i.intervalLock.RLock()
+	defer i.intervalLock.RUnlock()
+	n := len(i.intervals)
+	if n > 0 && i.intervals[n-1].Interval.Equal(intv) {
+		return i.intervals[n-1]
+	}
+	return nil
+}
+
+// createInterval returns the interval starting at intv, appending a new one and trimming the list to maxIntervals if needed.
+func (i *InmemSink) createInterval(intv time.Time) *IntervalMetrics {
+	i.intervalLock.Lock()
+	defer i.intervalLock.Unlock()
+	// Check for an existing interval; Equal compares instants, unlike == on time.Time
+	n := len(i.intervals)
+	if n > 0 && i.intervals[n-1].Interval.Equal(intv) {
+		return i.intervals[n-1]
+	}
+
+	// Add the current interval
+	current := NewIntervalMetrics(intv)
+	i.intervals = append(i.intervals, current)
+	n++
+
+	// Truncate the intervals if they are too long
+	if n >= i.maxIntervals {
+		copy(i.intervals[0:], i.intervals[n-i.maxIntervals:])
+		i.intervals = i.intervals[:i.maxIntervals]
+	}
+	return current
+}
+
+// getInterval returns the current interval to write to
+func (i *InmemSink) getInterval() *IntervalMetrics {
+	intv := time.Now().Truncate(i.interval)
+	if m := i.getExistingInterval(intv); m != nil {
+		return m
+	}
+	return i.createInterval(intv)
+}
+
+// flattenKey joins the key parts with "." and replaces spaces with underscores
+func (i *InmemSink) flattenKey(parts []string) string {
+	joined := strings.Join(parts, ".")
+	return strings.Replace(joined, " ", "_", -1)
+}
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/inmem_signal.go b/Godeps/_workspace/src/github.com/armon/go-metrics/inmem_signal.go
new file mode 100644
index 00000000000..95d08ee10f0
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/inmem_signal.go
@@ -0,0 +1,100 @@
+package metrics
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"os/signal"
+	"sync"
+	"syscall"
+)
+
+// InmemSignal is used to listen for a given signal, and when received,
+// to dump the current metrics from the InmemSink to an io.Writer
+type InmemSignal struct {
+	signal syscall.Signal
+	inm    *InmemSink
+	w      io.Writer
+	sigCh  chan os.Signal
+
+	stop     bool
+	stopCh   chan struct{}
+	stopLock sync.Mutex
+}
+
+// NewInmemSignal creates a new InmemSignal which listens for a given signal,
+// and dumps the current metrics out to a writer
+func NewInmemSignal(inmem *InmemSink, sig syscall.Signal, w io.Writer) *InmemSignal {
+	i := &InmemSignal{
+		signal: sig,
+		inm:    inmem,
+		w:      w,
+		sigCh:  make(chan os.Signal, 1),
+		stopCh: make(chan struct{}),
+	}
+	signal.Notify(i.sigCh, sig)
+	go i.run()
+	return i
+}
+
+// DefaultInmemSignal returns a new InmemSignal that responds to SIGUSR1
+// and writes output to stderr. Windows uses SIGBREAK
+func DefaultInmemSignal(inmem *InmemSink) *InmemSignal {
+	return NewInmemSignal(inmem, DefaultSignal, os.Stderr)
+}
+
+// Stop is used to stop the InmemSignal from listening
+func (i *InmemSignal) Stop() {
+	i.stopLock.Lock()
+	defer i.stopLock.Unlock()
+
+	if i.stop {
+		return
+	}
+	i.stop = true
+	close(i.stopCh)
+	signal.Stop(i.sigCh)
+}
+
+// run is a long running routine that handles signals
+func (i *InmemSignal) run() {
+	for {
+		select {
+		case <-i.sigCh:
+			i.dumpStats()
+		case <-i.stopCh:
+			return
+		}
+	}
+}
+
+// dumpStats formats the metrics of every completed interval and writes them to i.w
+func (i *InmemSignal) dumpStats() {
+	buf := bytes.NewBuffer(nil)
+
+	data := i.inm.Data()
+	// Skip the last period which is still being aggregated
+	for idx := 0; idx < len(data)-1; idx++ {
+		intv := data[idx]
+		intv.RLock()
+		for name, val := range intv.Gauges {
+			fmt.Fprintf(buf, "[%v][G] '%s': %0.3f\n", intv.Interval, name, val)
+		}
+		for name, vals := range intv.Points {
+			for _, val := range vals {
+				fmt.Fprintf(buf, "[%v][P] '%s': %0.3f\n", intv.Interval, name, val)
+			}
+		}
+		for name, agg := range intv.Counters {
+			fmt.Fprintf(buf, "[%v][C] '%s': %s\n", intv.Interval, name, agg)
+		}
+		for name, agg := range intv.Samples {
+			fmt.Fprintf(buf, "[%v][S] '%s': %s\n", intv.Interval, name, agg)
+		}
+		intv.RUnlock()
+	}
+
+	// Write out the bytes; a write error is dropped as there is no caller to report it to
+	i.w.Write(buf.Bytes())
+}
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/metrics.go b/Godeps/_workspace/src/github.com/armon/go-metrics/metrics.go
new file mode 100644
index 00000000000..b818e4182c0
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/metrics.go
@@ -0,0 +1,115 @@
+package metrics
+
+import (
+	"runtime"
+	"time"
+)
+
+func (m *Metrics) SetGauge(key []string, val float32) {
+	if m.HostName != "" && m.EnableHostname {
+		key = insert(0, m.HostName, key)
+	}
+	if m.EnableTypePrefix {
+		key = insert(0, "gauge", key)
+	}
+	if m.ServiceName != "" {
+		key = insert(0, m.ServiceName, key)
+	}
+	m.sink.SetGauge(key, val)
+}
+
+func (m *Metrics) EmitKey(key []string, val float32) {
+	if m.EnableTypePrefix {
+		key = insert(0, "kv", key)
+	}
+	if m.ServiceName != "" {
+		key = insert(0, m.ServiceName, key)
+	}
+	m.sink.EmitKey(key, val)
+}
+
+func (m *Metrics) IncrCounter(key []string, val float32) {
+	if m.EnableTypePrefix {
+		key = insert(0, "counter", key)
+	}
+	if m.ServiceName != "" {
+		key = insert(0, m.ServiceName, key)
+	}
+	m.sink.IncrCounter(key, val)
+}
+
+func (m *Metrics) AddSample(key []string, val float32) {
+	if m.EnableTypePrefix {
+		key = insert(0, "sample", key)
+	}
+	if m.ServiceName != "" {
+		key = insert(0, m.ServiceName, key)
+	}
+	m.sink.AddSample(key, val)
+}
+
+func (m *Metrics) MeasureSince(key []string, start time.Time) {
+	if m.EnableTypePrefix {
+		key = insert(0, "timer", key)
+	}
+	if m.ServiceName != "" {
+		key = insert(0, m.ServiceName, key)
+	}
+	now := time.Now()
+	elapsed := now.Sub(start)
+	msec := float32(elapsed.Nanoseconds()) / float32(m.TimerGranularity)
+	m.sink.AddSample(key, msec)
+}
+
+// collectStats emits runtime stats every ProfileInterval; the loop never exits
+func (m *Metrics) collectStats() {
+	for {
+		time.Sleep(m.ProfileInterval)
+		m.emitRuntimeStats()
+	}
+}
+
+// Emits various runtime statistics
+func (m *Metrics) emitRuntimeStats() {
+	// Export number of Goroutines
+	numRoutines := runtime.NumGoroutine()
+	m.SetGauge([]string{"runtime", "num_goroutines"}, float32(numRoutines))
+
+	// Export memory stats
+	var stats runtime.MemStats
+	runtime.ReadMemStats(&stats)
+	m.SetGauge([]string{"runtime", "alloc_bytes"}, float32(stats.Alloc))
+	m.SetGauge([]string{"runtime", "sys_bytes"}, float32(stats.Sys))
+	m.SetGauge([]string{"runtime", "malloc_count"}, float32(stats.Mallocs))
+	m.SetGauge([]string{"runtime", "free_count"}, float32(stats.Frees))
+	m.SetGauge([]string{"runtime", "heap_objects"}, float32(stats.HeapObjects))
+	m.SetGauge([]string{"runtime", "total_gc_pause_ns"}, float32(stats.PauseTotalNs))
+	m.SetGauge([]string{"runtime", "total_gc_runs"}, float32(stats.NumGC))
+
+	// Export info about the last few GC runs
+	num := stats.NumGC
+
+	// Handle wrap around
+	if num < m.lastNumGC {
+		m.lastNumGC = 0
+	}
+
+	// Ensure we don't scan more than 256
+	if num-m.lastNumGC >= 256 {
+		m.lastNumGC = num - 255
+	}
+
+	for i := m.lastNumGC; i < num; i++ {
+		pause := stats.PauseNs[i%256]
+		m.AddSample([]string{"runtime", "gc_pause_ns"}, float32(pause))
+	}
+	m.lastNumGC = num
+}
+
+// insert returns a new slice holding s with v inserted at index i. It always
+// allocates, so the caller's backing array is never shifted or overwritten.
+func insert(i int, v string, s []string) []string {
+	out := append(make([]string, 0, len(s)+1), s[:i]...)
+	out = append(out, v)
+	return append(out, s[i:]...)
+}
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/prometheus/prometheus.go b/Godeps/_workspace/src/github.com/armon/go-metrics/prometheus/prometheus.go
new file mode 100644
index 00000000000..362dbfb623d
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/prometheus/prometheus.go
@@ -0,0 +1,88 @@
+// +build go1.3
+package prometheus
+
+import (
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+type PrometheusSink struct {
+	mu        sync.Mutex
+	gauges    map[string]prometheus.Gauge
+	summaries map[string]prometheus.Summary
+	counters  map[string]prometheus.Counter
+}
+
+func NewPrometheusSink() (*PrometheusSink, error) {
+	return &PrometheusSink{
+		gauges:    make(map[string]prometheus.Gauge),
+		summaries: make(map[string]prometheus.Summary),
+		counters:  make(map[string]prometheus.Counter),
+	}, nil
+}
+
+// flattenKey joins the key parts with "_" and rewrites every space, dot and
+// dash to "_" in a single pass over the joined string.
+func (p *PrometheusSink) flattenKey(parts []string) string {
+	name := strings.Join(parts, "_")
+	sanitizer := strings.NewReplacer(" ", "_", ".", "_", "-", "_")
+	return sanitizer.Replace(name)
+}
+
+func (p *PrometheusSink) SetGauge(parts []string, val float32) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	key := p.flattenKey(parts)
+	g, ok := p.gauges[key]
+	if !ok {
+		g = prometheus.NewGauge(prometheus.GaugeOpts{
+			Name: key,
+			Help: key,
+		})
+		prometheus.MustRegister(g)
+		p.gauges[key] = g
+	}
+	g.Set(float64(val))
+}
+
+func (p *PrometheusSink) AddSample(parts []string, val float32) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	key := p.flattenKey(parts)
+	g, ok := p.summaries[key]
+	if !ok {
+		g = prometheus.NewSummary(prometheus.SummaryOpts{
+			Name:   key,
+			Help:   key,
+			MaxAge: 10 * time.Second,
+		})
+		prometheus.MustRegister(g)
+		p.summaries[key] = g
+	}
+	g.Observe(float64(val))
+}
+
+// EmitKey is not implemented. Prometheus doesn’t offer a type for which an
+// arbitrary number of values is retained, as Prometheus works with a pull
+// model, rather than a push model.
+func (p *PrometheusSink) EmitKey(key []string, val float32) {
+}
+
+func (p *PrometheusSink) IncrCounter(parts []string, val float32) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	key := p.flattenKey(parts)
+	g, ok := p.counters[key]
+	if !ok {
+		g = prometheus.NewCounter(prometheus.CounterOpts{
+			Name: key,
+			Help: key,
+		})
+		prometheus.MustRegister(g)
+		p.counters[key] = g
+	}
+	g.Add(float64(val))
+}
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/sink.go b/Godeps/_workspace/src/github.com/armon/go-metrics/sink.go
new file mode 100644
index 00000000000..0c240c2c47e
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/sink.go
@@ -0,0 +1,52 @@
+package metrics
+
+// The MetricSink interface is used to transmit metrics information
+// to an external system
+type MetricSink interface {
+	// A Gauge should retain the last value it is set to
+	SetGauge(key []string, val float32)
+
+	// Should emit a Key/Value pair for each call
+	EmitKey(key []string, val float32)
+
+	// Counters should accumulate values
+	IncrCounter(key []string, val float32)
+
+	// Samples are for timing information, where quantiles are used
+	AddSample(key []string, val float32)
+}
+
+// BlackholeSink is used to just blackhole messages
+type BlackholeSink struct{}
+
+func (*BlackholeSink) SetGauge(key []string, val float32)    {}
+func (*BlackholeSink) EmitKey(key []string, val float32)     {}
+func (*BlackholeSink) IncrCounter(key []string, val float32) {}
+func (*BlackholeSink) AddSample(key []string, val float32)   {}
+
+// FanoutSink forwards every metric call to each of the sinks it contains
+type FanoutSink []MetricSink
+
+func (fh FanoutSink) SetGauge(key []string, val float32) {
+	for _, s := range fh {
+		s.SetGauge(key, val)
+	}
+}
+
+func (fh FanoutSink) EmitKey(key []string, val float32) {
+	for _, s := range fh {
+		s.EmitKey(key, val)
+	}
+}
+
+func (fh FanoutSink) IncrCounter(key []string, val float32) {
+	for _, s := range fh {
+		s.IncrCounter(key, val)
+	}
+}
+
+func (fh FanoutSink) AddSample(key []string, val float32) {
+	for _, s := range fh {
+		s.AddSample(key, val)
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/start.go b/Godeps/_workspace/src/github.com/armon/go-metrics/start.go
new file mode 100644
index 00000000000..44113f10042
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/start.go
@@ -0,0 +1,95 @@
+package metrics
+
+import (
+	"os"
+	"time"
+)
+
+// Config is used to configure metrics settings
+type Config struct {
+	ServiceName          string        // Prefixed to keys to separate services
+	HostName             string        // Hostname to use. If not provided and EnableHostname, it will be os.Hostname
+	EnableHostname       bool          // Enable prefixing gauge values with hostname
+	EnableRuntimeMetrics bool          // Enables profiling of runtime metrics (GC, Goroutines, Memory)
+	EnableTypePrefix     bool          // Prefixes key with a type ("counter", "gauge", "timer")
+	TimerGranularity     time.Duration // Granularity of timers.
+	ProfileInterval      time.Duration // Interval to profile runtime metrics
+}
+
+// Metrics pairs a MetricSink with the Config that controls how keys are
+// prefixed before they are emitted to that sink
+type Metrics struct {
+	Config
+	lastNumGC uint32
+	sink      MetricSink
+}
+
+// Shared global metrics instance
+var globalMetrics *Metrics
+
+func init() {
+	// Initialize to a blackhole sink to avoid errors
+	globalMetrics = &Metrics{sink: &BlackholeSink{}}
+}
+
+// DefaultConfig provides a sane default configuration
+func DefaultConfig(serviceName string) *Config {
+	c := &Config{
+		ServiceName:          serviceName, // Use client provided service
+		HostName:             "",
+		EnableHostname:       true,             // Enable hostname prefix
+		EnableRuntimeMetrics: true,             // Enable runtime profiling
+		EnableTypePrefix:     false,            // Disable type prefix
+		TimerGranularity:     time.Millisecond, // Timers are in milliseconds
+		ProfileInterval:      time.Second,      // Poll runtime every second
+	}
+
+	// Try to get the hostname
+	name, _ := os.Hostname()
+	c.HostName = name
+	return c
+}
+
+// New is used to create a new instance of Metrics
+func New(conf *Config, sink MetricSink) (*Metrics, error) {
+	met := &Metrics{}
+	met.Config = *conf
+	met.sink = sink
+
+	// Start the runtime collector
+	if conf.EnableRuntimeMetrics {
+		go met.collectStats()
+	}
+	return met, nil
+}
+
+// NewGlobal is the same as New, but it assigns the metrics object to be
+// used globally as well as returning it.
+func NewGlobal(conf *Config, sink MetricSink) (*Metrics, error) {
+	metrics, err := New(conf, sink)
+	if err == nil {
+		globalMetrics = metrics
+	}
+	return metrics, err
+}
+
+// Proxy all the methods to the globalMetrics instance
+func SetGauge(key []string, val float32) {
+	globalMetrics.SetGauge(key, val)
+}
+
+func EmitKey(key []string, val float32) {
+	globalMetrics.EmitKey(key, val)
+}
+
+func IncrCounter(key []string, val float32) {
+	globalMetrics.IncrCounter(key, val)
+}
+
+func AddSample(key []string, val float32) {
+	globalMetrics.AddSample(key, val)
+}
+
+func MeasureSince(key []string, start time.Time) {
+	globalMetrics.MeasureSince(key, start)
+}
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/statsd.go b/Godeps/_workspace/src/github.com/armon/go-metrics/statsd.go
new file mode 100644
index 00000000000..65a5021a057
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/statsd.go
@@ -0,0 +1,154 @@
+package metrics
+
+import (
+	"bytes"
+	"fmt"
+	"log"
+	"net"
+	"strings"
+	"time"
+)
+
+const (
+	// statsdMaxLen is the maximum size of a packet
+	// to send to statsd
+	statsdMaxLen = 1400
+)
+
+// StatsdSink provides a MetricSink that can be used
+// with a statsite or statsd metrics server. It uses
+// only UDP packets, while StatsiteSink uses TCP.
+type StatsdSink struct {
+	addr        string
+	metricQueue chan string
+}
+
+// NewStatsdSink is used to create a new StatsdSink
+func NewStatsdSink(addr string) (*StatsdSink, error) {
+	s := &StatsdSink{
+		addr:        addr,
+		metricQueue: make(chan string, 4096),
+	}
+	go s.flushMetrics()
+	return s, nil
+}
+
+// Shutdown is used to stop flushing to statsd by closing the metric queue
+func (s *StatsdSink) Shutdown() {
+	close(s.metricQueue)
+}
+
+func (s *StatsdSink) SetGauge(key []string, val float32) {
+	flatKey := s.flattenKey(key)
+	s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
+}
+
+func (s *StatsdSink) EmitKey(key []string, val float32) {
+	flatKey := s.flattenKey(key)
+	s.pushMetric(fmt.Sprintf("%s:%f|kv\n", flatKey, val))
+}
+
+func (s *StatsdSink) IncrCounter(key []string, val float32) {
+	flatKey := s.flattenKey(key)
+	s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
+}
+
+func (s *StatsdSink) AddSample(key []string, val float32) {
+	flatKey := s.flattenKey(key)
+	s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
+}
+
+// flattenKey joins the key parts with "." and maps ':' and ' ' to '_'
+func (s *StatsdSink) flattenKey(parts []string) string {
+	joined := strings.Join(parts, ".")
+	return strings.Map(func(r rune) rune {
+		switch r {
+		case ':':
+			fallthrough
+		case ' ':
+			return '_'
+		default:
+			return r
+		}
+	}, joined)
+}
+
+// Does a non-blocking push to the metrics queue
+func (s *StatsdSink) pushMetric(m string) {
+	select {
+	case s.metricQueue <- m:
+	default:
+	}
+}
+
+// flushMetrics batches queued metrics into UDP writes until the queue is closed
+func (s *StatsdSink) flushMetrics() {
+	var sock net.Conn
+	var err error
+	var wait <-chan time.Time
+	ticker := time.NewTicker(flushInterval)
+	defer ticker.Stop()
+
+CONNECT:
+	// Start every connection attempt with a fresh, empty buffer
+	buf := bytes.NewBuffer(nil)
+
+	// Attempt to connect
+	sock, err = net.Dial("udp", s.addr)
+	if err != nil {
+		log.Printf("[ERR] Error connecting to statsd! Err: %s", err)
+		goto WAIT
+	}
+
+	for {
+		select {
+		case metric, ok := <-s.metricQueue:
+			// Queue closed: quit. NOTE(review): metrics still in buf are not sent here
+			if !ok {
+				goto QUIT
+			}
+
+			// Send the pending packet first if this metric would exceed statsdMaxLen
+			if len(metric)+buf.Len() > statsdMaxLen {
+				_, err := sock.Write(buf.Bytes())
+				buf.Reset()
+				if err != nil {
+					log.Printf("[ERR] Error writing to statsd! Err: %s", err)
+					goto WAIT
+				}
+			}
+
+			// Append to the buffer
+			buf.WriteString(metric)
+
+		case <-ticker.C:
+			if buf.Len() == 0 {
+				continue
+			}
+
+			_, err := sock.Write(buf.Bytes())
+			buf.Reset()
+			if err != nil {
+				log.Printf("[ERR] Error flushing to statsd! Err: %s", err)
+				goto WAIT
+			}
+		}
+	}
+
+WAIT:
+	// Back off 5s. NOTE(review): sock is never closed before redialing - confirm leak
+	wait = time.After(time.Duration(5) * time.Second)
+	for {
+		select {
+		// Dequeue the messages to avoid backlog
+		case _, ok := <-s.metricQueue:
+			if !ok {
+				goto QUIT
+			}
+		case <-wait:
+			goto CONNECT
+		}
+	}
+QUIT:
+	s.metricQueue = nil
+}
diff --git a/Godeps/_workspace/src/github.com/armon/go-metrics/statsite.go b/Godeps/_workspace/src/github.com/armon/go-metrics/statsite.go
new file mode 100644
index 00000000000..68730139a73
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/armon/go-metrics/statsite.go
@@ -0,0 +1,142 @@
+package metrics
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"net"
+	"strings"
+	"time"
+)
+
+const (
+	// We force flush the statsite metrics after this period of
+	// inactivity. Prevents stats from getting stuck in a buffer
+	// forever.
+	flushInterval = 100 * time.Millisecond
+)
+
+// StatsiteSink provides a MetricSink that can be used with a
+// statsite metrics server
+type StatsiteSink struct {
+	addr        string
+	metricQueue chan string
+}
+
+// NewStatsiteSink is used to create a new StatsiteSink
+func NewStatsiteSink(addr string) (*StatsiteSink, error) {
+	s := &StatsiteSink{
+		addr:        addr,
+		metricQueue: make(chan string, 4096),
+	}
+	go s.flushMetrics()
+	return s, nil
+}
+
+// Shutdown is used to stop flushing to statsite by closing the metric queue
+func (s *StatsiteSink) Shutdown() {
+	close(s.metricQueue)
+}
+
+func (s *StatsiteSink) SetGauge(key []string, val float32) {
+	flatKey := s.flattenKey(key)
+	s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
+}
+
+func (s *StatsiteSink) EmitKey(key []string, val float32) {
+	flatKey := s.flattenKey(key)
+	s.pushMetric(fmt.Sprintf("%s:%f|kv\n", flatKey, val))
+}
+
+func (s *StatsiteSink) IncrCounter(key []string, val float32) {
+	flatKey := s.flattenKey(key)
+	s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
+}
+
+func (s *StatsiteSink) AddSample(key []string, val float32) {
+	flatKey := s.flattenKey(key)
+	s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
+}
+
+// flattenKey joins the key parts with "." and maps ':' and ' ' to '_'
+func (s *StatsiteSink) flattenKey(parts []string) string {
+	joined := strings.Join(parts, ".")
+	return strings.Map(func(r rune) rune {
+		switch r {
+		case ':':
+			fallthrough
+		case ' ':
+			return '_'
+		default:
+			return r
+		}
+	}, joined)
+}
+
+// Does a non-blocking push to the metrics queue
+func (s *StatsiteSink) pushMetric(m string) {
+	select {
+	case s.metricQueue <- m:
+	default:
+	}
+}
+
+// flushMetrics streams queued metrics over TCP until the queue is closed
+func (s *StatsiteSink) flushMetrics() {
+	var sock net.Conn
+	var err error
+	var wait <-chan time.Time
+	var buffered *bufio.Writer
+	ticker := time.NewTicker(flushInterval)
+	defer ticker.Stop()
+
+CONNECT:
+	// Attempt to connect
+	sock, err = net.Dial("tcp", s.addr)
+	if err != nil {
+		log.Printf("[ERR] Error connecting to statsite! Err: %s", err)
+		goto WAIT
+	}
+
+	// Create a buffered writer; it is flushed on every ticker tick below
+	buffered = bufio.NewWriter(sock)
+
+	for {
+		select {
+		case metric, ok := <-s.metricQueue:
+			// Queue closed: quit. NOTE(review): buffered is not flushed on this path
+			if !ok {
+				goto QUIT
+			}
+
+			// Try to send to statsite
+			_, err := buffered.Write([]byte(metric))
+			if err != nil {
+				log.Printf("[ERR] Error writing to statsite! Err: %s", err)
+				goto WAIT
+			}
+		case <-ticker.C:
+			if err := buffered.Flush(); err != nil {
+				log.Printf("[ERR] Error flushing to statsite! Err: %s", err)
+				goto WAIT
+			}
+		}
+	}
+
+WAIT:
+	// Back off 5s. NOTE(review): sock is never closed before redialing - confirm leak
+	wait = time.After(time.Duration(5) * time.Second)
+	for {
+		select {
+		// Dequeue the messages to avoid backlog
+		case _, ok := <-s.metricQueue:
+			if !ok {
+				goto QUIT
+			}
+		case <-wait:
+			goto CONNECT
+		}
+	}
+QUIT:
+	s.metricQueue = nil
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/LICENSE b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/LICENSE
new file mode 100644
index 00000000000..ccae99f6a9a
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/LICENSE
@@ -0,0 +1,25 @@
+Copyright (c) 2012, 2013 Ugorji Nwoke.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+* Neither the name of the author nor the names of its contributors may be used
+  to endorse or promote products derived from this software
+  without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/0doc.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/0doc.go
new file mode 100644
index 00000000000..c14d810a73e
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/0doc.go
@@ -0,0 +1,143 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+/*
+High Performance, Feature-Rich Idiomatic Go encoding library for msgpack and binc .
+
+Supported Serialization formats are:
+
+  - msgpack: [https://github.com/msgpack/msgpack]
+  - binc: [http://github.com/ugorji/binc]
+
+To install:
+
+    go get github.com/ugorji/go/codec
+
+The idiomatic Go support is as seen in other encoding packages in
+the standard library (ie json, xml, gob, etc).
+
+Rich Feature Set includes:
+
+  - Simple but extremely powerful and feature-rich API
+  - Very High Performance.
+    Our extensive benchmarks show us outperforming Gob, Json and Bson by 2-4X.
+    This was achieved by taking extreme care on:
+      - managing allocation
+      - function frame size (important due to Go's use of split stacks),
+      - reflection use (and by-passing reflection for common types)
+      - recursion implications
+      - zero-copy mode (encoding/decoding to byte slice without using temp buffers)
+  - Correct.
+    Care was taken to precisely handle corner cases like:
+      overflows, nil maps and slices, nil value in stream, etc.
+  - Efficient zero-copying into temporary byte buffers
+    when encoding into or decoding from a byte slice.
+  - Standard field renaming via tags
+  - Encoding from any value
+    (struct, slice, map, primitives, pointers, interface{}, etc)
+  - Decoding into pointer to any non-nil typed value
+    (struct, slice, map, int, float32, bool, string, reflect.Value, etc)
+  - Supports extension functions to handle the encode/decode of custom types
+  - Support Go 1.2 encoding.BinaryMarshaler/BinaryUnmarshaler
+  - Schema-less decoding
+    (decode into a pointer to a nil interface{} as opposed to a typed non-nil value).
+    Includes Options to configure what specific map or slice type to use
+    when decoding an encoded list or map into a nil interface{}
+  - Provides a RPC Server and Client Codec for net/rpc communication protocol.
+  - Msgpack Specific:
+      - Provides extension functions to handle spec-defined extensions (binary, timestamp)
+      - Options to resolve ambiguities in handling raw bytes (as string or []byte)
+        during schema-less decoding (decoding into a nil interface{})
+      - RPC Server/Client Codec for msgpack-rpc protocol defined at:
+        https://github.com/msgpack-rpc/msgpack-rpc/blob/master/spec.md
+  - Fast Paths for some container types:
+    For some container types, we circumvent reflection and its associated overhead
+    and allocation costs, and encode/decode directly. These types are:
+	    []interface{}
+	    []int
+	    []string
+	    map[interface{}]interface{}
+	    map[int]interface{}
+	    map[string]interface{}
+
+Extension Support
+
+Users can register a function to handle the encoding or decoding of
+their custom types.
+
+There are no restrictions on what the custom type can be. Some examples:
+
+    type BitSet   []int
+    type BitSet64 uint64
+    type UUID     string
+    type MyStructWithUnexportedFields struct { a int; b bool; c []int; }
+    type GifImage struct { ... }
+
+As an illustration, MyStructWithUnexportedFields would normally be
+encoded as an empty map because it has no exported fields, while UUID
+would be encoded as a string. However, with extension support, you can
+encode any of these however you like.
+
+RPC
+
+RPC Client and Server Codecs are implemented, so the codecs can be used
+with the standard net/rpc package.
+
+Usage
+
+Typical usage model:
+
+    // create and configure Handle
+    var (
+      bh codec.BincHandle
+      mh codec.MsgpackHandle
+    )
+
+    mh.MapType = reflect.TypeOf(map[string]interface{}(nil))
+
+    // configure extensions
+    // e.g. for msgpack, define functions and enable Time support for tag 1
+    // mh.AddExt(reflect.TypeOf(time.Time{}), 1, myMsgpackTimeEncodeExtFn, myMsgpackTimeDecodeExtFn)
+
+    // create and use decoder/encoder
+    var (
+      r io.Reader
+      w io.Writer
+      b []byte
+      h = &bh // or mh to use msgpack
+    )
+
+    dec = codec.NewDecoder(r, h)
+    dec = codec.NewDecoderBytes(b, h)
+    err = dec.Decode(&v)
+
+    enc = codec.NewEncoder(w, h)
+    enc = codec.NewEncoderBytes(&b, h)
+    err = enc.Encode(v)
+
+    //RPC Server
+    go func() {
+        for {
+            conn, err := listener.Accept()
+            rpcCodec := codec.GoRpc.ServerCodec(conn, h)
+            //OR rpcCodec := codec.MsgpackSpecRpc.ServerCodec(conn, h)
+            rpc.ServeCodec(rpcCodec)
+        }
+    }()
+
+    //RPC Communication (client side)
+    conn, err = net.Dial("tcp", "localhost:5555")
+    rpcCodec := codec.GoRpc.ClientCodec(conn, h)
+    //OR rpcCodec := codec.MsgpackSpecRpc.ClientCodec(conn, h)
+    client := rpc.NewClientWithCodec(rpcCodec)
+
+Representative Benchmark Results
+
+Run the benchmark suite using:
+   go test -bi -bench=. -benchmem
+
+To run full benchmark suite (including against vmsgpack and bson),
+see notes in ext_dep_test.go
+
+*/
+package codec
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/README.md b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/README.md
new file mode 100644
index 00000000000..6c95d1bfd20
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/README.md
@@ -0,0 +1,174 @@
+# Codec
+
+High Performance and Feature-Rich Idiomatic Go Library providing
+encode/decode support for different serialization formats.
+
+Supported Serialization formats are:
+
+  - msgpack: [https://github.com/msgpack/msgpack]
+  - binc: [http://github.com/ugorji/binc]
+
+To install:
+
+    go get github.com/ugorji/go/codec
+
+Online documentation: [http://godoc.org/github.com/ugorji/go/codec]
+
+The idiomatic Go support is as seen in other encoding packages in
+the standard library (ie json, xml, gob, etc).
+
+Rich Feature Set includes:
+
+  - Simple but extremely powerful and feature-rich API
+  - Very High Performance.   
+    Our extensive benchmarks show us outperforming Gob, Json and Bson by 2-4X.
+    This was achieved by taking extreme care on:
+      - managing allocation
+      - function frame size (important due to Go's use of split stacks),
+      - reflection use (and by-passing reflection for common types)
+      - recursion implications
+      - zero-copy mode (encoding/decoding to byte slice without using temp buffers)
+  - Correct.  
+    Care was taken to precisely handle corner cases like: 
+      overflows, nil maps and slices, nil value in stream, etc.
+  - Efficient zero-copying into temporary byte buffers  
+    when encoding into or decoding from a byte slice.
+  - Standard field renaming via tags
+  - Encoding from any value  
+    (struct, slice, map, primitives, pointers, interface{}, etc)
+  - Decoding into pointer to any non-nil typed value  
+    (struct, slice, map, int, float32, bool, string, reflect.Value, etc)
+  - Supports extension functions to handle the encode/decode of custom types
+  - Support Go 1.2 encoding.BinaryMarshaler/BinaryUnmarshaler
+  - Schema-less decoding  
+    (decode into a pointer to a nil interface{} as opposed to a typed non-nil value).  
+    Includes Options to configure what specific map or slice type to use 
+    when decoding an encoded list or map into a nil interface{}
+  - Provides a RPC Server and Client Codec for net/rpc communication protocol.
+  - Msgpack Specific:
+      - Provides extension functions to handle spec-defined extensions (binary, timestamp)
+      - Options to resolve ambiguities in handling raw bytes (as string or []byte)  
+        during schema-less decoding (decoding into a nil interface{})
+      - RPC Server/Client Codec for msgpack-rpc protocol defined at: 
+        https://github.com/msgpack-rpc/msgpack-rpc/blob/master/spec.md
+  - Fast Paths for some container types:  
+    For some container types, we circumvent reflection and its associated overhead
+    and allocation costs, and encode/decode directly. These types are:  
+	    []interface{}
+	    []int
+	    []string
+	    map[interface{}]interface{}
+	    map[int]interface{}
+	    map[string]interface{}
+
+## Extension Support
+
+Users can register a function to handle the encoding or decoding of
+their custom types.
+
+There are no restrictions on what the custom type can be. Some examples:
+
+    type BitSet   []int
+    type BitSet64 uint64
+    type UUID     string
+    type MyStructWithUnexportedFields struct { a int; b bool; c []int; }
+    type GifImage struct { ... }
+
+As an illustration, MyStructWithUnexportedFields would normally be
+encoded as an empty map because it has no exported fields, while UUID
+would be encoded as a string. However, with extension support, you can
+encode any of these however you like.
+
+## RPC
+
+RPC Client and Server Codecs are implemented, so the codecs can be used
+with the standard net/rpc package.
+
+## Usage
+
+Typical usage model:
+
+    // create and configure Handle
+    var (
+      bh codec.BincHandle
+      mh codec.MsgpackHandle
+    )
+
+    mh.MapType = reflect.TypeOf(map[string]interface{}(nil))
+    
+    // configure extensions
+    // e.g. for msgpack, define functions and enable Time support for tag 1
+    // mh.AddExt(reflect.TypeOf(time.Time{}), 1, myMsgpackTimeEncodeExtFn, myMsgpackTimeDecodeExtFn)
+
+    // create and use decoder/encoder
+    var (
+      r io.Reader
+      w io.Writer
+      b []byte
+      h = &bh // or mh to use msgpack
+    )
+    
+    dec = codec.NewDecoder(r, h)
+    dec = codec.NewDecoderBytes(b, h)
+    err = dec.Decode(&v) 
+    
+    enc = codec.NewEncoder(w, h)
+    enc = codec.NewEncoderBytes(&b, h)
+    err = enc.Encode(v)
+    
+    //RPC Server
+    go func() {
+        for {
+            conn, err := listener.Accept()
+            rpcCodec := codec.GoRpc.ServerCodec(conn, h)
+            //OR rpcCodec := codec.MsgpackSpecRpc.ServerCodec(conn, h)
+            rpc.ServeCodec(rpcCodec)
+        }
+    }()
+
+    //RPC Communication (client side)
+    conn, err = net.Dial("tcp", "localhost:5555")
+    rpcCodec := codec.GoRpc.ClientCodec(conn, h)
+    //OR rpcCodec := codec.MsgpackSpecRpc.ClientCodec(conn, h)
+    client := rpc.NewClientWithCodec(rpcCodec)
+
+## Representative Benchmark Results
+
+A sample run of benchmark using "go test -bi -bench=. -benchmem":
+
+    /proc/cpuinfo: Intel(R) Core(TM) i7-2630QM CPU @ 2.00GHz (HT)
+    
+    ..............................................
+    BENCHMARK INIT: 2013-10-16 11:02:50.345970786 -0400 EDT
+    To run full benchmark comparing encodings (MsgPack, Binc, JSON, GOB, etc), use: "go test -bench=."
+    Benchmark: 
+    	Struct recursive Depth:             1
+    	ApproxDeepSize Of benchmark Struct: 4694 bytes
+    Benchmark One-Pass Run:
+    	 v-msgpack: len: 1600 bytes
+    	      bson: len: 3025 bytes
+    	   msgpack: len: 1560 bytes
+    	      binc: len: 1187 bytes
+    	       gob: len: 1972 bytes
+    	      json: len: 2538 bytes
+    ..............................................
+    PASS
+    Benchmark__Msgpack____Encode	   50000	     54359 ns/op	   14953 B/op	      83 allocs/op
+    Benchmark__Msgpack____Decode	   10000	    106531 ns/op	   14990 B/op	     410 allocs/op
+    Benchmark__Binc_NoSym_Encode	   50000	     53956 ns/op	   14966 B/op	      83 allocs/op
+    Benchmark__Binc_NoSym_Decode	   10000	    103751 ns/op	   14529 B/op	     386 allocs/op
+    Benchmark__Binc_Sym___Encode	   50000	     65961 ns/op	   17130 B/op	      88 allocs/op
+    Benchmark__Binc_Sym___Decode	   10000	    106310 ns/op	   15857 B/op	     287 allocs/op
+    Benchmark__Gob________Encode	   10000	    135944 ns/op	   21189 B/op	     237 allocs/op
+    Benchmark__Gob________Decode	    5000	    405390 ns/op	   83460 B/op	    1841 allocs/op
+    Benchmark__Json_______Encode	   20000	     79412 ns/op	   13874 B/op	     102 allocs/op
+    Benchmark__Json_______Decode	   10000	    247979 ns/op	   14202 B/op	     493 allocs/op
+    Benchmark__Bson_______Encode	   10000	    121762 ns/op	   27814 B/op	     514 allocs/op
+    Benchmark__Bson_______Decode	   10000	    162126 ns/op	   16514 B/op	     789 allocs/op
+    Benchmark__VMsgpack___Encode	   50000	     69155 ns/op	   12370 B/op	     344 allocs/op
+    Benchmark__VMsgpack___Decode	   10000	    151609 ns/op	   20307 B/op	     571 allocs/op
+    ok  	ugorji.net/codec	30.827s
+
+To run full benchmark suite (including against vmsgpack and bson), 
+see notes in ext\_dep\_test.go
+
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/binc.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/binc.go
new file mode 100644
index 00000000000..2bb5e8fee85
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/binc.go
@@ -0,0 +1,786 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+package codec
+
+import (
+	"math"
+	// "reflect"
+	// "sync/atomic"
+	"time"
+	//"fmt"
+)
+
+const bincDoPrune = true // No longer needed. Needed before as C lib did not support pruning.
+
+//var _ = fmt.Printf
+
+// vd as low 4 bits (there are 16 slots)
+const (
+	bincVdSpecial byte = iota
+	bincVdPosInt
+	bincVdNegInt
+	bincVdFloat
+
+	bincVdString
+	bincVdByteArray
+	bincVdArray
+	bincVdMap
+
+	bincVdTimestamp
+	bincVdSmallInt
+	bincVdUnicodeOther
+	bincVdSymbol
+
+	bincVdDecimal
+	_               // open slot
+	_               // open slot
+	bincVdCustomExt = 0x0f
+)
+
+const (
+	bincSpNil byte = iota
+	bincSpFalse
+	bincSpTrue
+	bincSpNan
+	bincSpPosInf
+	bincSpNegInf
+	bincSpZeroFloat
+	bincSpZero
+	bincSpNegOne
+)
+
+const (
+	bincFlBin16 byte = iota
+	bincFlBin32
+	_ // bincFlBin32e
+	bincFlBin64
+	_ // bincFlBin64e
+	// others not currently supported
+)
+
+type bincEncDriver struct {
+	w encWriter
+	m map[string]uint16 // symbols
+	s uint32            // symbols sequencer
+	b [8]byte
+}
+
+func (e *bincEncDriver) isBuiltinType(rt uintptr) bool {
+	return rt == timeTypId
+}
+
+func (e *bincEncDriver) encodeBuiltin(rt uintptr, v interface{}) {
+	switch rt {
+	case timeTypId:
+		bs := encodeTime(v.(time.Time))
+		e.w.writen1(bincVdTimestamp<<4 | uint8(len(bs)))
+		e.w.writeb(bs)
+	}
+}
+
+func (e *bincEncDriver) encodeNil() {
+	e.w.writen1(bincVdSpecial<<4 | bincSpNil)
+}
+
+func (e *bincEncDriver) encodeBool(b bool) {
+	if b {
+		e.w.writen1(bincVdSpecial<<4 | bincSpTrue)
+	} else {
+		e.w.writen1(bincVdSpecial<<4 | bincSpFalse)
+	}
+}
+
+func (e *bincEncDriver) encodeFloat32(f float32) {
+	if f == 0 {
+		e.w.writen1(bincVdSpecial<<4 | bincSpZeroFloat)
+		return
+	}
+	e.w.writen1(bincVdFloat<<4 | bincFlBin32)
+	e.w.writeUint32(math.Float32bits(f))
+}
+
+func (e *bincEncDriver) encodeFloat64(f float64) {
+	if f == 0 {
+		e.w.writen1(bincVdSpecial<<4 | bincSpZeroFloat)
+		return
+	}
+	bigen.PutUint64(e.b[:], math.Float64bits(f))
+	if bincDoPrune {
+		i := 7
+		for ; i >= 0 && (e.b[i] == 0); i-- {
+		}
+		i++
+		if i <= 6 {
+			e.w.writen1(bincVdFloat<<4 | 0x8 | bincFlBin64)
+			e.w.writen1(byte(i))
+			e.w.writeb(e.b[:i])
+			return
+		}
+	}
+	e.w.writen1(bincVdFloat<<4 | bincFlBin64)
+	e.w.writeb(e.b[:])
+}
+
+func (e *bincEncDriver) encIntegerPrune(bd byte, pos bool, v uint64, lim uint8) {
+	if lim == 4 {
+		bigen.PutUint32(e.b[:lim], uint32(v))
+	} else {
+		bigen.PutUint64(e.b[:lim], v)
+	}
+	if bincDoPrune {
+		i := pruneSignExt(e.b[:lim], pos)
+		e.w.writen1(bd | lim - 1 - byte(i))
+		e.w.writeb(e.b[i:lim])
+	} else {
+		e.w.writen1(bd | lim - 1)
+		e.w.writeb(e.b[:lim])
+	}
+}
+
+func (e *bincEncDriver) encodeInt(v int64) {
+	const nbd byte = bincVdNegInt << 4
+	switch {
+	case v >= 0:
+		e.encUint(bincVdPosInt<<4, true, uint64(v))
+	case v == -1:
+		e.w.writen1(bincVdSpecial<<4 | bincSpNegOne)
+	default:
+		e.encUint(bincVdNegInt<<4, false, uint64(-v))
+	}
+}
+
+func (e *bincEncDriver) encodeUint(v uint64) {
+	e.encUint(bincVdPosInt<<4, true, v)
+}
+
+func (e *bincEncDriver) encUint(bd byte, pos bool, v uint64) {
+	switch {
+	case v == 0:
+		e.w.writen1(bincVdSpecial<<4 | bincSpZero)
+	case pos && v >= 1 && v <= 16:
+		e.w.writen1(bincVdSmallInt<<4 | byte(v-1))
+	case v <= math.MaxUint8:
+		e.w.writen2(bd|0x0, byte(v))
+	case v <= math.MaxUint16:
+		e.w.writen1(bd | 0x01)
+		e.w.writeUint16(uint16(v))
+	case v <= math.MaxUint32:
+		e.encIntegerPrune(bd, pos, v, 4)
+	default:
+		e.encIntegerPrune(bd, pos, v, 8)
+	}
+}
+
+func (e *bincEncDriver) encodeExtPreamble(xtag byte, length int) {
+	e.encLen(bincVdCustomExt<<4, uint64(length))
+	e.w.writen1(xtag)
+}
+
+func (e *bincEncDriver) encodeArrayPreamble(length int) {
+	e.encLen(bincVdArray<<4, uint64(length))
+}
+
+func (e *bincEncDriver) encodeMapPreamble(length int) {
+	e.encLen(bincVdMap<<4, uint64(length))
+}
+
+func (e *bincEncDriver) encodeString(c charEncoding, v string) {
+	l := uint64(len(v))
+	e.encBytesLen(c, l)
+	if l > 0 {
+		e.w.writestr(v)
+	}
+}
+
+func (e *bincEncDriver) encodeSymbol(v string) {
+	// if WriteSymbolsNoRefs {
+	// 	e.encodeString(c_UTF8, v)
+	// 	return
+	// }
+
+	//symbols only offer benefit when string length > 1.
+	//This is because strings with length 1 take only 2 bytes to store
+	//(bd with embedded length, and single byte for string val).
+
+	l := len(v)
+	switch l {
+	case 0:
+		e.encBytesLen(c_UTF8, 0)
+		return
+	case 1:
+		e.encBytesLen(c_UTF8, 1)
+		e.w.writen1(v[0])
+		return
+	}
+	if e.m == nil {
+		e.m = make(map[string]uint16, 16)
+	}
+	ui, ok := e.m[v]
+	if ok {
+		if ui <= math.MaxUint8 {
+			e.w.writen2(bincVdSymbol<<4, byte(ui))
+		} else {
+			e.w.writen1(bincVdSymbol<<4 | 0x8)
+			e.w.writeUint16(ui)
+		}
+	} else {
+		e.s++
+		ui = uint16(e.s)
+		//ui = uint16(atomic.AddUint32(&e.s, 1))
+		e.m[v] = ui
+		var lenprec uint8
+		switch {
+		case l <= math.MaxUint8:
+			// lenprec = 0
+		case l <= math.MaxUint16:
+			lenprec = 1
+		case int64(l) <= math.MaxUint32:
+			lenprec = 2
+		default:
+			lenprec = 3
+		}
+		if ui <= math.MaxUint8 {
+			e.w.writen2(bincVdSymbol<<4|0x0|0x4|lenprec, byte(ui))
+		} else {
+			e.w.writen1(bincVdSymbol<<4 | 0x8 | 0x4 | lenprec)
+			e.w.writeUint16(ui)
+		}
+		switch lenprec {
+		case 0:
+			e.w.writen1(byte(l))
+		case 1:
+			e.w.writeUint16(uint16(l))
+		case 2:
+			e.w.writeUint32(uint32(l))
+		default:
+			e.w.writeUint64(uint64(l))
+		}
+		e.w.writestr(v)
+	}
+}
+
+func (e *bincEncDriver) encodeStringBytes(c charEncoding, v []byte) {
+	l := uint64(len(v))
+	e.encBytesLen(c, l)
+	if l > 0 {
+		e.w.writeb(v)
+	}
+}
+
+func (e *bincEncDriver) encBytesLen(c charEncoding, length uint64) {
+	//TODO: support bincUnicodeOther (for now, just use string or bytearray)
+	if c == c_RAW {
+		e.encLen(bincVdByteArray<<4, length)
+	} else {
+		e.encLen(bincVdString<<4, length)
+	}
+}
+
+func (e *bincEncDriver) encLen(bd byte, l uint64) {
+	if l < 12 {
+		e.w.writen1(bd | uint8(l+4))
+	} else {
+		e.encLenNumber(bd, l)
+	}
+}
+
+func (e *bincEncDriver) encLenNumber(bd byte, v uint64) {
+	switch {
+	case v <= math.MaxUint8:
+		e.w.writen2(bd, byte(v))
+	case v <= math.MaxUint16:
+		e.w.writen1(bd | 0x01)
+		e.w.writeUint16(uint16(v))
+	case v <= math.MaxUint32:
+		e.w.writen1(bd | 0x02)
+		e.w.writeUint32(uint32(v))
+	default:
+		e.w.writen1(bd | 0x03)
+		e.w.writeUint64(uint64(v))
+	}
+}
+
+//------------------------------------
+
+type bincDecDriver struct {
+	r      decReader
+	bdRead bool
+	bdType valueType
+	bd     byte
+	vd     byte
+	vs     byte
+	b      [8]byte
+	m      map[uint32]string // symbols (use uint32 as key, as map optimizes for it)
+}
+
+func (d *bincDecDriver) initReadNext() {
+	if d.bdRead {
+		return
+	}
+	d.bd = d.r.readn1()
+	d.vd = d.bd >> 4
+	d.vs = d.bd & 0x0f
+	d.bdRead = true
+	d.bdType = valueTypeUnset
+}
+
+func (d *bincDecDriver) currentEncodedType() valueType {
+	if d.bdType == valueTypeUnset {
+		switch d.vd {
+		case bincVdSpecial:
+			switch d.vs {
+			case bincSpNil:
+				d.bdType = valueTypeNil
+			case bincSpFalse, bincSpTrue:
+				d.bdType = valueTypeBool
+			case bincSpNan, bincSpNegInf, bincSpPosInf, bincSpZeroFloat:
+				d.bdType = valueTypeFloat
+			case bincSpZero:
+				d.bdType = valueTypeUint
+			case bincSpNegOne:
+				d.bdType = valueTypeInt
+			default:
+				decErr("currentEncodedType: Unrecognized special value 0x%x", d.vs)
+			}
+		case bincVdSmallInt:
+			d.bdType = valueTypeUint
+		case bincVdPosInt:
+			d.bdType = valueTypeUint
+		case bincVdNegInt:
+			d.bdType = valueTypeInt
+		case bincVdFloat:
+			d.bdType = valueTypeFloat
+		case bincVdString:
+			d.bdType = valueTypeString
+		case bincVdSymbol:
+			d.bdType = valueTypeSymbol
+		case bincVdByteArray:
+			d.bdType = valueTypeBytes
+		case bincVdTimestamp:
+			d.bdType = valueTypeTimestamp
+		case bincVdCustomExt:
+			d.bdType = valueTypeExt
+		case bincVdArray:
+			d.bdType = valueTypeArray
+		case bincVdMap:
+			d.bdType = valueTypeMap
+		default:
+			decErr("currentEncodedType: Unrecognized d.vd: 0x%x", d.vd)
+		}
+	}
+	return d.bdType
+}
+
+func (d *bincDecDriver) tryDecodeAsNil() bool {
+	if d.bd == bincVdSpecial<<4|bincSpNil {
+		d.bdRead = false
+		return true
+	}
+	return false
+}
+
+func (d *bincDecDriver) isBuiltinType(rt uintptr) bool {
+	return rt == timeTypId
+}
+
+func (d *bincDecDriver) decodeBuiltin(rt uintptr, v interface{}) {
+	switch rt {
+	case timeTypId:
+		if d.vd != bincVdTimestamp {
+			decErr("Invalid d.vd. Expecting 0x%x. Received: 0x%x", bincVdTimestamp, d.vd)
+		}
+		tt, err := decodeTime(d.r.readn(int(d.vs)))
+		if err != nil {
+			panic(err)
+		}
+		var vt *time.Time = v.(*time.Time)
+		*vt = tt
+		d.bdRead = false
+	}
+}
+
+func (d *bincDecDriver) decFloatPre(vs, defaultLen byte) {
+	if vs&0x8 == 0 {
+		d.r.readb(d.b[0:defaultLen])
+	} else {
+		l := d.r.readn1()
+		if l > 8 {
+			decErr("At most 8 bytes used to represent float. Received: %v bytes", l)
+		}
+		for i := l; i < 8; i++ {
+			d.b[i] = 0
+		}
+		d.r.readb(d.b[0:l])
+	}
+}
+
+func (d *bincDecDriver) decFloat() (f float64) {
+	//if true { f = math.Float64frombits(d.r.readUint64()); break; }
+	switch vs := d.vs; vs & 0x7 {
+	case bincFlBin32:
+		d.decFloatPre(vs, 4)
+		f = float64(math.Float32frombits(bigen.Uint32(d.b[0:4])))
+	case bincFlBin64:
+		d.decFloatPre(vs, 8)
+		f = math.Float64frombits(bigen.Uint64(d.b[0:8]))
+	default:
+		decErr("only float32 and float64 are supported. d.vd: 0x%x, d.vs: 0x%x", d.vd, d.vs)
+	}
+	return
+}
+
+func (d *bincDecDriver) decUint() (v uint64) {
+	// need to inline the code (interface conversion and type assertion expensive)
+	switch d.vs {
+	case 0:
+		v = uint64(d.r.readn1())
+	case 1:
+		d.r.readb(d.b[6:])
+		v = uint64(bigen.Uint16(d.b[6:]))
+	case 2:
+		d.b[4] = 0
+		d.r.readb(d.b[5:])
+		v = uint64(bigen.Uint32(d.b[4:]))
+	case 3:
+		d.r.readb(d.b[4:])
+		v = uint64(bigen.Uint32(d.b[4:]))
+	case 4, 5, 6:
+		lim := int(7 - d.vs)
+		d.r.readb(d.b[lim:])
+		for i := 0; i < lim; i++ {
+			d.b[i] = 0
+		}
+		v = uint64(bigen.Uint64(d.b[:]))
+	case 7:
+		d.r.readb(d.b[:])
+		v = uint64(bigen.Uint64(d.b[:]))
+	default:
+		decErr("unsigned integers with greater than 64 bits of precision not supported")
+	}
+	return
+}
+
+func (d *bincDecDriver) decIntAny() (ui uint64, i int64, neg bool) {
+	switch d.vd {
+	case bincVdPosInt:
+		ui = d.decUint()
+		i = int64(ui)
+	case bincVdNegInt:
+		ui = d.decUint()
+		i = -(int64(ui))
+		neg = true
+	case bincVdSmallInt:
+		i = int64(d.vs) + 1
+		ui = uint64(d.vs) + 1
+	case bincVdSpecial:
+		switch d.vs {
+		case bincSpZero:
+			//i = 0
+		case bincSpNegOne:
+			neg = true
+			ui = 1
+			i = -1
+		default:
+			decErr("numeric decode fails for special value: d.vs: 0x%x", d.vs)
+		}
+	default:
+		decErr("number can only be decoded from uint or int values. d.bd: 0x%x, d.vd: 0x%x", d.bd, d.vd)
+	}
+	return
+}
+
+func (d *bincDecDriver) decodeInt(bitsize uint8) (i int64) {
+	_, i, _ = d.decIntAny()
+	checkOverflow(0, i, bitsize)
+	d.bdRead = false
+	return
+}
+
+func (d *bincDecDriver) decodeUint(bitsize uint8) (ui uint64) {
+	ui, i, neg := d.decIntAny()
+	if neg {
+		decErr("Assigning negative signed value: %v, to unsigned type", i)
+	}
+	checkOverflow(ui, 0, bitsize)
+	d.bdRead = false
+	return
+}
+
+func (d *bincDecDriver) decodeFloat(chkOverflow32 bool) (f float64) {
+	switch d.vd {
+	case bincVdSpecial:
+		d.bdRead = false
+		switch d.vs {
+		case bincSpNan:
+			return math.NaN()
+		case bincSpPosInf:
+			return math.Inf(1)
+		case bincSpZeroFloat, bincSpZero:
+			return
+		case bincSpNegInf:
+			return math.Inf(-1)
+		default:
+			decErr("Invalid d.vs decoding float where d.vd=bincVdSpecial: %v", d.vs)
+		}
+	case bincVdFloat:
+		f = d.decFloat()
+	default:
+		_, i, _ := d.decIntAny()
+		f = float64(i)
+	}
+	checkOverflowFloat32(f, chkOverflow32)
+	d.bdRead = false
+	return
+}
+
+// decodeBool decodes a bool; only the single-byte binc true/false descriptors are accepted.
+func (d *bincDecDriver) decodeBool() (b bool) {
+	switch d.bd {
+	case bincVdSpecial<<4 | bincSpFalse:
+		// b is already false (zero value of the named result)
+	case bincVdSpecial<<4 | bincSpTrue:
+		b = true
+	default:
+		decErr("Invalid single-byte value for bool: %s: %x", msgBadDesc, d.bd)
+	}
+	d.bdRead = false
+	return
+}
+
+func (d *bincDecDriver) readMapLen() (length int) {
+	if d.vd != bincVdMap {
+		decErr("Invalid d.vd for map. Expecting 0x%x. Got: 0x%x", bincVdMap, d.vd)
+	}
+	length = d.decLen()
+	d.bdRead = false
+	return
+}
+
+func (d *bincDecDriver) readArrayLen() (length int) {
+	if d.vd != bincVdArray {
+		decErr("Invalid d.vd for array. Expecting 0x%x. Got: 0x%x", bincVdArray, d.vd)
+	}
+	length = d.decLen()
+	d.bdRead = false
+	return
+}
+
+func (d *bincDecDriver) decLen() int {
+	if d.vs <= 3 {
+		return int(d.decUint())
+	}
+	return int(d.vs - 4)
+}
+
+func (d *bincDecDriver) decodeString() (s string) {
+	switch d.vd {
+	case bincVdString, bincVdByteArray:
+		if length := d.decLen(); length > 0 {
+			s = string(d.r.readn(length))
+		}
+	case bincVdSymbol:
+		//from vs: extract numSymbolBytes, containsStringVal, strLenPrecision,
+		//extract symbol
+		//if containsStringVal, read it and put in map
+		//else look in map for string value
+		var symbol uint32
+		vs := d.vs
+		//fmt.Printf(">>>> d.vs: 0b%b, & 0x8: %v, & 0x4: %v\n", d.vs, vs & 0x8, vs & 0x4)
+		if vs&0x8 == 0 {
+			symbol = uint32(d.r.readn1())
+		} else {
+			symbol = uint32(d.r.readUint16())
+		}
+		if d.m == nil {
+			d.m = make(map[uint32]string, 16)
+		}
+
+		if vs&0x4 == 0 {
+			s = d.m[symbol]
+		} else {
+			var slen int
+			switch vs & 0x3 {
+			case 0:
+				slen = int(d.r.readn1())
+			case 1:
+				slen = int(d.r.readUint16())
+			case 2:
+				slen = int(d.r.readUint32())
+			case 3:
+				slen = int(d.r.readUint64())
+			}
+			s = string(d.r.readn(slen))
+			d.m[symbol] = s
+		}
+	default:
+		decErr("Invalid d.vd for string. Expecting string:0x%x, bytearray:0x%x or symbol: 0x%x. Got: 0x%x",
+			bincVdString, bincVdByteArray, bincVdSymbol, d.vd)
+	}
+	d.bdRead = false
+	return
+}
+
+func (d *bincDecDriver) decodeBytes(bs []byte) (bsOut []byte, changed bool) {
+	var clen int
+	switch d.vd {
+	case bincVdString, bincVdByteArray:
+		clen = d.decLen()
+	default:
+		decErr("Invalid d.vd for bytes. Expecting string:0x%x or bytearray:0x%x. Got: 0x%x",
+			bincVdString, bincVdByteArray, d.vd)
+	}
+	if clen > 0 {
+		// if no contents in stream, don't update the passed byteslice
+		if len(bs) != clen {
+			if len(bs) > clen {
+				bs = bs[:clen]
+			} else {
+				bs = make([]byte, clen)
+			}
+			bsOut = bs
+			changed = true
+		}
+		d.r.readb(bs)
+	}
+	d.bdRead = false
+	return
+}
+
+func (d *bincDecDriver) decodeExt(verifyTag bool, tag byte) (xtag byte, xbs []byte) {
+	switch d.vd {
+	case bincVdCustomExt:
+		l := d.decLen()
+		xtag = d.r.readn1()
+		if verifyTag && xtag != tag {
+			decErr("Wrong extension tag. Got %b. Expecting: %v", xtag, tag)
+		}
+		xbs = d.r.readn(l)
+	case bincVdByteArray:
+		xbs, _ = d.decodeBytes(nil)
+	default:
+		decErr("Invalid d.vd for extensions (Expecting extensions or byte array). Got: 0x%x", d.vd)
+	}
+	d.bdRead = false
+	return
+}
+
+// decodeNaked decodes the next value with no target-type hint; arrays and maps only report their type.
+func (d *bincDecDriver) decodeNaked() (v interface{}, vt valueType, decodeFurther bool) {
+	d.initReadNext()
+
+	switch d.vd {
+	case bincVdSpecial:
+		switch d.vs {
+		case bincSpNil:
+			vt = valueTypeNil
+		case bincSpFalse:
+			vt = valueTypeBool
+			v = false
+		case bincSpTrue:
+			vt = valueTypeBool
+			v = true
+		case bincSpNan:
+			vt = valueTypeFloat
+			v = math.NaN()
+		case bincSpPosInf:
+			vt = valueTypeFloat
+			v = math.Inf(1)
+		case bincSpNegInf:
+			vt = valueTypeFloat
+			v = math.Inf(-1)
+		case bincSpZeroFloat:
+			vt = valueTypeFloat
+			v = float64(0)
+		case bincSpZero:
+			vt = valueTypeUint
+			v = uint64(0) // dynamic type must agree with valueTypeUint, as for every other unsigned value
+		case bincSpNegOne:
+			vt = valueTypeInt
+			v = int64(-1) // int8(-1)
+		default:
+			decErr("decodeNaked: Unrecognized special value 0x%x", d.vs)
+		}
+	case bincVdSmallInt:
+		vt = valueTypeUint
+		v = uint64(int8(d.vs)) + 1 // int8(d.vs) + 1
+	case bincVdPosInt:
+		vt = valueTypeUint
+		v = d.decUint()
+	case bincVdNegInt:
+		vt = valueTypeInt
+		v = -(int64(d.decUint()))
+	case bincVdFloat:
+		vt = valueTypeFloat
+		v = d.decFloat()
+	case bincVdSymbol:
+		vt = valueTypeSymbol
+		v = d.decodeString()
+	case bincVdString:
+		vt = valueTypeString
+		v = d.decodeString()
+	case bincVdByteArray:
+		vt = valueTypeBytes
+		v, _ = d.decodeBytes(nil)
+	case bincVdTimestamp:
+		vt = valueTypeTimestamp
+		tt, err := decodeTime(d.r.readn(int(d.vs)))
+		if err != nil {
+			panic(err)
+		}
+		v = tt
+	case bincVdCustomExt:
+		vt = valueTypeExt
+		l := d.decLen()
+		var re RawExt
+		re.Tag = d.r.readn1()
+		re.Data = d.r.readn(l)
+		v = &re
+	case bincVdArray:
+		vt = valueTypeArray
+		decodeFurther = true
+	case bincVdMap:
+		vt = valueTypeMap
+		decodeFurther = true
+	default:
+		decErr("decodeNaked: Unrecognized d.vd: 0x%x", d.vd)
+	}
+
+	if !decodeFurther {
+		d.bdRead = false
+	}
+	return
+}
+
+//------------------------------------
+
+//BincHandle is a Handle for the Binc Schema-Free Encoding Format
+//defined at https://github.com/ugorji/binc .
+//
+//BincHandle currently supports all Binc features with the following EXCEPTIONS:
+//  - only integers up to 64 bits of precision are supported.
+//    big integers are unsupported.
+//  - Only IEEE 754 binary32 and binary64 floats are supported (ie Go float32 and float64 types).
+//    extended precision and decimal IEEE 754 floats are unsupported.
+//  - Only UTF-8 strings supported.
+//    Unicode_Other Binc types (UTF16, UTF32) are currently unsupported.
+//Note that these EXCEPTIONS are temporary and full support is possible and may happen soon.
+type BincHandle struct {
+	BasicHandle
+}
+
+func (h *BincHandle) newEncDriver(w encWriter) encDriver {
+	return &bincEncDriver{w: w}
+}
+
+func (h *BincHandle) newDecDriver(r decReader) decDriver {
+	return &bincDecDriver{r: r}
+}
+
+func (_ *BincHandle) writeExt() bool {
+	return true
+}
+
+func (h *BincHandle) getBasicHandle() *BasicHandle {
+	return &h.BasicHandle
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/decode.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/decode.go
new file mode 100644
index 00000000000..87bef2b9358
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/decode.go
@@ -0,0 +1,1048 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+package codec
+
+import (
+	"io"
+	"reflect"
+	// "runtime/debug"
+)
+
+// Some tagging information for error messages.
+const (
+	msgTagDec             = "codec.decoder"
+	msgBadDesc            = "Unrecognized descriptor byte"
+	msgDecCannotExpandArr = "cannot expand go array from %v to stream length: %v"
+)
+
+// decReader abstracts the reading source, allowing implementations that can
+// read from an io.Reader or directly off a byte slice with zero-copying.
+type decReader interface {
+	readn(n int) []byte
+	readb([]byte)
+	readn1() uint8
+	readUint16() uint16
+	readUint32() uint32
+	readUint64() uint64
+}
+
+type decDriver interface {
+	initReadNext()
+	tryDecodeAsNil() bool
+	currentEncodedType() valueType
+	isBuiltinType(rt uintptr) bool
+	decodeBuiltin(rt uintptr, v interface{})
+	//decodeNaked: Numbers are decoded as int64, uint64, float64 only (no smaller sized number types).
+	decodeNaked() (v interface{}, vt valueType, decodeFurther bool)
+	decodeInt(bitsize uint8) (i int64)
+	decodeUint(bitsize uint8) (ui uint64)
+	decodeFloat(chkOverflow32 bool) (f float64)
+	decodeBool() (b bool)
+	// decodeString can also decode symbols
+	decodeString() (s string)
+	decodeBytes(bs []byte) (bsOut []byte, changed bool)
+	decodeExt(verifyTag bool, tag byte) (xtag byte, xbs []byte)
+	readMapLen() int
+	readArrayLen() int
+}
+
+type DecodeOptions struct {
+	// An instance of MapType is used during schema-less decoding of a map in the stream.
+	// If nil, we use map[interface{}]interface{}
+	MapType reflect.Type
+	// An instance of SliceType is used during schema-less decoding of an array in the stream.
+	// If nil, we use []interface{}
+	SliceType reflect.Type
+	// ErrorIfNoField controls whether an error is returned when decoding a map
+	// from a codec stream into a struct, and no matching struct field is found.
+	ErrorIfNoField bool
+}
+
+// ------------------------------------
+
+// ioDecReader is a decReader that reads off an io.Reader
+type ioDecReader struct {
+	r  io.Reader
+	br io.ByteReader
+	x  [8]byte //temp byte array re-used internally for efficiency
+}
+
+func (z *ioDecReader) readn(n int) (bs []byte) {
+	if n <= 0 {
+		return
+	}
+	bs = make([]byte, n)
+	if _, err := io.ReadAtLeast(z.r, bs, n); err != nil {
+		panic(err)
+	}
+	return
+}
+
+func (z *ioDecReader) readb(bs []byte) {
+	if _, err := io.ReadAtLeast(z.r, bs, len(bs)); err != nil {
+		panic(err)
+	}
+}
+
+func (z *ioDecReader) readn1() uint8 {
+	if z.br != nil {
+		b, err := z.br.ReadByte()
+		if err != nil {
+			panic(err)
+		}
+		return b
+	}
+	z.readb(z.x[:1])
+	return z.x[0]
+}
+
+func (z *ioDecReader) readUint16() uint16 {
+	z.readb(z.x[:2])
+	return bigen.Uint16(z.x[:2])
+}
+
+func (z *ioDecReader) readUint32() uint32 {
+	z.readb(z.x[:4])
+	return bigen.Uint32(z.x[:4])
+}
+
+func (z *ioDecReader) readUint64() uint64 {
+	z.readb(z.x[:8])
+	return bigen.Uint64(z.x[:8])
+}
+
+// ------------------------------------
+
+// bytesDecReader is a decReader that reads off a byte slice with zero copying
+type bytesDecReader struct {
+	b []byte // data
+	c int    // cursor
+	a int    // available
+}
+
+func (z *bytesDecReader) consume(n int) (oldcursor int) {
+	if z.a == 0 {
+		panic(io.EOF)
+	}
+	if n > z.a {
+		decErr("Trying to read %v bytes. Only %v available", n, z.a)
+	}
+	// z.checkAvailable(n)
+	oldcursor = z.c
+	z.c = oldcursor + n
+	z.a = z.a - n
+	return
+}
+
+func (z *bytesDecReader) readn(n int) (bs []byte) {
+	if n <= 0 {
+		return
+	}
+	c0 := z.consume(n)
+	bs = z.b[c0:z.c]
+	return
+}
+
+func (z *bytesDecReader) readb(bs []byte) {
+	copy(bs, z.readn(len(bs)))
+}
+
+func (z *bytesDecReader) readn1() uint8 {
+	c0 := z.consume(1)
+	return z.b[c0]
+}
+
+// Use binaryEncoding helper for 4 and 8 bytes, but inline it for 2 bytes:
+// creating a temp slice variable and copying it to a helper function is expensive
+// for just 2 bytes.
+
+func (z *bytesDecReader) readUint16() uint16 {
+	c0 := z.consume(2)
+	return uint16(z.b[c0+1]) | uint16(z.b[c0])<<8
+}
+
+func (z *bytesDecReader) readUint32() uint32 {
+	c0 := z.consume(4)
+	return bigen.Uint32(z.b[c0:z.c])
+}
+
+func (z *bytesDecReader) readUint64() uint64 {
+	c0 := z.consume(8)
+	return bigen.Uint64(z.b[c0:z.c])
+}
+
+// ------------------------------------
+
+// decFnInfo has methods for registering handling decoding of a specific type
+// based on some characteristics (builtin, extension, reflect Kind, etc)
+type decFnInfo struct {
+	ti    *typeInfo
+	d     *Decoder
+	dd    decDriver
+	xfFn  func(reflect.Value, []byte) error
+	xfTag byte
+	array bool
+}
+
+func (f *decFnInfo) builtin(rv reflect.Value) {
+	f.dd.decodeBuiltin(f.ti.rtid, rv.Addr().Interface())
+}
+
+func (f *decFnInfo) rawExt(rv reflect.Value) {
+	xtag, xbs := f.dd.decodeExt(false, 0)
+	rv.Field(0).SetUint(uint64(xtag))
+	rv.Field(1).SetBytes(xbs)
+}
+
+func (f *decFnInfo) ext(rv reflect.Value) {
+	_, xbs := f.dd.decodeExt(true, f.xfTag)
+	if fnerr := f.xfFn(rv, xbs); fnerr != nil {
+		panic(fnerr)
+	}
+}
+
+func (f *decFnInfo) binaryMarshal(rv reflect.Value) {
+	var bm binaryUnmarshaler
+	if f.ti.unmIndir == -1 {
+		bm = rv.Addr().Interface().(binaryUnmarshaler)
+	} else if f.ti.unmIndir == 0 {
+		bm = rv.Interface().(binaryUnmarshaler)
+	} else {
+		for j, k := int8(0), f.ti.unmIndir; j < k; j++ {
+			if rv.IsNil() {
+				rv.Set(reflect.New(rv.Type().Elem()))
+			}
+			rv = rv.Elem()
+		}
+		bm = rv.Interface().(binaryUnmarshaler)
+	}
+	xbs, _ := f.dd.decodeBytes(nil)
+	if fnerr := bm.UnmarshalBinary(xbs); fnerr != nil {
+		panic(fnerr)
+	}
+}
+
+func (f *decFnInfo) kErr(rv reflect.Value) {
+	decErr("Unhandled value for kind: %v: %s", rv.Kind(), msgBadDesc)
+}
+
+func (f *decFnInfo) kString(rv reflect.Value) {
+	rv.SetString(f.dd.decodeString())
+}
+
+func (f *decFnInfo) kBool(rv reflect.Value) {
+	rv.SetBool(f.dd.decodeBool())
+}
+
+func (f *decFnInfo) kInt(rv reflect.Value) {
+	rv.SetInt(f.dd.decodeInt(intBitsize))
+}
+
+func (f *decFnInfo) kInt64(rv reflect.Value) {
+	rv.SetInt(f.dd.decodeInt(64))
+}
+
+func (f *decFnInfo) kInt32(rv reflect.Value) {
+	rv.SetInt(f.dd.decodeInt(32))
+}
+
+func (f *decFnInfo) kInt8(rv reflect.Value) {
+	rv.SetInt(f.dd.decodeInt(8))
+}
+
+func (f *decFnInfo) kInt16(rv reflect.Value) {
+	rv.SetInt(f.dd.decodeInt(16))
+}
+
+func (f *decFnInfo) kFloat32(rv reflect.Value) {
+	rv.SetFloat(f.dd.decodeFloat(true))
+}
+
+func (f *decFnInfo) kFloat64(rv reflect.Value) {
+	rv.SetFloat(f.dd.decodeFloat(false))
+}
+
+func (f *decFnInfo) kUint8(rv reflect.Value) {
+	rv.SetUint(f.dd.decodeUint(8))
+}
+
+func (f *decFnInfo) kUint64(rv reflect.Value) {
+	rv.SetUint(f.dd.decodeUint(64))
+}
+
+func (f *decFnInfo) kUint(rv reflect.Value) {
+	rv.SetUint(f.dd.decodeUint(uintBitsize))
+}
+
+func (f *decFnInfo) kUint32(rv reflect.Value) {
+	rv.SetUint(f.dd.decodeUint(32))
+}
+
+func (f *decFnInfo) kUint16(rv reflect.Value) {
+	rv.SetUint(f.dd.decodeUint(16))
+}
+
+// func (f *decFnInfo) kPtr(rv reflect.Value) {
+// 	debugf(">>>>>>> ??? decode kPtr called - shouldn't get called")
+// 	if rv.IsNil() {
+// 		rv.Set(reflect.New(rv.Type().Elem()))
+// 	}
+// 	f.d.decodeValue(rv.Elem())
+// }
+
+func (f *decFnInfo) kInterface(rv reflect.Value) {
+	// debugf("\t===> kInterface")
+	if !rv.IsNil() {
+		f.d.decodeValue(rv.Elem())
+		return
+	}
+	// nil interface:
+	// use some heuristics to set the nil interface to an
+	// appropriate value based on the first byte read (byte descriptor bd)
+	v, vt, decodeFurther := f.dd.decodeNaked()
+	if vt == valueTypeNil {
+		return
+	}
+	// Cannot decode into nil interface with methods (e.g. error, io.Reader, etc)
+	// if non-nil value in stream.
+	if num := f.ti.rt.NumMethod(); num > 0 {
+		decErr("decodeValue: Cannot decode non-nil codec value into nil %v (%v methods)",
+			f.ti.rt, num)
+	}
+	var rvn reflect.Value
+	var useRvn bool
+	switch vt {
+	case valueTypeMap:
+		if f.d.h.MapType == nil {
+			var m2 map[interface{}]interface{}
+			v = &m2
+		} else {
+			rvn = reflect.New(f.d.h.MapType).Elem()
+			useRvn = true
+		}
+	case valueTypeArray:
+		if f.d.h.SliceType == nil {
+			var m2 []interface{}
+			v = &m2
+		} else {
+			rvn = reflect.New(f.d.h.SliceType).Elem()
+			useRvn = true
+		}
+	case valueTypeExt:
+		re := v.(*RawExt)
+		var bfn func(reflect.Value, []byte) error
+		rvn, bfn = f.d.h.getDecodeExtForTag(re.Tag)
+		if bfn == nil {
+			rvn = reflect.ValueOf(*re)
+		} else if fnerr := bfn(rvn, re.Data); fnerr != nil {
+			panic(fnerr)
+		}
+		rv.Set(rvn)
+		return
+	}
+	if decodeFurther {
+		if useRvn {
+			f.d.decodeValue(rvn)
+		} else if v != nil {
+			// this v is a pointer, so we need to dereference it when done
+			f.d.decode(v)
+			rvn = reflect.ValueOf(v).Elem()
+			useRvn = true
+		}
+	}
+	if useRvn {
+		rv.Set(rvn)
+	} else if v != nil {
+		rv.Set(reflect.ValueOf(v))
+	}
+}
+
+func (f *decFnInfo) kStruct(rv reflect.Value) {
+	fti := f.ti
+	if currEncodedType := f.dd.currentEncodedType(); currEncodedType == valueTypeMap {
+		containerLen := f.dd.readMapLen()
+		if containerLen == 0 {
+			return
+		}
+		tisfi := fti.sfi
+		for j := 0; j < containerLen; j++ {
+			// var rvkencname string
+			// ddecode(&rvkencname)
+			f.dd.initReadNext()
+			rvkencname := f.dd.decodeString()
+			// rvksi := ti.getForEncName(rvkencname)
+			if k := fti.indexForEncName(rvkencname); k > -1 {
+				sfik := tisfi[k]
+				if sfik.i != -1 {
+					f.d.decodeValue(rv.Field(int(sfik.i)))
+				} else {
+					f.d.decEmbeddedField(rv, sfik.is)
+				}
+				// f.d.decodeValue(ti.field(k, rv))
+			} else {
+				if f.d.h.ErrorIfNoField {
+					decErr("No matching struct field found when decoding stream map with key: %v",
+						rvkencname)
+				} else {
+					var nilintf0 interface{}
+					f.d.decodeValue(reflect.ValueOf(&nilintf0).Elem())
+				}
+			}
+		}
+	} else if currEncodedType == valueTypeArray {
+		containerLen := f.dd.readArrayLen()
+		if containerLen == 0 {
+			return
+		}
+		for j, si := range fti.sfip {
+			if j == containerLen {
+				break
+			}
+			if si.i != -1 {
+				f.d.decodeValue(rv.Field(int(si.i)))
+			} else {
+				f.d.decEmbeddedField(rv, si.is)
+			}
+		}
+		if containerLen > len(fti.sfip) {
+			// read remaining values and throw away
+			for j := len(fti.sfip); j < containerLen; j++ {
+				var nilintf0 interface{}
+				f.d.decodeValue(reflect.ValueOf(&nilintf0).Elem())
+			}
+		}
+	} else {
+		decErr("Only encoded map or array can be decoded into a struct. (valueType: %x)",
+			currEncodedType)
+	}
+}
+
+func (f *decFnInfo) kSlice(rv reflect.Value) {
+	// A slice can be set from a map or array in stream.
+	currEncodedType := f.dd.currentEncodedType()
+
+	switch currEncodedType {
+	case valueTypeBytes, valueTypeString:
+		if f.ti.rtid == uint8SliceTypId || f.ti.rt.Elem().Kind() == reflect.Uint8 {
+			if bs2, changed2 := f.dd.decodeBytes(rv.Bytes()); changed2 {
+				rv.SetBytes(bs2)
+			}
+			return
+		}
+	}
+
+	if shortCircuitReflectToFastPath && rv.CanAddr() {
+		switch f.ti.rtid {
+		case intfSliceTypId:
+			f.d.decSliceIntf(rv.Addr().Interface().(*[]interface{}), currEncodedType, f.array)
+			return
+		case uint64SliceTypId:
+			f.d.decSliceUint64(rv.Addr().Interface().(*[]uint64), currEncodedType, f.array)
+			return
+		case int64SliceTypId:
+			f.d.decSliceInt64(rv.Addr().Interface().(*[]int64), currEncodedType, f.array)
+			return
+		case strSliceTypId:
+			f.d.decSliceStr(rv.Addr().Interface().(*[]string), currEncodedType, f.array)
+			return
+		}
+	}
+
+	containerLen, containerLenS := decContLens(f.dd, currEncodedType)
+
+	// an array can never return a nil slice. so no need to check f.array here.
+
+	if rv.IsNil() {
+		rv.Set(reflect.MakeSlice(f.ti.rt, containerLenS, containerLenS))
+	}
+
+	if containerLen == 0 {
+		return
+	}
+	// Reallocate only when the stream needs more than the capacity; otherwise just extend the length.
+	if rvlen, rvcap := rv.Len(), rv.Cap(); containerLenS > rvcap {
+		if f.array { // !rv.CanSet()
+			decErr(msgDecCannotExpandArr, rvcap, containerLenS)
+		}
+		rvn := reflect.MakeSlice(f.ti.rt, containerLenS, containerLenS)
+		if rvlen > 0 {
+			reflect.Copy(rvn, rv)
+		}
+		rv.Set(rvn)
+	} else if containerLenS > rvlen {
+		rv.SetLen(containerLenS)
+	}
+
+	for j := 0; j < containerLenS; j++ {
+		f.d.decodeValue(rv.Index(j))
+	}
+}
+
+func (f *decFnInfo) kArray(rv reflect.Value) {
+	// f.d.decodeValue(rv.Slice(0, rv.Len()))
+	f.kSlice(rv.Slice(0, rv.Len()))
+}
+
+func (f *decFnInfo) kMap(rv reflect.Value) {
+	if shortCircuitReflectToFastPath && rv.CanAddr() {
+		switch f.ti.rtid {
+		case mapStrIntfTypId:
+			f.d.decMapStrIntf(rv.Addr().Interface().(*map[string]interface{}))
+			return
+		case mapIntfIntfTypId:
+			f.d.decMapIntfIntf(rv.Addr().Interface().(*map[interface{}]interface{}))
+			return
+		case mapInt64IntfTypId:
+			f.d.decMapInt64Intf(rv.Addr().Interface().(*map[int64]interface{}))
+			return
+		case mapUint64IntfTypId:
+			f.d.decMapUint64Intf(rv.Addr().Interface().(*map[uint64]interface{}))
+			return
+		}
+	}
+
+	containerLen := f.dd.readMapLen()
+
+	if rv.IsNil() {
+		rv.Set(reflect.MakeMap(f.ti.rt))
+	}
+
+	if containerLen == 0 {
+		return
+	}
+
+	ktype, vtype := f.ti.rt.Key(), f.ti.rt.Elem()
+	ktypeId := reflect.ValueOf(ktype).Pointer()
+	for j := 0; j < containerLen; j++ {
+		rvk := reflect.New(ktype).Elem()
+		f.d.decodeValue(rvk)
+
+		// special case if a byte array.
+		// if ktype == intfTyp {
+		if ktypeId == intfTypId {
+			rvk = rvk.Elem()
+			if rvk.Type() == uint8SliceTyp {
+				rvk = reflect.ValueOf(string(rvk.Bytes()))
+			}
+		}
+		// Decode into an addressable copy seeded with any existing entry: MapIndex results are not settable.
+		rvv := reflect.New(vtype).Elem()
+		if cur := rv.MapIndex(rvk); cur.IsValid() {
+			rvv.Set(cur)
+		}
+		f.d.decodeValue(rvv)
+		rv.SetMapIndex(rvk, rvv)
+	}
+}
+
+// ----------------------------------------
+
+// decFn pairs a decode function with the per-type info it is invoked with.
+// decodeValue builds one per type and caches it for later lookups.
+type decFn struct {
+	// i is passed as the receiver when f is called.
+	i *decFnInfo
+	// f is a method expression on *decFnInfo, e.g. (*decFnInfo).kMap.
+	f func(*decFnInfo, reflect.Value)
+}
+
+// A Decoder reads and decodes an object from an input stream in the codec format.
+type Decoder struct {
+	// r is the byte source (io.Reader-backed or byte-slice-backed).
+	r decReader
+	// d is the format-specific driver created by the Handle.
+	d decDriver
+	// h is the basic handle obtained from the Handle passed at construction.
+	h *BasicHandle
+	// f caches decode functions by type id; used when useMapForCodecCache is true.
+	f map[uintptr]decFn
+	// x and s are parallel slices (type id, decode function) forming the
+	// cache used when useMapForCodecCache is false.
+	x []uintptr
+	s []decFn
+}
+
+// NewDecoder returns a Decoder for decoding a stream of bytes from an io.Reader.
+//
+// For efficiency, Users are encouraged to pass in a memory buffered reader
+// (eg bufio.Reader, bytes.Buffer).
+func NewDecoder(r io.Reader, h Handle) *Decoder {
+	z := ioDecReader{
+		r: r,
+	}
+	// br stays nil when r does not implement io.ByteReader.
+	z.br, _ = r.(io.ByteReader)
+	return &Decoder{r: &z, d: h.newDecDriver(&z), h: h.getBasicHandle()}
+}
+
+// NewDecoderBytes returns a Decoder that reads directly from the given
+// byte slice, without copying it.
+func NewDecoderBytes(in []byte, h Handle) *Decoder {
+	src := &bytesDecReader{b: in, a: len(in)}
+	return &Decoder{
+		r: src,
+		d: h.newDecDriver(src),
+		h: h.getBasicHandle(),
+	}
+}
+
+// Decode decodes the stream from reader and stores the result in the
+// value pointed to by v. v cannot be a nil pointer. v can also be
+// a reflect.Value of a pointer.
+//
+// Note that a pointer to a nil interface is not a nil pointer.
+// If you do not know what type of stream it is, pass in a pointer to a nil interface.
+// We will decode and store a value in that nil interface.
+//
+// Sample usages:
+//   // Decoding into a non-nil typed value
+//   var f float32
+//   err = codec.NewDecoder(r, handle).Decode(&f)
+//
+//   // Decoding into nil interface
+//   var v interface{}
+//   dec := codec.NewDecoder(r, handle)
+//   err = dec.Decode(&v)
+//
+// When decoding into a nil interface{}, we will decode into an appropriate value based
+// on the contents of the stream:
+//   - Numbers are decoded as float64, int64 or uint64.
+//   - Other values are decoded appropriately depending on the type:
+//     bool, string, []byte, time.Time, etc
+//   - Extensions are decoded as RawExt (if no ext function registered for the tag)
+// Configurations exist on the Handle to override defaults
+// (e.g. for MapType, SliceType and how to decode raw bytes).
+//
+// When decoding into a non-nil interface{} value, the mode of encoding is based on the
+// type of the value. When a value is seen:
+//   - If an extension is registered for it, call that extension function
+//   - If it implements BinaryUnmarshaler, call its UnmarshalBinary(data []byte) error
+//   - Else decode it based on its reflect.Kind
+//
+// There are some special rules when decoding into containers (slice/array/map/struct).
+// Decode will typically use the stream contents to UPDATE the container.
+//   - A map can be decoded from a stream map, by updating matching keys.
+//   - A slice can be decoded from a stream array,
+//     by updating the first n elements, where n is length of the stream.
+//   - A slice can be decoded from a stream map, by decoding as if
+//     it contains a sequence of key-value pairs.
+//   - A struct can be decoded from a stream map, by updating matching fields.
+//   - A struct can be decoded from a stream array,
+//     by updating fields as they occur in the struct (by index).
+//
+// When decoding a stream map or array with length of 0 into a nil map or slice,
+// we reset the destination map or slice to a zero-length value.
+//
+// However, when decoding a stream nil, we reset the destination container
+// to its "zero" value (e.g. nil for slice/map, etc).
+//
+func (d *Decoder) Decode(v interface{}) (err error) {
+	// Decode failures are raised as panics (see decErr); panicToErr is
+	// deferred with the named result so it can report them through err.
+	defer panicToErr(&err)
+
+	d.decode(v)
+	return err
+}
+
+// decode is the untyped entry point used by Decode and by the fast-path
+// container decoders in this file. Pointers to common scalar, slice and map
+// types are decoded directly through the driver; every other destination
+// goes through reflection via decodeValue.
+func (d *Decoder) decode(iv interface{}) {
+	d.d.initReadNext()
+
+	switch v := iv.(type) {
+	case nil:
+		decErr("Cannot decode into nil.")
+
+	case reflect.Value:
+		// The reflect.Value must wrap a non-nil pointer; decode into its target.
+		d.chkPtrValue(v)
+		d.decodeValue(v.Elem())
+
+	// Scalars: the argument to decodeInt/decodeUint is the bit size of the destination.
+	case *string:
+		*v = d.d.decodeString()
+	case *bool:
+		*v = d.d.decodeBool()
+	case *int:
+		*v = int(d.d.decodeInt(intBitsize))
+	case *int8:
+		*v = int8(d.d.decodeInt(8))
+	case *int16:
+		*v = int16(d.d.decodeInt(16))
+	case *int32:
+		*v = int32(d.d.decodeInt(32))
+	case *int64:
+		*v = d.d.decodeInt(64)
+	case *uint:
+		*v = uint(d.d.decodeUint(uintBitsize))
+	case *uint8:
+		*v = uint8(d.d.decodeUint(8))
+	case *uint16:
+		*v = uint16(d.d.decodeUint(16))
+	case *uint32:
+		*v = uint32(d.d.decodeUint(32))
+	case *uint64:
+		*v = d.d.decodeUint(64)
+	case *float32:
+		*v = float32(d.d.decodeFloat(true))
+	case *float64:
+		*v = d.d.decodeFloat(false)
+	case *[]byte:
+		*v, _ = d.d.decodeBytes(*v)
+
+	// Common containers: valueTypeInvalid makes decContLens ask the driver
+	// for the encoded type; false means the slice may be reallocated.
+	case *[]interface{}:
+		d.decSliceIntf(v, valueTypeInvalid, false)
+	case *[]uint64:
+		d.decSliceUint64(v, valueTypeInvalid, false)
+	case *[]int64:
+		d.decSliceInt64(v, valueTypeInvalid, false)
+	case *[]string:
+		d.decSliceStr(v, valueTypeInvalid, false)
+	case *map[string]interface{}:
+		d.decMapStrIntf(v)
+	case *map[interface{}]interface{}:
+		d.decMapIntfIntf(v)
+	case *map[uint64]interface{}:
+		d.decMapUint64Intf(v)
+	case *map[int64]interface{}:
+		d.decMapInt64Intf(v)
+
+	case *interface{}:
+		// Skips chkPtrValue, unlike the default case below.
+		d.decodeValue(reflect.ValueOf(iv).Elem())
+
+	default:
+		// Anything else must be a non-nil pointer; decode into its target.
+		rv := reflect.ValueOf(iv)
+		d.chkPtrValue(rv)
+		d.decodeValue(rv.Elem())
+	}
+}
+
+// decodeValue decodes the next value in the stream into rv using reflection.
+//
+// A stream nil resets rv to its zero value. Otherwise pointers are followed
+// (allocating where nil) down to the base value, and a decode function is
+// selected for its type. The selection is done once per type and cached on
+// the Decoder (map or parallel slices, depending on useMapForCodecCache).
+func (d *Decoder) decodeValue(rv reflect.Value) {
+	d.d.initReadNext()
+
+	if d.d.tryDecodeAsNil() {
+		// If value in stream is nil, set the dereferenced value to its "zero" value (if settable)
+		if rv.Kind() == reflect.Ptr {
+			if !rv.IsNil() {
+				rv.Set(reflect.Zero(rv.Type()))
+			}
+			return
+		}
+		// for rv.Kind() == reflect.Ptr {
+		// 	rv = rv.Elem()
+		// }
+		if rv.IsValid() { // rv.CanSet() // always settable, except it's invalid
+			rv.Set(reflect.Zero(rv.Type()))
+		}
+		return
+	}
+
+	// If stream is not containing a nil value, then we can deref to the base
+	// non-pointer value, and decode into that.
+	for rv.Kind() == reflect.Ptr {
+		if rv.IsNil() {
+			rv.Set(reflect.New(rv.Type().Elem()))
+		}
+		rv = rv.Elem()
+	}
+
+	rt := rv.Type()
+	// The pointer behind the reflect.Type is used as the type's id (cache key).
+	rtid := reflect.ValueOf(rt).Pointer()
+
+	// retrieve or register a focus'ed function for this type
+	// to eliminate need to do the retrieval multiple times
+
+	// if d.f == nil && d.s == nil { debugf("---->Creating new dec f map for type: %v\n", rt) }
+	var fn decFn
+	var ok bool
+	if useMapForCodecCache {
+		fn, ok = d.f[rtid]
+	} else {
+		// Linear scan over the parallel slices d.x (type ids) and d.s (functions).
+		for i, v := range d.x {
+			if v == rtid {
+				fn, ok = d.s[i], true
+				break
+			}
+		}
+	}
+	if !ok {
+		// debugf("\tCreating new dec fn for type: %v\n", rt)
+		fi := decFnInfo{ti: getTypeInfo(rtid, rt), d: d, dd: d.d}
+		// fn.i points at fi, so the later writes to fi (xfTag, xfFn, array)
+		// are visible through fn.i.
+		fn.i = &fi
+		// An extension can be registered for any type, regardless of the Kind
+		// (e.g. type BitSet int64, type MyStruct { / * unexported fields * / }, type X []int, etc.
+		//
+		// We can't check if it's an extension byte here first, because the user may have
+		// registered a pointer or non-pointer type, meaning we may have to recurse first
+		// before matching a mapped type, even though the extension byte is already detected.
+		//
+		// NOTE: if decoding into a nil interface{}, we return a non-nil
+		// value even if the container registers a length of 0.
+		if rtid == rawExtTypId {
+			fn.f = (*decFnInfo).rawExt
+		} else if d.d.isBuiltinType(rtid) {
+			fn.f = (*decFnInfo).builtin
+		} else if xfTag, xfFn := d.h.getDecodeExt(rtid); xfFn != nil {
+			fi.xfTag, fi.xfFn = xfTag, xfFn
+			fn.f = (*decFnInfo).ext
+		} else if supportBinaryMarshal && fi.ti.unm {
+			fn.f = (*decFnInfo).binaryMarshal
+		} else {
+			// No special handling applies: dispatch on the reflect.Kind.
+			switch rk := rt.Kind(); rk {
+			case reflect.String:
+				fn.f = (*decFnInfo).kString
+			case reflect.Bool:
+				fn.f = (*decFnInfo).kBool
+			case reflect.Int:
+				fn.f = (*decFnInfo).kInt
+			case reflect.Int64:
+				fn.f = (*decFnInfo).kInt64
+			case reflect.Int32:
+				fn.f = (*decFnInfo).kInt32
+			case reflect.Int8:
+				fn.f = (*decFnInfo).kInt8
+			case reflect.Int16:
+				fn.f = (*decFnInfo).kInt16
+			case reflect.Float32:
+				fn.f = (*decFnInfo).kFloat32
+			case reflect.Float64:
+				fn.f = (*decFnInfo).kFloat64
+			case reflect.Uint8:
+				fn.f = (*decFnInfo).kUint8
+			case reflect.Uint64:
+				fn.f = (*decFnInfo).kUint64
+			case reflect.Uint:
+				fn.f = (*decFnInfo).kUint
+			case reflect.Uint32:
+				fn.f = (*decFnInfo).kUint32
+			case reflect.Uint16:
+				fn.f = (*decFnInfo).kUint16
+			// case reflect.Ptr:
+			// 	fn.f = (*decFnInfo).kPtr
+			case reflect.Interface:
+				fn.f = (*decFnInfo).kInterface
+			case reflect.Struct:
+				fn.f = (*decFnInfo).kStruct
+			case reflect.Slice:
+				fn.f = (*decFnInfo).kSlice
+			case reflect.Array:
+				fi.array = true
+				fn.f = (*decFnInfo).kArray
+			case reflect.Map:
+				fn.f = (*decFnInfo).kMap
+			default:
+				fn.f = (*decFnInfo).kErr
+			}
+		}
+		// Remember the function so the next value of this type skips the selection.
+		if useMapForCodecCache {
+			if d.f == nil {
+				d.f = make(map[uintptr]decFn, 16)
+			}
+			d.f[rtid] = fn
+		} else {
+			d.s = append(d.s, fn)
+			d.x = append(d.x, rtid)
+		}
+	}
+
+	fn.f(fn.i, rv)
+
+	return
+}
+
+// chkPtrValue raises a decode error unless rv is a non-nil pointer, which is
+// the only kind of destination that can be decoded into.
+func (d *Decoder) chkPtrValue(rv reflect.Value) {
+	if usable := rv.Kind() == reflect.Ptr && !rv.IsNil(); usable {
+		return
+	}
+
+	// From here on rv is rejected; the checks only pick the most precise message.
+	if valid := rv.IsValid(); !valid {
+		decErr("Cannot decode into a zero (ie invalid) reflect.Value")
+	}
+	if exposable := rv.CanInterface(); !exposable {
+		decErr("Cannot decode into a value without an interface: %v", rv)
+	}
+	held := rv.Interface()
+	decErr("Cannot decode into non-pointer or nil pointer. Got: %v, %T, %v",
+		rv.Kind(), held, held)
+}
+
+// decEmbeddedField walks the field index path from rv and decodes into the
+// field it ends at. It mirrors reflect.Value.FieldByIndex, except that nil
+// pointers met along the path are allocated instead of causing a panic.
+func (d *Decoder) decEmbeddedField(rv reflect.Value, index []int) {
+	field := rv
+	for i := 0; i < len(index); i++ {
+		if field.Kind() == reflect.Ptr {
+			if field.IsNil() {
+				fresh := reflect.New(field.Type().Elem())
+				field.Set(fresh)
+			}
+			// If a pointer, it must be a pointer to struct (based on typeInfo contract)
+			field = field.Elem()
+		}
+		field = field.Field(index[i])
+	}
+	d.decodeValue(field)
+}
+
+// --------------------------------------------------
+
+// short circuit functions for common maps and slices
+
+// decSliceIntf is the fast path for decoding a stream array or map into
+// *[]interface{}. The destination is allocated when nil, regrown when too
+// small (an error if doNotReset is set), or re-sliced when only its length
+// is short. Elements beyond the stream length are left untouched.
+func (d *Decoder) decSliceIntf(v *[]interface{}, currEncodedType valueType, doNotReset bool) {
+	_, n := decContLens(d.d, currEncodedType)
+	dst := *v
+	switch {
+	case dst == nil:
+		dst = make([]interface{}, n, n)
+	case n > cap(dst):
+		if doNotReset {
+			decErr(msgDecCannotExpandArr, cap(dst), n)
+		}
+		dst = make([]interface{}, n, n)
+		copy(dst, *v)
+	case n > len(dst):
+		dst = dst[:n]
+	}
+	for idx := 0; idx < n; idx++ {
+		d.decode(&dst[idx])
+	}
+	*v = dst
+}
+
+// decSliceInt64 is the fast path for decoding a stream array or map into
+// *[]int64.
+//
+// currEncodedType is the already-known encoded type, or valueTypeInvalid to
+// have decContLens ask the driver. If doNotReset is true the destination
+// cannot be reallocated, and a stream longer than its capacity is an error.
+// Elements beyond the stream length are left untouched.
+func (d *Decoder) decSliceInt64(v *[]int64, currEncodedType valueType, doNotReset bool) {
+	_, containerLenS := decContLens(d.d, currEncodedType)
+	s := *v
+	if s == nil {
+		s = make([]int64, containerLenS, containerLenS)
+	} else if containerLenS > cap(s) {
+		if doNotReset {
+			decErr(msgDecCannotExpandArr, cap(s), containerLenS)
+		}
+		s = make([]int64, containerLenS, containerLenS)
+		copy(s, *v)
+	} else if containerLenS > len(s) {
+		s = s[:containerLenS]
+	}
+	for j := 0; j < containerLenS; j++ {
+		// d.decode(&s[j])
+		d.d.initReadNext()
+		// The elements are int64, so decode with a 64-bit size (as decode does
+		// for *int64) rather than the platform int size.
+		s[j] = d.d.decodeInt(64)
+	}
+	*v = s
+}
+
+// decSliceUint64 is the fast path for decoding a stream array or map into
+// *[]uint64.
+//
+// currEncodedType is the already-known encoded type, or valueTypeInvalid to
+// have decContLens ask the driver. If doNotReset is true the destination
+// cannot be reallocated, and a stream longer than its capacity is an error.
+// Elements beyond the stream length are left untouched.
+func (d *Decoder) decSliceUint64(v *[]uint64, currEncodedType valueType, doNotReset bool) {
+	_, containerLenS := decContLens(d.d, currEncodedType)
+	s := *v
+	if s == nil {
+		s = make([]uint64, containerLenS, containerLenS)
+	} else if containerLenS > cap(s) {
+		if doNotReset {
+			decErr(msgDecCannotExpandArr, cap(s), containerLenS)
+		}
+		s = make([]uint64, containerLenS, containerLenS)
+		copy(s, *v)
+	} else if containerLenS > len(s) {
+		s = s[:containerLenS]
+	}
+	for j := 0; j < containerLenS; j++ {
+		// d.decode(&s[j])
+		d.d.initReadNext()
+		// The elements are uint64, so decode with a 64-bit size (as decode does
+		// for *uint64) rather than the platform int size.
+		s[j] = d.d.decodeUint(64)
+	}
+	*v = s
+}
+
+// decSliceStr is the fast path for decoding a stream array or map into
+// *[]string. The destination is allocated when nil, regrown when too small
+// (an error if doNotReset is set), or re-sliced when only its length is
+// short. Elements beyond the stream length are left untouched.
+func (d *Decoder) decSliceStr(v *[]string, currEncodedType valueType, doNotReset bool) {
+	_, n := decContLens(d.d, currEncodedType)
+	dst := *v
+	switch {
+	case dst == nil:
+		dst = make([]string, n, n)
+	case n > cap(dst):
+		if doNotReset {
+			decErr(msgDecCannotExpandArr, cap(dst), n)
+		}
+		dst = make([]string, n, n)
+		copy(dst, *v)
+	case n > len(dst):
+		dst = dst[:n]
+	}
+	for idx := 0; idx < n; idx++ {
+		d.d.initReadNext()
+		dst[idx] = d.d.decodeString()
+	}
+	*v = dst
+}
+
+// decMapIntfIntf is the fast path for decoding a stream map into
+// *map[interface{}]interface{}. A nil map is allocated first; an existing
+// entry is the starting point for decoding the new value for its key.
+func (d *Decoder) decMapIntfIntf(v *map[interface{}]interface{}) {
+	remaining := d.d.readMapLen()
+	if *v == nil {
+		*v = make(map[interface{}]interface{}, remaining)
+	}
+	dst := *v
+	for ; remaining > 0; remaining-- {
+		var key interface{}
+		d.decode(&key)
+		// special case if a byte array: use its string form as the key.
+		if raw, isBytes := key.([]byte); isBytes {
+			key = string(raw)
+		}
+		val := dst[key]
+		d.decode(&val)
+		dst[key] = val
+	}
+}
+
+// decMapInt64Intf is the fast path for decoding a stream map into
+// *map[int64]interface{}. A nil map is allocated first; an existing entry is
+// the starting point for decoding the new value for its key.
+func (d *Decoder) decMapInt64Intf(v *map[int64]interface{}) {
+	containerLen := d.d.readMapLen()
+	m := *v
+	if m == nil {
+		m = make(map[int64]interface{}, containerLen)
+		*v = m
+	}
+	for j := 0; j < containerLen; j++ {
+		d.d.initReadNext()
+		// Keys are int64, so decode with a 64-bit size (as decode does for
+		// *int64) rather than the platform int size.
+		mk := d.d.decodeInt(64)
+		mv := m[mk]
+		d.decode(&mv)
+		m[mk] = mv
+	}
+}
+
+// decMapUint64Intf is the fast path for decoding a stream map into
+// *map[uint64]interface{}. A nil map is allocated first; an existing entry is
+// the starting point for decoding the new value for its key.
+func (d *Decoder) decMapUint64Intf(v *map[uint64]interface{}) {
+	containerLen := d.d.readMapLen()
+	m := *v
+	if m == nil {
+		m = make(map[uint64]interface{}, containerLen)
+		*v = m
+	}
+	for j := 0; j < containerLen; j++ {
+		d.d.initReadNext()
+		// Keys are uint64, so decode with a 64-bit size (as decode does for
+		// *uint64) rather than the platform int size.
+		mk := d.d.decodeUint(64)
+		mv := m[mk]
+		d.decode(&mv)
+		m[mk] = mv
+	}
+}
+
+// decMapStrIntf is the fast path for decoding a stream map into
+// *map[string]interface{}. A nil map is allocated first; an existing entry
+// is the starting point for decoding the new value for its key.
+func (d *Decoder) decMapStrIntf(v *map[string]interface{}) {
+	remaining := d.d.readMapLen()
+	if *v == nil {
+		*v = make(map[string]interface{}, remaining)
+	}
+	dst := *v
+	for ; remaining > 0; remaining-- {
+		d.d.initReadNext()
+		key := d.d.decodeString()
+		val := dst[key]
+		d.decode(&val)
+		dst[key] = val
+	}
+}
+
+// ----------------------------------------
+
+// decContLens reads the container header for a value being decoded into a
+// slice. containerLen is the length stored in the stream; containerLenS is
+// the number of slice elements needed: the same for an array, twice that
+// for a map (each entry contributes a key and a value). Passing
+// valueTypeInvalid makes it ask the driver for the encoded type. Any other
+// encoded type is a decode error.
+func decContLens(dd decDriver, currEncodedType valueType) (containerLen, containerLenS int) {
+	encType := currEncodedType
+	if encType == valueTypeInvalid {
+		encType = dd.currentEncodedType()
+	}
+	if encType == valueTypeArray {
+		containerLen = dd.readArrayLen()
+		return containerLen, containerLen
+	}
+	if encType == valueTypeMap {
+		containerLen = dd.readMapLen()
+		return containerLen, containerLen * 2
+	}
+	decErr("Only encoded map or array can be decoded into a slice. (valueType: %0x)",
+		encType)
+	return
+}
+
+// decErr reports a decode error by calling doPanic with the decoder's
+// message tag and the formatted message.
+func decErr(format string, params ...interface{}) {
+	doPanic(msgTagDec, format, params...)
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/encode.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/encode.go
new file mode 100644
index 00000000000..4914be0c748
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/encode.go
@@ -0,0 +1,1001 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+package codec
+
+import (
+	"io"
+	"reflect"
+)
+
+const (
+	// Some tagging information for error messages.
+	msgTagEnc         = "codec.encoder"
+	// defEncByteBufSize is the size of the buffer NewEncoderBytes allocates
+	// when the caller's output slice is nil.
+	defEncByteBufSize = 1 << 6 // 4:16, 6:64, 8:256, 10:1024
+	// maxTimeSecs32 = math.MaxInt32 / 60 / 24 / 366
+)
+
+// AsSymbolFlag defines what should be encoded as symbols.
+type AsSymbolFlag uint8
+
+// Only AsSymbolDefault is declared with the AsSymbolFlag type; the remaining
+// constants have explicit or repeated expressions without a type, so they
+// are untyped constants. iota counts every spec in the block, so the
+// 1 << iota values are 4, 8 and 16.
+const (
+	// AsSymbolDefault is default.
+	// Currently, this means only encode struct field names as symbols.
+	// The default is subject to change.
+	AsSymbolDefault AsSymbolFlag = iota
+
+	// AsSymbolAll means encode anything which could be a symbol as a symbol.
+	AsSymbolAll = 0xfe
+
+	// AsSymbolNone means do not encode anything as a symbol.
+	// Its value is 1 << 2 (4).
+	AsSymbolNone = 1 << iota
+
+	// AsSymbolMapStringKeysFlag means encode keys in map[string]XXX as symbols.
+	// Its value is 1 << 3 (8).
+	AsSymbolMapStringKeysFlag
+
+	// AsSymbolStructFieldNameFlag means encode struct field names as symbols.
+	// Its value is 1 << 4 (16).
+	AsSymbolStructFieldNameFlag
+)
+
+// encWriter abstracts writing to a byte array or to an io.Writer.
+// The implementations in this file are ioEncWriter and bytesEncWriter.
+type encWriter interface {
+	// writeUint16, writeUint32 and writeUint64 write a fixed-width integer.
+	// bytesEncWriter emits the most significant byte first.
+	writeUint16(uint16)
+	writeUint32(uint32)
+	writeUint64(uint64)
+	// writeb writes a byte slice; an empty slice is a no-op.
+	writeb([]byte)
+	// writestr writes the bytes of a string.
+	writestr(string)
+	// writen1 and writen2 write one and two bytes respectively.
+	writen1(byte)
+	writen2(byte, byte)
+	// atEndOfEncode is called by Encoder.Encode after a value has been encoded.
+	atEndOfEncode()
+}
+
+// encDriver abstracts the actual codec (binc vs msgpack, etc)
+type encDriver interface {
+	// isBuiltinType and encodeBuiltin take the type id (rtid) of the value.
+	isBuiltinType(rt uintptr) bool
+	encodeBuiltin(rt uintptr, v interface{})
+	encodeNil()
+	encodeInt(i int64)
+	encodeUint(i uint64)
+	encodeBool(b bool)
+	encodeFloat32(f float32)
+	encodeFloat64(f float64)
+	// The preamble methods are called with the payload length or element
+	// count before the contents are written by the caller.
+	encodeExtPreamble(xtag byte, length int)
+	encodeArrayPreamble(length int)
+	encodeMapPreamble(length int)
+	encodeString(c charEncoding, v string)
+	// encodeSymbol is used in place of encodeString for struct field names
+	// and map string keys when symbol encoding is enabled.
+	encodeSymbol(v string)
+	encodeStringBytes(c charEncoding, v []byte)
+	//TODO
+	//encBignum(f *big.Int)
+	//encStringRunes(c charEncoding, v []rune)
+}
+
+// ioEncWriterWriter is the set of write methods ioEncWriter needs from its
+// destination. NewEncoder uses the caller's io.Writer directly when it
+// implements all three, and wraps it in simpleIoEncWriterWriter otherwise.
+type ioEncWriterWriter interface {
+	WriteByte(c byte) error
+	WriteString(s string) (n int, err error)
+	Write(p []byte) (n int, err error)
+}
+
+// ioEncStringWriter is implemented by writers that can write a string
+// directly. NewEncoder probes for it when wrapping a plain io.Writer.
+type ioEncStringWriter interface {
+	WriteString(s string) (n int, err error)
+}
+
+// EncodeOptions holds the options that control encoding.
+type EncodeOptions struct {
+	// Encode a struct as an array, and not as a map.
+	StructToArray bool
+
+	// AsSymbols defines what should be encoded as symbols.
+	//
+	// Encoding as symbols can reduce the encoded size significantly.
+	//
+	// However, during encoding, each string to be encoded as a symbol must
+	// be checked to see if it has been seen before. Consequently, encoding time
+	// will increase if using symbols, because string comparisons have a clear cost.
+	//
+	// Sample values:
+	//   AsSymbolNone
+	//   AsSymbolAll
+	//   AsSymbolMapStringKeysFlag
+	//   AsSymbolMapStringKeysFlag | AsSymbolStructFieldNameFlag
+	AsSymbols AsSymbolFlag
+}
+
+// ---------------------------------------------
+
+// simpleIoEncWriterWriter adapts a plain io.Writer to ioEncWriterWriter.
+// NewEncoder sets bw and sw when the writer implements them; when either is
+// nil the corresponding method falls back to w.Write.
+type simpleIoEncWriterWriter struct {
+	w  io.Writer
+	bw io.ByteWriter
+	sw ioEncStringWriter
+}
+
+// WriteByte writes c through the underlying io.ByteWriter when there is one,
+// and as a one-byte Write on the plain writer otherwise.
+func (o *simpleIoEncWriterWriter) WriteByte(c byte) (err error) {
+	if o.bw == nil {
+		single := []byte{c}
+		_, err = o.w.Write(single)
+		return err
+	}
+	return o.bw.WriteByte(c)
+}
+
+// WriteString writes s through the underlying string writer when there is
+// one, and as a converted byte slice on the plain writer otherwise.
+func (o *simpleIoEncWriterWriter) WriteString(s string) (n int, err error) {
+	if o.sw == nil {
+		raw := []byte(s)
+		return o.w.Write(raw)
+	}
+	return o.sw.WriteString(s)
+}
+
+// Write forwards p to the wrapped io.Writer unchanged.
+func (o *simpleIoEncWriterWriter) Write(p []byte) (n int, err error) {
+	n, err = o.w.Write(p)
+	return n, err
+}
+
+// ----------------------------------------
+
+// ioEncWriter implements encWriter and can write to an io.Writer implementation.
+// Write errors are raised as panics by its methods.
+type ioEncWriter struct {
+	w ioEncWriterWriter
+	x [8]byte // temp byte array re-used internally for efficiency
+}
+
+// writeUint16 serializes v into the first 2 bytes of the scratch array via
+// bigen and writes them out.
+func (z *ioEncWriter) writeUint16(v uint16) {
+	scratch := z.x[:2]
+	bigen.PutUint16(scratch, v)
+	z.writeb(scratch)
+}
+
+// writeUint32 serializes v into the first 4 bytes of the scratch array via
+// bigen and writes them out.
+func (z *ioEncWriter) writeUint32(v uint32) {
+	scratch := z.x[:4]
+	bigen.PutUint32(scratch, v)
+	z.writeb(scratch)
+}
+
+// writeUint64 serializes v into all 8 bytes of the scratch array via bigen
+// and writes them out.
+func (z *ioEncWriter) writeUint64(v uint64) {
+	scratch := z.x[:8]
+	bigen.PutUint64(scratch, v)
+	z.writeb(scratch)
+}
+
+// writeb writes bs to the underlying writer. An empty slice is a no-op.
+// A write error is re-raised as a panic, and a short write is reported as
+// an encode error.
+func (z *ioEncWriter) writeb(bs []byte) {
+	want := len(bs)
+	if want == 0 {
+		return
+	}
+	switch got, err := z.w.Write(bs); {
+	case err != nil:
+		panic(err)
+	case got != want:
+		encErr("write: Incorrect num bytes written. Expecting: %v, Wrote: %v", want, got)
+	}
+}
+
+// writestr writes s to the underlying writer. A write error is re-raised as
+// a panic, and a short write is reported as an encode error.
+func (z *ioEncWriter) writestr(s string) {
+	switch got, err := z.w.WriteString(s); {
+	case err != nil:
+		panic(err)
+	case got != len(s):
+		encErr("write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(s), got)
+	}
+}
+
+// writen1 writes a single byte, panicking with the writer's error on failure.
+func (z *ioEncWriter) writen1(b byte) {
+	err := z.w.WriteByte(b)
+	if err == nil {
+		return
+	}
+	panic(err)
+}
+
+// writen2 writes two bytes, b1 first, as two single-byte writes.
+func (z *ioEncWriter) writen2(b1 byte, b2 byte) {
+	for _, b := range [2]byte{b1, b2} {
+		z.writen1(b)
+	}
+}
+
+// atEndOfEncode is a no-op: ioEncWriter passes each write straight to the
+// underlying writer, so nothing is pending here.
+func (z *ioEncWriter) atEndOfEncode() {}
+
+// ----------------------------------------
+
+// bytesEncWriter implements encWriter and can write to a byte slice.
+// It is used by Marshal function.
+//
+// Writes go into b at the cursor c; b is extended by grow as needed, and
+// b[:c] is published through out by atEndOfEncode.
+type bytesEncWriter struct {
+	b   []byte
+	c   int     // cursor
+	out *[]byte // write out on atEndOfEncode
+}
+
+// writeUint16 appends v as 2 bytes, most significant byte first.
+func (z *bytesEncWriter) writeUint16(v uint16) {
+	at := z.grow(2)
+	hi, lo := byte(v>>8), byte(v)
+	z.b[at] = hi
+	z.b[at+1] = lo
+}
+
+// writeUint32 appends v as 4 bytes, most significant byte first.
+func (z *bytesEncWriter) writeUint32(v uint32) {
+	at := z.grow(4)
+	for i := 0; i < 4; i++ {
+		shift := uint(24 - 8*i)
+		z.b[at+i] = byte(v >> shift)
+	}
+}
+
+// writeUint64 appends v as 8 bytes, most significant byte first.
+func (z *bytesEncWriter) writeUint64(v uint64) {
+	at := z.grow(8)
+	for i := 0; i < 8; i++ {
+		shift := uint(56 - 8*i)
+		z.b[at+i] = byte(v >> shift)
+	}
+}
+
+// writeb appends the bytes of s. An empty slice leaves the buffer untouched.
+func (z *bytesEncWriter) writeb(s []byte) {
+	n := len(s)
+	if n == 0 {
+		return
+	}
+	// grow must finish before z.b is read: it may replace the backing slice.
+	at := z.grow(n)
+	copy(z.b[at:], s)
+}
+
+// writestr appends the bytes of the string s.
+func (z *bytesEncWriter) writestr(s string) {
+	// grow must finish before z.b is read: it may replace the backing slice.
+	at := z.grow(len(s))
+	dst := z.b[at:]
+	copy(dst, s)
+}
+
+// writen1 appends a single byte.
+func (z *bytesEncWriter) writen1(b1 byte) {
+	at := z.grow(1)
+	z.b[at] = b1
+}
+
+// writen2 appends two bytes, b1 first.
+func (z *bytesEncWriter) writen2(b1 byte, b2 byte) {
+	at := z.grow(2)
+	pair := [2]byte{b1, b2}
+	z.b[at] = pair[0]
+	z.b[at+1] = pair[1]
+}
+
+// atEndOfEncode publishes the bytes written so far (up to the cursor)
+// through the caller's output slice pointer.
+func (z *bytesEncWriter) atEndOfEncode() {
+	written := z.b[:z.c]
+	*z.out = written
+}
+
+// grow reserves n more bytes at the cursor and returns the cursor position
+// from before the reservation. When the capacity is too small the buffer is
+// replaced by one of size 2*cap+n (the bytes.Buffer growth model, which
+// needed fewer copy rounds than append-style growth); when only the length
+// is short the slice is extended to its full capacity.
+func (z *bytesEncWriter) grow(n int) (oldcursor int) {
+	oldcursor = z.c
+	z.c = oldcursor + n
+	switch capacity := cap(z.b); {
+	case z.c > capacity:
+		bigger := make([]byte, 2*capacity+n)
+		copy(bigger, z.b[:oldcursor])
+		z.b = bigger
+	case z.c > len(z.b):
+		z.b = z.b[:capacity]
+	}
+	return oldcursor
+}
+
+// ---------------------------------------------
+
+// encFnInfo carries what an encode function needs for one type: its type
+// info, the owning Encoder and its driver, and, for types with a registered
+// extension, the extension function and tag used by ext.
+type encFnInfo struct {
+	ti    *typeInfo
+	e     *Encoder
+	ee    encDriver
+	xfFn  func(reflect.Value) ([]byte, error)
+	xfTag byte
+}
+
+// builtin hands the value to the driver's built-in encoder for its type id.
+func (f *encFnInfo) builtin(rv reflect.Value) {
+	rtid := f.ti.rtid
+	f.ee.encodeBuiltin(rtid, rv.Interface())
+}
+
+// rawExt encodes a RawExt value through the Encoder.
+func (f *encFnInfo) rawExt(rv reflect.Value) {
+	raw := rv.Interface().(RawExt)
+	f.e.encRawExt(raw)
+}
+
+// ext encodes a value through its registered extension function. An error
+// from that function is re-raised as a panic, and nil bytes encode as nil.
+// Otherwise the bytes are written as an extension (preamble plus payload)
+// when the handle writes extensions, or as raw bytes when it does not.
+func (f *encFnInfo) ext(rv reflect.Value) {
+	payload, fnerr := f.xfFn(rv)
+	switch {
+	case fnerr != nil:
+		panic(fnerr)
+	case payload == nil:
+		f.ee.encodeNil()
+	case f.e.hh.writeExt():
+		f.ee.encodeExtPreamble(f.xfTag, len(payload))
+		f.e.w.writeb(payload)
+	default:
+		f.ee.encodeStringBytes(c_RAW, payload)
+	}
+}
+
+func (f *encFnInfo) binaryMarshal(rv reflect.Value) {
+	var bm binaryMarshaler
+	if f.ti.mIndir == 0 {
+		bm = rv.Interface().(binaryMarshaler)
+	} else if f.ti.mIndir == -1 {
+		bm = rv.Addr().Interface().(binaryMarshaler)
+	} else {
+		for j, k := int8(0), f.ti.mIndir; j < k; j++ {
+			if rv.IsNil() {
+				f.ee.encodeNil()
+				return
+			}
+			rv = rv.Elem()
+		}
+		bm = rv.Interface().(binaryMarshaler)
+	}
+	// debugf(">>>> binaryMarshaler: %T", rv.Interface())
+	bs, fnerr := bm.MarshalBinary()
+	if fnerr != nil {
+		panic(fnerr)
+	}
+	if bs == nil {
+		f.ee.encodeNil()
+	} else {
+		f.ee.encodeStringBytes(c_RAW, bs)
+	}
+}
+
+// kBool encodes a bool-kind value.
+func (f *encFnInfo) kBool(rv reflect.Value) {
+	b := rv.Bool()
+	f.ee.encodeBool(b)
+}
+
+// kString encodes a string-kind value with the c_UTF8 encoding.
+func (f *encFnInfo) kString(rv reflect.Value) {
+	s := rv.String()
+	f.ee.encodeString(c_UTF8, s)
+}
+
+// kFloat64 encodes a float64-kind value.
+func (f *encFnInfo) kFloat64(rv reflect.Value) {
+	x := rv.Float()
+	f.ee.encodeFloat64(x)
+}
+
+// kFloat32 encodes a float32-kind value; reflect reports it as a float64,
+// so it is converted back to float32 first.
+func (f *encFnInfo) kFloat32(rv reflect.Value) {
+	x := float32(rv.Float())
+	f.ee.encodeFloat32(x)
+}
+
+// kInt encodes a signed-integer value as an int64.
+func (f *encFnInfo) kInt(rv reflect.Value) {
+	i := rv.Int()
+	f.ee.encodeInt(i)
+}
+
+// kUint encodes an unsigned-integer value as a uint64.
+func (f *encFnInfo) kUint(rv reflect.Value) {
+	u := rv.Uint()
+	f.ee.encodeUint(u)
+}
+
+// kInvalid encodes nil; rv is not inspected.
+func (f *encFnInfo) kInvalid(rv reflect.Value) {
+	f.ee.encodeNil()
+}
+
+// kErr reports an encode error naming the kind and value of rv.
+func (f *encFnInfo) kErr(rv reflect.Value) {
+	kind := rv.Kind()
+	encErr("Unsupported kind: %s, for: %#v", kind, rv)
+}
+
+// kSlice encodes a slice value.
+//
+// A nil slice encodes as nil. Common slice types go to the non-reflection
+// encoders when shortCircuitReflectToFastPath is set. Slices whose element
+// kind is uint8 are written as raw bytes. Everything else is written as an
+// array, or as a map when the type is flagged mbs (then the length must be
+// even), followed by each element in order.
+func (f *encFnInfo) kSlice(rv reflect.Value) {
+	if rv.IsNil() {
+		f.ee.encodeNil()
+		return
+	}
+
+	if shortCircuitReflectToFastPath {
+		switch f.ti.rtid {
+		case intfSliceTypId:
+			f.e.encSliceIntf(rv.Interface().([]interface{}))
+			return
+		case strSliceTypId:
+			f.e.encSliceStr(rv.Interface().([]string))
+			return
+		case uint64SliceTypId:
+			f.e.encSliceUint64(rv.Interface().([]uint64))
+			return
+		case int64SliceTypId:
+			f.e.encSliceInt64(rv.Interface().([]int64))
+			return
+		}
+	}
+
+	// If in this method, then there was no extension function defined.
+	// So it's okay to treat as []byte.
+	isBytes := f.ti.rtid == uint8SliceTypId || f.ti.rt.Elem().Kind() == reflect.Uint8
+	if isBytes {
+		f.ee.encodeStringBytes(c_RAW, rv.Bytes())
+		return
+	}
+
+	n := rv.Len()
+	if !f.ti.mbs {
+		f.ee.encodeArrayPreamble(n)
+	} else {
+		if n%2 == 1 {
+			encErr("mapBySlice: invalid length (must be divisible by 2): %v", n)
+		}
+		f.ee.encodeMapPreamble(n / 2)
+	}
+	// An empty slice writes only the preamble: the loop body never runs.
+	for idx := 0; idx < n; idx++ {
+		// TODO: Consider perf implication of encoding odd index values as symbols if type is string
+		f.e.encodeValue(rv.Index(idx))
+	}
+}
+
+// kArray encodes an array value.
+//
+// It cannot reuse kSlice, because the array may be non-addressable
+// (e.g. type struct S{B [2]byte}; Encode(S{}) would fail with
+// "panic: slice of unaddressable array"), so the logic is duplicated here.
+// Arrays whose element kind is uint8 are written as raw bytes; the rest as
+// an array, or as a map when the type is flagged mbs.
+func (f *encFnInfo) kArray(rv reflect.Value) {
+	n := rv.Len()
+
+	// Handle an array of bytes specially (in line with what is done for slices)
+	if f.ti.rt.Elem().Kind() == reflect.Uint8 {
+		var bs []byte
+		switch {
+		case n == 0:
+			// bs stays nil for an empty array.
+		case rv.CanAddr():
+			bs = rv.Slice(0, n).Bytes()
+		default:
+			// Not addressable, so it cannot be sliced: copy byte by byte.
+			bs = make([]byte, n)
+			for i := 0; i < n; i++ {
+				bs[i] = byte(rv.Index(i).Uint())
+			}
+		}
+		f.ee.encodeStringBytes(c_RAW, bs)
+		return
+	}
+
+	if !f.ti.mbs {
+		f.ee.encodeArrayPreamble(n)
+	} else {
+		if n%2 == 1 {
+			encErr("mapBySlice: invalid length (must be divisible by 2): %v", n)
+		}
+		f.ee.encodeMapPreamble(n / 2)
+	}
+	// An empty array writes only the preamble: the loop body never runs.
+	for idx := 0; idx < n; idx++ {
+		// TODO: Consider perf implication of encoding odd index values as symbols if type is string
+		f.e.encodeValue(rv.Index(idx))
+	}
+}
+
+// kStruct encodes a struct either as a map of field name to value or as an
+// array of field values. The array form is used when the type info has
+// toArray set or the handle's StructToArray option is set.
+//
+// In map form, fields marked omitEmpty that hold an empty value are left
+// out. In array form they keep their position and are encoded from a zero
+// reflect.Value instead.
+func (f *encFnInfo) kStruct(rv reflect.Value) {
+	fti := f.ti
+	newlen := len(fti.sfi)
+	rvals := make([]reflect.Value, newlen)
+	var encnames []string
+	e := f.e
+	tisfi := fti.sfip
+	toMap := !(fti.toArray || e.h.StructToArray)
+	// if toMap, use the sorted array. If toArray, use unsorted array (to match sequence in struct)
+	if toMap {
+		tisfi = fti.sfi
+		encnames = make([]string, newlen)
+	}
+	// newlen is reused as the count of fields actually kept; rvals and
+	// encnames are filled compactly, so a skipped field's slot is overwritten
+	// by the next kept one.
+	newlen = 0
+	for _, si := range tisfi {
+		// si.i == -1 means the field is reached through the index path si.is.
+		if si.i != -1 {
+			rvals[newlen] = rv.Field(int(si.i))
+		} else {
+			rvals[newlen] = rv.FieldByIndex(si.is)
+		}
+		if toMap {
+			if si.omitEmpty && isEmptyValue(rvals[newlen]) {
+				continue
+			}
+			encnames[newlen] = si.encName
+		} else {
+			if si.omitEmpty && isEmptyValue(rvals[newlen]) {
+				rvals[newlen] = reflect.Value{} //encode as nil
+			}
+		}
+		newlen++
+	}
+
+	// debugf(">>>> kStruct: newlen: %v", newlen)
+	if toMap {
+		ee := f.ee //don't dereference everytime
+		ee.encodeMapPreamble(newlen)
+		// asSymbols := e.h.AsSymbols&AsSymbolStructFieldNameFlag != 0
+		// Field names are symbols by default, or when the struct-field-name flag is set.
+		asSymbols := e.h.AsSymbols == AsSymbolDefault || e.h.AsSymbols&AsSymbolStructFieldNameFlag != 0
+		for j := 0; j < newlen; j++ {
+			if asSymbols {
+				ee.encodeSymbol(encnames[j])
+			} else {
+				ee.encodeString(c_UTF8, encnames[j])
+			}
+			e.encodeValue(rvals[j])
+		}
+	} else {
+		f.ee.encodeArrayPreamble(newlen)
+		for j := 0; j < newlen; j++ {
+			e.encodeValue(rvals[j])
+		}
+	}
+}
+
+// func (f *encFnInfo) kPtr(rv reflect.Value) {
+// 	debugf(">>>>>>> ??? encode kPtr called - shouldn't get called")
+// 	if rv.IsNil() {
+// 		f.ee.encodeNil()
+// 		return
+// 	}
+// 	f.e.encodeValue(rv.Elem())
+// }
+
+// kInterface encodes the value held by an interface, or nil when the
+// interface is empty.
+func (f *encFnInfo) kInterface(rv reflect.Value) {
+	if !rv.IsNil() {
+		held := rv.Elem()
+		f.e.encodeValue(held)
+		return
+	}
+	f.ee.encodeNil()
+}
+
+func (f *encFnInfo) kMap(rv reflect.Value) {
+	if rv.IsNil() {
+		f.ee.encodeNil()
+		return
+	}
+
+	if shortCircuitReflectToFastPath {
+		switch f.ti.rtid {
+		case mapIntfIntfTypId:
+			f.e.encMapIntfIntf(rv.Interface().(map[interface{}]interface{}))
+			return
+		case mapStrIntfTypId:
+			f.e.encMapStrIntf(rv.Interface().(map[string]interface{}))
+			return
+		case mapStrStrTypId:
+			f.e.encMapStrStr(rv.Interface().(map[string]string))
+			return
+		case mapInt64IntfTypId:
+			f.e.encMapInt64Intf(rv.Interface().(map[int64]interface{}))
+			return
+		case mapUint64IntfTypId:
+			f.e.encMapUint64Intf(rv.Interface().(map[uint64]interface{}))
+			return
+		}
+	}
+
+	l := rv.Len()
+	f.ee.encodeMapPreamble(l)
+	if l == 0 {
+		return
+	}
+	// keyTypeIsString := f.ti.rt.Key().Kind() == reflect.String
+	keyTypeIsString := f.ti.rt.Key() == stringTyp
+	var asSymbols bool
+	if keyTypeIsString {
+		asSymbols = f.e.h.AsSymbols&AsSymbolMapStringKeysFlag != 0
+	}
+	mks := rv.MapKeys()
+	// for j, lmks := 0, len(mks); j < lmks; j++ {
+	for j := range mks {
+		if keyTypeIsString {
+			if asSymbols {
+				f.ee.encodeSymbol(mks[j].String())
+			} else {
+				f.ee.encodeString(c_UTF8, mks[j].String())
+			}
+		} else {
+			f.e.encodeValue(mks[j])
+		}
+		f.e.encodeValue(rv.MapIndex(mks[j]))
+	}
+
+}
+
+// --------------------------------------------------
+
+// encFn encapsulates the captured variables and the encode function.
+// This way, we only do some calculations once, and pass them to the
+// code block that should be called (encapsulated in a function)
+// instead of executing the checks every time.
+type encFn struct {
+	// i is passed as the receiver when f is called.
+	i *encFnInfo
+	// f is a method expression on *encFnInfo.
+	f func(*encFnInfo, reflect.Value)
+}
+
+// --------------------------------------------------
+
+// An Encoder writes an object to an output stream in the codec format.
+type Encoder struct {
+	// w is the byte sink (io.Writer-backed or byte-slice-backed).
+	w  encWriter
+	// e is the format-specific driver created by the Handle.
+	e  encDriver
+	// h is the basic handle obtained from hh; hh is the Handle as passed in.
+	h  *BasicHandle
+	hh Handle
+	// f, x and s hold encFn values keyed by type id.
+	// NOTE(review): presumably the same two cache layouts as Decoder
+	// (map vs. parallel slices); confirm against encodeValue.
+	f  map[uintptr]encFn
+	x  []uintptr
+	s  []encFn
+}
+
+// NewEncoder returns an Encoder that writes to the given io.Writer.
+//
+// For efficiency, callers are encouraged to pass a memory-buffered writer
+// (e.g. bufio.Writer, bytes.Buffer). A writer that already provides
+// WriteByte, WriteString and Write is used as is; any other writer is
+// wrapped so that those methods fall back to Write where missing.
+func NewEncoder(w io.Writer, h Handle) *Encoder {
+	sink, ok := w.(ioEncWriterWriter)
+	if !ok {
+		bw, _ := w.(io.ByteWriter)
+		sw, _ := w.(ioEncStringWriter)
+		sink = &simpleIoEncWriterWriter{w: w, bw: bw, sw: sw}
+	}
+	z := &ioEncWriter{w: sink}
+	return &Encoder{
+		w:  z,
+		hh: h,
+		h:  h.getBasicHandle(),
+		e:  h.newEncDriver(z),
+	}
+}
+
+// NewEncoderBytes returns an Encoder that encodes directly into a byte
+// slice, without copying through temporary slices.
+//
+// The slice that out points to is used as the initial buffer (a buffer of
+// defEncByteBufSize bytes is allocated when it is nil) and may be replaced.
+// After encoding, *out holds the encoded contents.
+func NewEncoderBytes(out *[]byte, h Handle) *Encoder {
+	buf := *out
+	if buf == nil {
+		buf = make([]byte, defEncByteBufSize)
+	}
+	z := &bytesEncWriter{b: buf, out: out}
+	return &Encoder{
+		w:  z,
+		hh: h,
+		h:  h.getBasicHandle(),
+		e:  h.newEncDriver(z),
+	}
+}
+
+// Encode writes an object into a stream in the codec format.
+//
+// Encoding can be configured via the "codec" struct tag for the fields.
+//
+// The "codec" key in struct field's tag value is the key name,
+// followed by an optional comma and options.
+//
+// To set an option on all fields (e.g. omitempty on all fields), you
+// can create a field called _struct, and set flags on it.
+//
+// Struct values "usually" encode as maps. Each exported struct field is encoded unless:
+//    - the field's codec tag is "-", OR
+//    - the field is empty and its codec tag specifies the "omitempty" option.
+//
+// When encoding as a map, the first string in the tag (before the comma)
+// is the map key string to use when encoding.
+//
+// However, struct values may encode as arrays. This happens when:
+//    - StructToArray Encode option is set, OR
+//    - the codec tag on the _struct field sets the "toarray" option
+//
+// Values with types that implement MapBySlice are encoded as stream maps.
+//
+// The empty values (for omitempty option) are false, 0, any nil pointer
+// or interface value, and any array, slice, map, or string of length zero.
+//
+// Anonymous fields are encoded inline if no struct tag is present.
+// Else they are encoded as regular fields.
+//
+// Examples:
+//
+//      type MyStruct struct {
+//          _struct bool    `codec:",omitempty"`   //set omitempty for every field
+//          Field1 string   `codec:"-"`            //skip this field
+//          Field2 int      `codec:"myName"`       //Use key "myName" in encode stream
+//          Field3 int32    `codec:",omitempty"`   //use key "Field3". Omit if empty.
+//          Field4 bool     `codec:"f4,omitempty"` //use key "f4". Omit if empty.
+//          ...
+//      }
+//
+//      type MyStruct struct {
+//          _struct bool    `codec:",omitempty,toarray"`   //set omitempty for every field
+//                                                         //and encode struct as an array
+//      }
+//
+// The mode of encoding is based on the type of the value. When a value is seen:
+//   - If an extension is registered for it, call that extension function
+//   - If it implements BinaryMarshaler, call its MarshalBinary() (data []byte, err error)
+//   - Else encode it based on its reflect.Kind
+//
+// Note that struct field names and keys in map[string]XXX will be treated as symbols.
+// Some formats support symbols (e.g. binc) and will properly encode the string
+// only once in the stream, and use a tag to refer to it thereafter.
+func (e *Encoder) Encode(v interface{}) (err error) {
+	defer panicToErr(&err)
+	e.encode(v)
+	e.w.atEndOfEncode()
+	return
+}
+
+func (e *Encoder) encode(iv interface{}) {
+	switch v := iv.(type) {
+	case nil:
+		e.e.encodeNil()
+
+	case reflect.Value:
+		e.encodeValue(v)
+
+	case string:
+		e.e.encodeString(c_UTF8, v)
+	case bool:
+		e.e.encodeBool(v)
+	case int:
+		e.e.encodeInt(int64(v))
+	case int8:
+		e.e.encodeInt(int64(v))
+	case int16:
+		e.e.encodeInt(int64(v))
+	case int32:
+		e.e.encodeInt(int64(v))
+	case int64:
+		e.e.encodeInt(v)
+	case uint:
+		e.e.encodeUint(uint64(v))
+	case uint8:
+		e.e.encodeUint(uint64(v))
+	case uint16:
+		e.e.encodeUint(uint64(v))
+	case uint32:
+		e.e.encodeUint(uint64(v))
+	case uint64:
+		e.e.encodeUint(v)
+	case float32:
+		e.e.encodeFloat32(v)
+	case float64:
+		e.e.encodeFloat64(v)
+
+	case []interface{}:
+		e.encSliceIntf(v)
+	case []string:
+		e.encSliceStr(v)
+	case []int64:
+		e.encSliceInt64(v)
+	case []uint64:
+		e.encSliceUint64(v)
+	case []uint8:
+		e.e.encodeStringBytes(c_RAW, v)
+
+	case map[interface{}]interface{}:
+		e.encMapIntfIntf(v)
+	case map[string]interface{}:
+		e.encMapStrIntf(v)
+	case map[string]string:
+		e.encMapStrStr(v)
+	case map[int64]interface{}:
+		e.encMapInt64Intf(v)
+	case map[uint64]interface{}:
+		e.encMapUint64Intf(v)
+
+	case *string:
+		e.e.encodeString(c_UTF8, *v)
+	case *bool:
+		e.e.encodeBool(*v)
+	case *int:
+		e.e.encodeInt(int64(*v))
+	case *int8:
+		e.e.encodeInt(int64(*v))
+	case *int16:
+		e.e.encodeInt(int64(*v))
+	case *int32:
+		e.e.encodeInt(int64(*v))
+	case *int64:
+		e.e.encodeInt(*v)
+	case *uint:
+		e.e.encodeUint(uint64(*v))
+	case *uint8:
+		e.e.encodeUint(uint64(*v))
+	case *uint16:
+		e.e.encodeUint(uint64(*v))
+	case *uint32:
+		e.e.encodeUint(uint64(*v))
+	case *uint64:
+		e.e.encodeUint(*v)
+	case *float32:
+		e.e.encodeFloat32(*v)
+	case *float64:
+		e.e.encodeFloat64(*v)
+
+	case *[]interface{}:
+		e.encSliceIntf(*v)
+	case *[]string:
+		e.encSliceStr(*v)
+	case *[]int64:
+		e.encSliceInt64(*v)
+	case *[]uint64:
+		e.encSliceUint64(*v)
+	case *[]uint8:
+		e.e.encodeStringBytes(c_RAW, *v)
+
+	case *map[interface{}]interface{}:
+		e.encMapIntfIntf(*v)
+	case *map[string]interface{}:
+		e.encMapStrIntf(*v)
+	case *map[string]string:
+		e.encMapStrStr(*v)
+	case *map[int64]interface{}:
+		e.encMapInt64Intf(*v)
+	case *map[uint64]interface{}:
+		e.encMapUint64Intf(*v)
+
+	default:
+		e.encodeValue(reflect.ValueOf(iv))
+	}
+}
+
+// encodeValue dereferences rv (a nil pointer encodes as nil) and runs the encode function for its type, resolving and caching that function on first use.
+func (e *Encoder) encodeValue(rv reflect.Value) {
+	for rv.Kind() == reflect.Ptr {
+		if rv.IsNil() {
+			e.e.encodeNil()
+			return
+		}
+		rv = rv.Elem()
+	}
+
+	rt := rv.Type()
+	rtid := reflect.ValueOf(rt).Pointer()
+
+	// Look up the cached encode function for this type id (map e.f, or parallel slices e.x/e.s).
+	var fn encFn
+	var ok bool
+	if useMapForCodecCache {
+		fn, ok = e.f[rtid]
+	} else {
+		for i, v := range e.x {
+			if v == rtid {
+				fn, ok = e.s[i], true
+				break
+			}
+		}
+	}
+	if !ok {
+		// Cache miss: choose the encode function for this type once; it is stored in the cache below.
+		fi := encFnInfo{ti: getTypeInfo(rtid, rt), e: e, ee: e.e}
+		fn.i = &fi
+		if rtid == rawExtTypId {
+			fn.f = (*encFnInfo).rawExt
+		} else if e.e.isBuiltinType(rtid) {
+			fn.f = (*encFnInfo).builtin
+		} else if xfTag, xfFn := e.h.getEncodeExt(rtid); xfFn != nil {
+			fi.xfTag, fi.xfFn = xfTag, xfFn
+			fn.f = (*encFnInfo).ext
+		} else if supportBinaryMarshal && fi.ti.m {
+			fn.f = (*encFnInfo).binaryMarshal
+		} else {
+			switch rk := rt.Kind(); rk {
+			case reflect.Bool:
+				fn.f = (*encFnInfo).kBool
+			case reflect.String:
+				fn.f = (*encFnInfo).kString
+			case reflect.Float64:
+				fn.f = (*encFnInfo).kFloat64
+			case reflect.Float32:
+				fn.f = (*encFnInfo).kFloat32
+			case reflect.Int, reflect.Int8, reflect.Int64, reflect.Int32, reflect.Int16:
+				fn.f = (*encFnInfo).kInt
+			case reflect.Uint8, reflect.Uint64, reflect.Uint, reflect.Uint32, reflect.Uint16:
+				fn.f = (*encFnInfo).kUint
+			case reflect.Invalid:
+				fn.f = (*encFnInfo).kInvalid
+			case reflect.Slice:
+				fn.f = (*encFnInfo).kSlice
+			case reflect.Array:
+				fn.f = (*encFnInfo).kArray
+			case reflect.Struct:
+				fn.f = (*encFnInfo).kStruct
+			// case reflect.Ptr:
+			// 	fn.f = (*encFnInfo).kPtr
+			case reflect.Interface:
+				fn.f = (*encFnInfo).kInterface
+			case reflect.Map:
+				fn.f = (*encFnInfo).kMap
+			default:
+				fn.f = (*encFnInfo).kErr
+			}
+		}
+		if useMapForCodecCache {
+			if e.f == nil {
+				e.f = make(map[uintptr]encFn, 16)
+			}
+			e.f[rtid] = fn
+		} else {
+			e.s = append(e.s, fn)
+			e.x = append(e.x, rtid)
+		}
+	}
+
+	fn.f(fn.i, rv)
+}
+
+func (e *Encoder) encRawExt(re RawExt) {
+	if re.Data == nil {
+		e.e.encodeNil()
+		return
+	}
+	if e.hh.writeExt() {
+		e.e.encodeExtPreamble(re.Tag, len(re.Data))
+		e.w.writeb(re.Data)
+	} else {
+		e.e.encodeStringBytes(c_RAW, re.Data)
+	}
+}
+
+// ---------------------------------------------
+// short circuit functions for common maps and slices
+
+func (e *Encoder) encSliceIntf(v []interface{}) {
+	e.e.encodeArrayPreamble(len(v))
+	for _, v2 := range v {
+		e.encode(v2)
+	}
+}
+
+func (e *Encoder) encSliceStr(v []string) {
+	e.e.encodeArrayPreamble(len(v))
+	for _, v2 := range v {
+		e.e.encodeString(c_UTF8, v2)
+	}
+}
+
+func (e *Encoder) encSliceInt64(v []int64) {
+	e.e.encodeArrayPreamble(len(v))
+	for _, v2 := range v {
+		e.e.encodeInt(v2)
+	}
+}
+
+func (e *Encoder) encSliceUint64(v []uint64) {
+	e.e.encodeArrayPreamble(len(v))
+	for _, v2 := range v {
+		e.e.encodeUint(v2)
+	}
+}
+
+func (e *Encoder) encMapStrStr(v map[string]string) {
+	e.e.encodeMapPreamble(len(v))
+	asSymbols := e.h.AsSymbols&AsSymbolMapStringKeysFlag != 0
+	for k2, v2 := range v {
+		if asSymbols {
+			e.e.encodeSymbol(k2)
+		} else {
+			e.e.encodeString(c_UTF8, k2)
+		}
+		e.e.encodeString(c_UTF8, v2)
+	}
+}
+
+func (e *Encoder) encMapStrIntf(v map[string]interface{}) {
+	e.e.encodeMapPreamble(len(v))
+	asSymbols := e.h.AsSymbols&AsSymbolMapStringKeysFlag != 0
+	for k2, v2 := range v {
+		if asSymbols {
+			e.e.encodeSymbol(k2)
+		} else {
+			e.e.encodeString(c_UTF8, k2)
+		}
+		e.encode(v2)
+	}
+}
+
+func (e *Encoder) encMapInt64Intf(v map[int64]interface{}) {
+	e.e.encodeMapPreamble(len(v))
+	for k2, v2 := range v {
+		e.e.encodeInt(k2)
+		e.encode(v2)
+	}
+}
+
+func (e *Encoder) encMapUint64Intf(v map[uint64]interface{}) {
+	e.e.encodeMapPreamble(len(v))
+	for k2, v2 := range v {
+		e.e.encodeUint(uint64(k2))
+		e.encode(v2)
+	}
+}
+
+func (e *Encoder) encMapIntfIntf(v map[interface{}]interface{}) {
+	e.e.encodeMapPreamble(len(v))
+	for k2, v2 := range v {
+		e.encode(k2)
+		e.encode(v2)
+	}
+}
+
+// ----------------------------------------
+
+func encErr(format string, params ...interface{}) {
+	doPanic(msgTagEnc, format, params...)
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/helper.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/helper.go
new file mode 100644
index 00000000000..e6dc0563f09
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/helper.go
@@ -0,0 +1,589 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+package codec
+
+// Contains code shared by both encode and decode.
+
+import (
+	"encoding/binary"
+	"fmt"
+	"math"
+	"reflect"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+	"unicode"
+	"unicode/utf8"
+)
+
+const (
+	structTagName = "codec"
+
+	// Support
+	//    encoding.BinaryMarshaler: MarshalBinary() (data []byte, err error)
+	//    encoding.BinaryUnmarshaler: UnmarshalBinary(data []byte) error
+	// This constant flag will enable or disable it.
+	supportBinaryMarshal = true
+
+	// Each Encoder or Decoder uses a cache of functions based on conditionals,
+	// so that the conditionals are not run every time.
+	//
+	// Either a map or a slice is used to keep track of the functions.
+	// The map is more natural, but has a higher cost than a slice/array.
+	// This flag (useMapForCodecCache) controls which is used.
+	useMapForCodecCache = false
+
+	// For some common container types, we can short-circuit an elaborate
+	// reflection dance and call encode/decode directly.
+	// The currently supported types are:
+	//    - slices of strings, or id's (int64,uint64) or interfaces.
+	//    - maps of str->str, str->intf, id(int64,uint64)->intf, intf->intf
+	shortCircuitReflectToFastPath = true
+
+	// for debugging, set this to false, to catch panic traces.
+	// Note that this will always cause rpc tests to fail, since they need io.EOF sent via panic.
+	recoverPanicToErr = true
+)
+
+type charEncoding uint8
+
+const (
+	c_RAW charEncoding = iota
+	c_UTF8
+	c_UTF16LE
+	c_UTF16BE
+	c_UTF32LE
+	c_UTF32BE
+)
+
+// valueType is the stream type
+type valueType uint8
+
+const (
+	valueTypeUnset valueType = iota
+	valueTypeNil
+	valueTypeInt
+	valueTypeUint
+	valueTypeFloat
+	valueTypeBool
+	valueTypeString
+	valueTypeSymbol
+	valueTypeBytes
+	valueTypeMap
+	valueTypeArray
+	valueTypeTimestamp
+	valueTypeExt
+
+	valueTypeInvalid = 0xff
+)
+
+var (
+	bigen               = binary.BigEndian
+	structInfoFieldName = "_struct"
+
+	cachedTypeInfo      = make(map[uintptr]*typeInfo, 4)
+	cachedTypeInfoMutex sync.RWMutex
+
+	intfSliceTyp = reflect.TypeOf([]interface{}(nil))
+	intfTyp      = intfSliceTyp.Elem()
+
+	strSliceTyp     = reflect.TypeOf([]string(nil))
+	boolSliceTyp    = reflect.TypeOf([]bool(nil))
+	uintSliceTyp    = reflect.TypeOf([]uint(nil))
+	uint8SliceTyp   = reflect.TypeOf([]uint8(nil))
+	uint16SliceTyp  = reflect.TypeOf([]uint16(nil))
+	uint32SliceTyp  = reflect.TypeOf([]uint32(nil))
+	uint64SliceTyp  = reflect.TypeOf([]uint64(nil))
+	intSliceTyp     = reflect.TypeOf([]int(nil))
+	int8SliceTyp    = reflect.TypeOf([]int8(nil))
+	int16SliceTyp   = reflect.TypeOf([]int16(nil))
+	int32SliceTyp   = reflect.TypeOf([]int32(nil))
+	int64SliceTyp   = reflect.TypeOf([]int64(nil))
+	float32SliceTyp = reflect.TypeOf([]float32(nil))
+	float64SliceTyp = reflect.TypeOf([]float64(nil))
+
+	mapIntfIntfTyp = reflect.TypeOf(map[interface{}]interface{}(nil))
+	mapStrIntfTyp  = reflect.TypeOf(map[string]interface{}(nil))
+	mapStrStrTyp   = reflect.TypeOf(map[string]string(nil))
+
+	mapIntIntfTyp    = reflect.TypeOf(map[int]interface{}(nil))
+	mapInt64IntfTyp  = reflect.TypeOf(map[int64]interface{}(nil))
+	mapUintIntfTyp   = reflect.TypeOf(map[uint]interface{}(nil))
+	mapUint64IntfTyp = reflect.TypeOf(map[uint64]interface{}(nil))
+
+	stringTyp = reflect.TypeOf("")
+	timeTyp   = reflect.TypeOf(time.Time{})
+	rawExtTyp = reflect.TypeOf(RawExt{})
+
+	mapBySliceTyp        = reflect.TypeOf((*MapBySlice)(nil)).Elem()
+	binaryMarshalerTyp   = reflect.TypeOf((*binaryMarshaler)(nil)).Elem()
+	binaryUnmarshalerTyp = reflect.TypeOf((*binaryUnmarshaler)(nil)).Elem()
+
+	rawExtTypId = reflect.ValueOf(rawExtTyp).Pointer()
+	intfTypId   = reflect.ValueOf(intfTyp).Pointer()
+	timeTypId   = reflect.ValueOf(timeTyp).Pointer()
+
+	intfSliceTypId = reflect.ValueOf(intfSliceTyp).Pointer()
+	strSliceTypId  = reflect.ValueOf(strSliceTyp).Pointer()
+
+	boolSliceTypId    = reflect.ValueOf(boolSliceTyp).Pointer()
+	uintSliceTypId    = reflect.ValueOf(uintSliceTyp).Pointer()
+	uint8SliceTypId   = reflect.ValueOf(uint8SliceTyp).Pointer()
+	uint16SliceTypId  = reflect.ValueOf(uint16SliceTyp).Pointer()
+	uint32SliceTypId  = reflect.ValueOf(uint32SliceTyp).Pointer()
+	uint64SliceTypId  = reflect.ValueOf(uint64SliceTyp).Pointer()
+	intSliceTypId     = reflect.ValueOf(intSliceTyp).Pointer()
+	int8SliceTypId    = reflect.ValueOf(int8SliceTyp).Pointer()
+	int16SliceTypId   = reflect.ValueOf(int16SliceTyp).Pointer()
+	int32SliceTypId   = reflect.ValueOf(int32SliceTyp).Pointer()
+	int64SliceTypId   = reflect.ValueOf(int64SliceTyp).Pointer()
+	float32SliceTypId = reflect.ValueOf(float32SliceTyp).Pointer()
+	float64SliceTypId = reflect.ValueOf(float64SliceTyp).Pointer()
+
+	mapStrStrTypId     = reflect.ValueOf(mapStrStrTyp).Pointer()
+	mapIntfIntfTypId   = reflect.ValueOf(mapIntfIntfTyp).Pointer()
+	mapStrIntfTypId    = reflect.ValueOf(mapStrIntfTyp).Pointer()
+	mapIntIntfTypId    = reflect.ValueOf(mapIntIntfTyp).Pointer()
+	mapInt64IntfTypId  = reflect.ValueOf(mapInt64IntfTyp).Pointer()
+	mapUintIntfTypId   = reflect.ValueOf(mapUintIntfTyp).Pointer()
+	mapUint64IntfTypId = reflect.ValueOf(mapUint64IntfTyp).Pointer()
+	// Id = reflect.ValueOf().Pointer()
+	// mapBySliceTypId  = reflect.ValueOf(mapBySliceTyp).Pointer()
+
+	binaryMarshalerTypId   = reflect.ValueOf(binaryMarshalerTyp).Pointer()
+	binaryUnmarshalerTypId = reflect.ValueOf(binaryUnmarshalerTyp).Pointer()
+
+	intBitsize  uint8 = uint8(reflect.TypeOf(int(0)).Bits())
+	uintBitsize uint8 = uint8(reflect.TypeOf(uint(0)).Bits())
+
+	bsAll0x00 = []byte{0, 0, 0, 0, 0, 0, 0, 0}
+	bsAll0xff = []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
+)
+
+type binaryUnmarshaler interface {
+	UnmarshalBinary(data []byte) error
+}
+
+type binaryMarshaler interface {
+	MarshalBinary() (data []byte, err error)
+}
+
+// MapBySlice represents a slice which should be encoded as a map in the stream.
+// The slice contains a sequence of key-value pairs.
+type MapBySlice interface {
+	MapBySlice()
+}
+
+// WARNING: DO NOT USE DIRECTLY. EXPORTED FOR GODOC BENEFIT. WILL BE REMOVED.
+//
+// BasicHandle encapsulates the common options and extension functions.
+type BasicHandle struct {
+	extHandle
+	EncodeOptions
+	DecodeOptions
+}
+
+// Handle is the interface for a specific encoding format.
+//
+// Typically, a Handle is pre-configured before first time use,
+// and not modified while in use. Such a pre-configured Handle
+// is safe for concurrent access.
+type Handle interface {
+	writeExt() bool
+	getBasicHandle() *BasicHandle
+	newEncDriver(w encWriter) encDriver
+	newDecDriver(r decReader) decDriver
+}
+
+// RawExt represents raw unprocessed extension data.
+type RawExt struct {
+	Tag  byte
+	Data []byte
+}
+
+type extTypeTagFn struct {
+	rtid  uintptr
+	rt    reflect.Type
+	tag   byte
+	encFn func(reflect.Value) ([]byte, error)
+	decFn func(reflect.Value, []byte) error
+}
+
+type extHandle []*extTypeTagFn
+
+// AddExt registers an encode and decode function for a reflect.Type.
+// Note that the type must be a named type, and specifically not
+// a pointer or Interface. An error is returned if that is not honored.
+//
+// To deregister an ext, call AddExt with 0 tag, nil encfn and nil decfn.
+func (o *extHandle) AddExt(
+	rt reflect.Type,
+	tag byte,
+	encfn func(reflect.Value) ([]byte, error),
+	decfn func(reflect.Value, []byte) error,
+) (err error) {
+	// o is a pointer, because we may need to initialize it
+	if rt == nil || rt.PkgPath() == "" || rt.Kind() == reflect.Interface {
+		// Print rt itself: %T of an interface type's zero value would only report "<nil>".
+		err = fmt.Errorf("codec.Handle.AddExt: Takes named type, especially not a pointer or interface: %v", rt)
+		return
+	}
+
+	// o cannot be nil, since it is always embedded in a Handle.
+	// if nil, let it panic.
+	// if o == nil {
+	// 	err = errors.New("codec.Handle.AddExt: extHandle cannot be a nil pointer.")
+	// 	return
+	// }
+
+	rtid := reflect.ValueOf(rt).Pointer()
+	for _, v := range *o {
+		if v.rtid == rtid {
+			v.tag, v.encFn, v.decFn = tag, encfn, decfn
+			return
+		}
+	}
+
+	*o = append(*o, &extTypeTagFn{rtid, rt, tag, encfn, decfn})
+	return
+}
+
+func (o extHandle) getExt(rtid uintptr) *extTypeTagFn {
+	for _, v := range o {
+		if v.rtid == rtid {
+			return v
+		}
+	}
+	return nil
+}
+
+func (o extHandle) getExtForTag(tag byte) *extTypeTagFn {
+	for _, v := range o {
+		if v.tag == tag {
+			return v
+		}
+	}
+	return nil
+}
+
+func (o extHandle) getDecodeExtForTag(tag byte) (
+	rv reflect.Value, fn func(reflect.Value, []byte) error) {
+	if x := o.getExtForTag(tag); x != nil {
+		// ext is only registered for base
+		rv = reflect.New(x.rt).Elem()
+		fn = x.decFn
+	}
+	return
+}
+
+func (o extHandle) getDecodeExt(rtid uintptr) (tag byte, fn func(reflect.Value, []byte) error) {
+	if x := o.getExt(rtid); x != nil {
+		tag = x.tag
+		fn = x.decFn
+	}
+	return
+}
+
+func (o extHandle) getEncodeExt(rtid uintptr) (tag byte, fn func(reflect.Value) ([]byte, error)) {
+	if x := o.getExt(rtid); x != nil {
+		tag = x.tag
+		fn = x.encFn
+	}
+	return
+}
+
+type structFieldInfo struct {
+	encName string // encode name
+
+	// only one of 'i' or 'is' can be set. If 'i' is -1, then 'is' has been set.
+
+	is        []int // (recursive/embedded) field index in struct
+	i         int16 // field index in struct
+	omitEmpty bool
+	toArray   bool // if field is _struct, is the toArray set?
+
+	// tag       string   // tag
+	// name      string   // field name
+	// encNameBs []byte   // encoded name as byte stream
+	// ikind     int      // kind of the field as an int i.e. int(reflect.Kind)
+}
+
+func parseStructFieldInfo(fname string, stag string) *structFieldInfo {
+	if fname == "" {
+		panic("parseStructFieldInfo: No Field Name")
+	}
+	si := structFieldInfo{
+		// name: fname,
+		encName: fname,
+		// tag: stag,
+	}
+
+	if stag != "" {
+		for i, s := range strings.Split(stag, ",") {
+			if i == 0 {
+				if s != "" {
+					si.encName = s
+				}
+			} else {
+				switch s {
+				case "omitempty":
+					si.omitEmpty = true
+				case "toarray":
+					si.toArray = true
+				}
+			}
+		}
+	}
+	// si.encNameBs = []byte(si.encName)
+	return &si
+}
+
+type sfiSortedByEncName []*structFieldInfo
+
+func (p sfiSortedByEncName) Len() int {
+	return len(p)
+}
+
+func (p sfiSortedByEncName) Less(i, j int) bool {
+	return p[i].encName < p[j].encName
+}
+
+func (p sfiSortedByEncName) Swap(i, j int) {
+	p[i], p[j] = p[j], p[i]
+}
+
+// typeInfo keeps information about each type referenced in the encode/decode sequence.
+//
+// During an encode/decode sequence, we work as below:
+//   - If base is a built in type, en/decode base value
+//   - If base is registered as an extension, en/decode base value
+//   - If type is binary(M/Unm)arshaler, call Binary(M/Unm)arshal method
+//   - Else decode appropriately based on the reflect.Kind
+type typeInfo struct {
+	sfi  []*structFieldInfo // sorted. Used when enc/dec struct to map.
+	sfip []*structFieldInfo // unsorted. Used when enc/dec struct to array.
+
+	rt   reflect.Type
+	rtid uintptr
+
+	// baseId gives pointer to the base reflect.Type, after dereferencing
+	// the pointers. E.g. base type of ***time.Time is time.Time.
+	base      reflect.Type
+	baseId    uintptr
+	baseIndir int8 // number of indirections to get to base
+
+	mbs bool // base type (T or *T) is a MapBySlice
+
+	m        bool // base type (T or *T) is a binaryMarshaler
+	unm      bool // base type (T or *T) is a binaryUnmarshaler
+	mIndir   int8 // number of indirections to get to binaryMarshaler type
+	unmIndir int8 // number of indirections to get to binaryUnmarshaler type
+	toArray  bool // whether this (struct) type should be encoded as an array
+}
+
+// indexForEncName returns the index into ti.sfi of the entry whose encName is name, or -1 if there is none.
+func (ti *typeInfo) indexForEncName(name string) int {
+	const binarySearchThreshold = 16
+	if sfilen := len(ti.sfi); sfilen < binarySearchThreshold {
+		// linear search. faster than binary search in my testing up to 16-field structs.
+		for i, si := range ti.sfi {
+			if si.encName == name {
+				return i
+			}
+		}
+	} else {
+		// binary search. adapted from sort/search.go.
+		h, i, j := 0, 0, sfilen
+		for i < j {
+			h = i + (j-i)/2
+			if ti.sfi[h].encName < name {
+				i = h + 1
+			} else {
+				j = h
+			}
+		}
+		if i < sfilen && ti.sfi[i].encName == name {
+			return i
+		}
+	}
+	return -1
+}
+
+// getTypeInfo returns the *typeInfo cached under rtid, building it from rt and storing it in cachedTypeInfo on first use.
+func getTypeInfo(rtid uintptr, rt reflect.Type) (pti *typeInfo) {
+	var ok bool
+	cachedTypeInfoMutex.RLock()
+	pti, ok = cachedTypeInfo[rtid]
+	cachedTypeInfoMutex.RUnlock()
+	if ok {
+		return
+	}
+
+	cachedTypeInfoMutex.Lock()
+	defer cachedTypeInfoMutex.Unlock()
+	if pti, ok = cachedTypeInfo[rtid]; ok {
+		return
+	}
+
+	ti := typeInfo{rt: rt, rtid: rtid}
+	pti = &ti
+
+	var indir int8
+	if ok, indir = implementsIntf(rt, binaryMarshalerTyp); ok {
+		ti.m, ti.mIndir = true, indir
+	}
+	if ok, indir = implementsIntf(rt, binaryUnmarshalerTyp); ok {
+		ti.unm, ti.unmIndir = true, indir
+	}
+	if ok, _ = implementsIntf(rt, mapBySliceTyp); ok {
+		ti.mbs = true
+	}
+
+	pt := rt
+	var ptIndir int8
+	for pt.Kind() == reflect.Ptr {
+		pt = pt.Elem()
+		ptIndir++
+	}
+	if ptIndir == 0 {
+		ti.base = rt
+		ti.baseId = rtid
+	} else {
+		ti.base = pt
+		ti.baseId = reflect.ValueOf(pt).Pointer()
+		ti.baseIndir = ptIndir
+	}
+
+	if rt.Kind() == reflect.Struct {
+		var siInfo *structFieldInfo
+		if f, ok := rt.FieldByName(structInfoFieldName); ok {
+			siInfo = parseStructFieldInfo(structInfoFieldName, f.Tag.Get(structTagName))
+			ti.toArray = siInfo.toArray
+		}
+		sfip := make([]*structFieldInfo, 0, rt.NumField())
+		rgetTypeInfo(rt, nil, make(map[string]bool), &sfip, siInfo)
+
+		// // try to put all si close together
+		// const tryToPutAllStructFieldInfoTogether = true
+		// if tryToPutAllStructFieldInfoTogether {
+		// 	sfip2 := make([]structFieldInfo, len(sfip))
+		// 	for i, si := range sfip {
+		// 		sfip2[i] = *si
+		// 	}
+		// 	for i := range sfip {
+		// 		sfip[i] = &sfip2[i]
+		// 	}
+		// }
+
+		ti.sfip = make([]*structFieldInfo, len(sfip))
+		ti.sfi = make([]*structFieldInfo, len(sfip))
+		copy(ti.sfip, sfip)
+		sort.Sort(sfiSortedByEncName(sfip))
+		copy(ti.sfi, sfip)
+	}
+	// sfi = sfip
+	cachedTypeInfo[rtid] = pti
+	return
+}
+
+// rgetTypeInfo appends a structFieldInfo to *sfi for each encodable field of struct type rt, inlining untagged embedded structs.
+func rgetTypeInfo(rt reflect.Type, indexstack []int, fnameToHastag map[string]bool,
+	sfi *[]*structFieldInfo, siInfo *structFieldInfo,
+) {
+	// rt is expected to be a struct type: the call in getTypeInfo is guarded by
+	// rt.Kind() == reflect.Struct, and the recursive call below strips pointers
+	// from an embedded field's type before descending into it.
+	for j := 0; j < rt.NumField(); j++ {
+		f := rt.Field(j)
+		stag := f.Tag.Get(structTagName)
+		if stag == "-" {
+			continue
+		}
+		if r1, _ := utf8.DecodeRuneInString(f.Name); r1 == utf8.RuneError || !unicode.IsUpper(r1) {
+			continue
+		}
+		// if anonymous and there is no struct tag and it's a struct (or pointer to struct), inline it.
+		if f.Anonymous && stag == "" {
+			ft := f.Type
+			for ft.Kind() == reflect.Ptr {
+				ft = ft.Elem()
+			}
+			if ft.Kind() == reflect.Struct {
+				indexstack2 := append(append(make([]int, 0, len(indexstack)+4), indexstack...), j)
+				rgetTypeInfo(ft, indexstack2, fnameToHastag, sfi, siInfo)
+				continue
+			}
+		}
+		// do not let fields with same name in embedded structs override field at higher level.
+		// this must be done after anonymous check, to allow anonymous field
+		// still include their child fields
+		if _, ok := fnameToHastag[f.Name]; ok {
+			continue
+		}
+		si := parseStructFieldInfo(f.Name, stag)
+		// si.ikind = int(f.Type.Kind())
+		if len(indexstack) == 0 {
+			si.i = int16(j)
+		} else {
+			si.i = -1
+			si.is = append(append(make([]int, 0, len(indexstack)+4), indexstack...), j)
+		}
+
+		if siInfo != nil {
+			if siInfo.omitEmpty {
+				si.omitEmpty = true
+			}
+		}
+		*sfi = append(*sfi, si)
+		fnameToHastag[f.Name] = stag != ""
+	}
+}
+
+func panicToErr(err *error) {
+	if recoverPanicToErr {
+		if x := recover(); x != nil {
+			//debug.PrintStack()
+			panicValToErr(x, err)
+		}
+	}
+}
+
+func doPanic(tag string, format string, params ...interface{}) {
+	params2 := make([]interface{}, len(params)+1)
+	params2[0] = tag
+	copy(params2[1:], params)
+	panic(fmt.Errorf("%s: "+format, params2...))
+}
+
+func checkOverflowFloat32(f float64, doCheck bool) {
+	if !doCheck {
+		return
+	}
+	// Report via decErr when a finite |f| exceeds math.MaxFloat32 (logic adapted from std pkg reflect/value.go OverflowFloat()).
+	f2 := f
+	if f2 < 0 {
+		f2 = -f
+	}
+	if math.MaxFloat32 < f2 && f2 <= math.MaxFloat64 {
+		decErr("Overflow float32 value: %v", f2)
+	}
+}
+
+func checkOverflow(ui uint64, i int64, bitsize uint8) {
+	// Report via decErr when i or ui does not fit in bitsize bits (logic adapted from std pkg reflect/value.go OverflowUint()).
+	if bitsize == 0 {
+		return
+	}
+	if i != 0 {
+		if trunc := (i << (64 - bitsize)) >> (64 - bitsize); i != trunc {
+			decErr("Overflow int value: %v", i)
+		}
+	}
+	if ui != 0 {
+		if trunc := (ui << (64 - bitsize)) >> (64 - bitsize); ui != trunc {
+			decErr("Overflow uint value: %v", ui)
+		}
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/helper_internal.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/helper_internal.go
new file mode 100644
index 00000000000..58417da958f
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/helper_internal.go
@@ -0,0 +1,127 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+package codec
+
+// All non-std package dependencies live in this file,
+// so porting to different environment is easy (just update functions).
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"reflect"
+)
+
+var (
+	raisePanicAfterRecover = false
+	debugging              = true
+)
+
+func panicValToErr(panicVal interface{}, err *error) {
+	switch xerr := panicVal.(type) {
+	case error:
+		*err = xerr
+	case string:
+		*err = errors.New(xerr)
+	default:
+		*err = fmt.Errorf("%v", panicVal)
+	}
+	if raisePanicAfterRecover {
+		panic(panicVal)
+	}
+	return
+}
+
+func isEmptyValueDeref(v reflect.Value, deref bool) bool {
+	switch v.Kind() {
+	case reflect.Array, reflect.Map, reflect.Slice, reflect.String:
+		return v.Len() == 0
+	case reflect.Bool:
+		return !v.Bool()
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return v.Int() == 0
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		return v.Uint() == 0
+	case reflect.Float32, reflect.Float64:
+		return v.Float() == 0
+	case reflect.Interface, reflect.Ptr:
+		if deref {
+			if v.IsNil() {
+				return true
+			}
+			return isEmptyValueDeref(v.Elem(), deref)
+		} else {
+			return v.IsNil()
+		}
+	case reflect.Struct:
+		// return true if all fields are empty. else return false.
+
+		// we cannot use equality check, because some fields may be maps/slices/etc
+		// and consequently the structs are not comparable.
+		// return v.Interface() == reflect.Zero(v.Type()).Interface()
+		for i, n := 0, v.NumField(); i < n; i++ {
+			if !isEmptyValueDeref(v.Field(i), deref) {
+				return false
+			}
+		}
+		return true
+	}
+	return false
+}
+
+func isEmptyValue(v reflect.Value) bool {
+	return isEmptyValueDeref(v, true)
+}
+
+func debugf(format string, args ...interface{}) {
+	if debugging {
+		if len(format) == 0 || format[len(format)-1] != '\n' {
+			format = format + "\n"
+		}
+		fmt.Printf(format, args...)
+	}
+}
+
+func pruneSignExt(v []byte, pos bool) (n int) {
+	if len(v) < 2 {
+	} else if pos && v[0] == 0 {
+		for ; v[n] == 0 && n+1 < len(v) && (v[n+1]&(1<<7) == 0); n++ {
+		}
+	} else if !pos && v[0] == 0xff {
+		for ; v[n] == 0xff && n+1 < len(v) && (v[n+1]&(1<<7) != 0); n++ {
+		}
+	}
+	return
+}
+
+func implementsIntf(typ, iTyp reflect.Type) (success bool, indir int8) {
+	if typ == nil {
+		return
+	}
+	rt := typ
+	// The type might be a pointer and we need to keep
+	// dereferencing to the base type until we find an implementation.
+	for {
+		if rt.Implements(iTyp) {
+			return true, indir
+		}
+		if p := rt; p.Kind() == reflect.Ptr {
+			indir++
+			if indir >= math.MaxInt8 { // insane number of indirections
+				return false, 0
+			}
+			rt = p.Elem()
+			continue
+		}
+		break
+	}
+	// No luck yet, but if this is a base type (non-pointer), the pointer might satisfy.
+	if typ.Kind() != reflect.Ptr {
+		// Not a pointer, but does the pointer work?
+		if reflect.PtrTo(typ).Implements(iTyp) {
+			return true, -1
+		}
+	}
+	return false, 0
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/msgpack.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/msgpack.go
new file mode 100644
index 00000000000..da0500d1922
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/msgpack.go
@@ -0,0 +1,816 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+/*
+MSGPACK
+
+Msgpack-c implementation powers the c, c++, python, ruby, etc libraries.
+We need to maintain compatibility with it and how it encodes integer values
+without caring about the type.
+
+For compatibility with behaviour of msgpack-c reference implementation:
+  - Go intX (>0) and uintX
+       IS ENCODED AS
+    msgpack +ve fixnum, unsigned
+  - Go intX (<0)
+       IS ENCODED AS
+    msgpack -ve fixnum, signed
+
+*/
+package codec
+
+import (
+	"fmt"
+	"io"
+	"math"
+	"net/rpc"
+)
+
+const (
+	mpPosFixNumMin byte = 0x00 // 0x00-0x7f: positive fixnum, the byte is the value
+	mpPosFixNumMax      = 0x7f
+	mpFixMapMin         = 0x80 // 0x80-0x8f: fixmap, low 4 bits hold the length
+	mpFixMapMax         = 0x8f
+	mpFixArrayMin       = 0x90 // 0x90-0x9f: fixarray, low 4 bits hold the length
+	mpFixArrayMax       = 0x9f
+	mpFixStrMin         = 0xa0 // 0xa0-0xbf: fixstr, low 5 bits hold the length
+	mpFixStrMax         = 0xbf
+	mpNil               = 0xc0
+	_                   = 0xc1 // deliberately left unnamed
+	mpFalse             = 0xc2
+	mpTrue              = 0xc3
+	mpFloat             = 0xca // followed by 4 bytes (float32 bits)
+	mpDouble            = 0xcb // followed by 8 bytes (float64 bits)
+	mpUint8             = 0xcc
+	mpUint16            = 0xcd
+	mpUint32            = 0xce
+	mpUint64            = 0xcf
+	mpInt8              = 0xd0
+	mpInt16             = 0xd1
+	mpInt32             = 0xd2
+	mpInt64             = 0xd3
+
+	// binary and extension descriptors
+	mpBin8     = 0xc4
+	mpBin16    = 0xc5
+	mpBin32    = 0xc6
+	mpExt8     = 0xc7
+	mpExt16    = 0xc8
+	mpExt32    = 0xc9
+	mpFixExt1  = 0xd4 // fixext: payload length (1, 2, 4, 8, 16) is implied by the descriptor
+	mpFixExt2  = 0xd5
+	mpFixExt4  = 0xd6
+	mpFixExt8  = 0xd7
+	mpFixExt16 = 0xd8
+
+	mpStr8  = 0xd9 // string with 8-bit length; only written when WriteExt is set
+	mpStr16 = 0xda
+	mpStr32 = 0xdb
+
+	mpArray16 = 0xdc
+	mpArray32 = 0xdd
+
+	mpMap16 = 0xde
+	mpMap32 = 0xdf
+
+	mpNegFixNumMin = 0xe0 // 0xe0-0xff: negative fixnum, the byte read as int8 is the value
+	mpNegFixNumMax = 0xff
+)
+
+// MsgpackSpecRpcMultiArgs is a special type which signifies to the MsgpackSpecRpcCodec
+// that the backend RPC service takes multiple arguments, which have been arranged
+// in sequence in the slice.
+//
+// The Codec then passes it AS-IS to the rpc service (without wrapping it in an
+// array of 1 element).
+type MsgpackSpecRpcMultiArgs []interface{}
+
+// msgpackContainerType describes how one family of length-prefixed values is framed.
+type msgpackContainerType struct {
+	fixCutoff                   int  // lengths below this use the single-byte fix form
+	bFixMin, b8, b16, b32       byte // descriptors: fix base, then 8/16/32-bit length forms
+	hasFixMin, has8, has8Always bool // which forms exist; has8Always bypasses the WriteExt gate
+}
+
+var (
+	msgpackContainerStr  = msgpackContainerType{32, mpFixStrMin, mpStr8, mpStr16, mpStr32, true, true, false} // 8-bit form only with WriteExt
+	msgpackContainerBin  = msgpackContainerType{0, 0, mpBin8, mpBin16, mpBin32, false, true, true} // no fix form; 8-bit form always used
+	msgpackContainerList = msgpackContainerType{16, mpFixArrayMin, 0, mpArray16, mpArray32, true, false, false} // no 8-bit form (b8 is a 0 placeholder)
+	msgpackContainerMap  = msgpackContainerType{16, mpFixMapMin, 0, mpMap16, mpMap32, true, false, false} // no 8-bit form (b8 is a 0 placeholder)
+)
+
+//---------------------------------------------
+
+type msgpackEncDriver struct {
+	w encWriter      // destination for the encoded bytes
+	h *MsgpackHandle // options; WriteExt is consulted when framing strings and bytes
+}
+
+func (e *msgpackEncDriver) isBuiltinType(rt uintptr) bool {
+	// No builtin types: all encodings are based on kinds, and types are supported as extensions.
+	return false
+}
+
+func (e *msgpackEncDriver) encodeBuiltin(rt uintptr, v interface{}) {} // no-op: isBuiltinType always reports false
+
+func (e *msgpackEncDriver) encodeNil() {
+	e.w.writen1(mpNil) // nil is the single descriptor byte 0xc0
+}
+
+func (e *msgpackEncDriver) encodeInt(i int64) {
+	w := e.w
+	switch {
+	case i >= 0: // non-negative values share the unsigned encodings
+		e.encodeUint(uint64(i))
+	case i >= -32: // negative fixnum: the value byte doubles as the descriptor
+		w.writen1(byte(i))
+	case i >= math.MinInt8:
+		w.writen2(mpInt8, byte(i))
+	case i >= math.MinInt16:
+		w.writen1(mpInt16)
+		w.writeUint16(uint16(i))
+	case i >= math.MinInt32:
+		w.writen1(mpInt32)
+		w.writeUint32(uint32(i))
+	default:
+		w.writen1(mpInt64)
+		w.writeUint64(uint64(i))
+	}
+}
+
+func (e *msgpackEncDriver) encodeUint(i uint64) {
+	switch w := e.w; { // pick the narrowest encoding that can hold i
+	case i <= math.MaxInt8: // positive fixnum: the value byte doubles as the descriptor
+		w.writen1(byte(i))
+	case i <= math.MaxUint8:
+		w.writen2(mpUint8, byte(i))
+	case i <= math.MaxUint16:
+		w.writen1(mpUint16)
+		w.writeUint16(uint16(i))
+	case i <= math.MaxUint32:
+		w.writen1(mpUint32)
+		w.writeUint32(uint32(i))
+	default:
+		w.writen1(mpUint64)
+		w.writeUint64(i)
+	}
+}
+
+func (e *msgpackEncDriver) encodeBool(b bool) {
+	bd := byte(mpFalse) // a bool is a single descriptor byte with no payload
+	if b {
+		bd = mpTrue
+	}
+	e.w.writen1(bd)
+}
+
+func (e *msgpackEncDriver) encodeFloat32(f float32) {
+	e.w.writen1(mpFloat)
+	e.w.writeUint32(math.Float32bits(f)) // payload is the IEEE 754 bit pattern of f
+}
+
+func (e *msgpackEncDriver) encodeFloat64(f float64) {
+	e.w.writen1(mpDouble)
+	e.w.writeUint64(math.Float64bits(f)) // payload is the IEEE 754 bit pattern of f
+}
+
+func (e *msgpackEncDriver) encodeExtPreamble(xtag byte, l int) {
+	switch w := e.w; { // writes descriptor, optional length, then the tag; the payload follows separately
+	case l == 1: // lengths 1, 2, 4, 8 and 16 have dedicated fixext descriptors with no length field
+		w.writen2(mpFixExt1, xtag)
+	case l == 2:
+		w.writen2(mpFixExt2, xtag)
+	case l == 4:
+		w.writen2(mpFixExt4, xtag)
+	case l == 8:
+		w.writen2(mpFixExt8, xtag)
+	case l == 16:
+		w.writen2(mpFixExt16, xtag)
+	case l < 256: // every other length is written explicitly, ahead of the tag
+		w.writen2(mpExt8, byte(l))
+		w.writen1(xtag)
+	case l < 65536:
+		w.writen1(mpExt16)
+		w.writeUint16(uint16(l))
+		w.writen1(xtag)
+	default:
+		w.writen1(mpExt32)
+		w.writeUint32(uint32(l))
+		w.writen1(xtag)
+	}
+}
+
+func (e *msgpackEncDriver) encodeArrayPreamble(length int) {
+	e.writeContainerLen(msgpackContainerList, length) // header only; elements are written by the caller
+}
+
+func (e *msgpackEncDriver) encodeMapPreamble(length int) {
+	e.writeContainerLen(msgpackContainerMap, length) // header only; entries are written by the caller
+}
+
+func (e *msgpackEncDriver) encodeString(c charEncoding, s string) {
+	ct := msgpackContainerStr
+	if c == c_RAW && e.h.WriteExt { // raw data is framed as bin only when WriteExt is on
+		ct = msgpackContainerBin
+	}
+	e.writeContainerLen(ct, len(s))
+	if len(s) > 0 {
+		e.w.writestr(s)
+	}
+}
+
+func (e *msgpackEncDriver) encodeSymbol(v string) {
+	e.encodeString(c_UTF8, v) // symbols get no special framing: they are written as UTF-8 strings
+}
+
+func (e *msgpackEncDriver) encodeStringBytes(c charEncoding, bs []byte) {
+	ct := msgpackContainerStr
+	if c == c_RAW && e.h.WriteExt { // raw data is framed as bin only when WriteExt is on
+		ct = msgpackContainerBin
+	}
+	e.writeContainerLen(ct, len(bs))
+	if len(bs) > 0 {
+		e.w.writeb(bs)
+	}
+}
+
+func (e *msgpackEncDriver) writeContainerLen(ct msgpackContainerType, l int) {
+	switch w := e.w; {
+	case ct.hasFixMin && l < ct.fixCutoff: // short form: length folded into the descriptor byte
+		w.writen1(ct.bFixMin | byte(l))
+	case ct.has8 && l < 256 && (ct.has8Always || e.h.WriteExt): // 8-bit form is gated by WriteExt unless has8Always
+		w.writen2(ct.b8, uint8(l))
+	case l < 65536:
+		w.writen1(ct.b16)
+		w.writeUint16(uint16(l))
+	default:
+		w.writen1(ct.b32)
+		w.writeUint32(uint32(l))
+	}
+}
+
+//---------------------------------------------
+
+type msgpackDecDriver struct {
+	r      decReader      // source of the encoded bytes
+	h      *MsgpackHandle // options; RawToString is consulted when decoding strings into interface{}
+	bd     byte           // descriptor byte most recently read by initReadNext
+	bdRead bool           // true while bd has been read but its value not yet consumed
+	bdType valueType      // cached result of currentEncodedType; reset by initReadNext
+}
+
+func (d *msgpackDecDriver) isBuiltinType(rt uintptr) bool {
+	// No builtin types: all encodings are based on kinds, and types are supported as extensions.
+	return false
+}
+
+func (d *msgpackDecDriver) decodeBuiltin(rt uintptr, v interface{}) {} // no-op: isBuiltinType always reports false
+
+// decodeNaked inspects the next descriptor and decodes the value without a target type.
+// Scalars (nil, bool, float, ints) and extensions are fully read and returned in v.
+// For strings and bytes, v is a pointer to an empty value and decodeFurther is true;
+// for arrays and maps only vt is set and decodeFurther is true. In those cases the
+// descriptor stays pending (bdRead remains true) so the caller can decode the contents.
+func (d *msgpackDecDriver) decodeNaked() (v interface{}, vt valueType, decodeFurther bool) {
+	d.initReadNext()
+	bd := d.bd
+
+	switch bd {
+	case mpNil:
+		vt = valueTypeNil
+		d.bdRead = false
+	case mpFalse:
+		vt = valueTypeBool
+		v = false
+	case mpTrue:
+		vt = valueTypeBool
+		v = true
+
+	case mpFloat:
+		vt = valueTypeFloat
+		v = float64(math.Float32frombits(d.r.readUint32())) // float32 is widened to float64
+	case mpDouble:
+		vt = valueTypeFloat
+		v = math.Float64frombits(d.r.readUint64())
+
+	case mpUint8:
+		vt = valueTypeUint
+		v = uint64(d.r.readn1())
+	case mpUint16:
+		vt = valueTypeUint
+		v = uint64(d.r.readUint16())
+	case mpUint32:
+		vt = valueTypeUint
+		v = uint64(d.r.readUint32())
+	case mpUint64:
+		vt = valueTypeUint
+		v = uint64(d.r.readUint64())
+
+	case mpInt8:
+		vt = valueTypeInt
+		v = int64(int8(d.r.readn1())) // go through the sized signed type to sign-extend
+	case mpInt16:
+		vt = valueTypeInt
+		v = int64(int16(d.r.readUint16()))
+	case mpInt32:
+		vt = valueTypeInt
+		v = int64(int32(d.r.readUint32()))
+	case mpInt64:
+		vt = valueTypeInt
+		v = int64(int64(d.r.readUint64()))
+
+	default:
+		switch {
+		case bd >= mpPosFixNumMin && bd <= mpPosFixNumMax:
+			// positive fixnum (always reported as a signed int)
+			vt = valueTypeInt
+			v = int64(int8(bd))
+		case bd >= mpNegFixNumMin && bd <= mpNegFixNumMax:
+			// negative fixnum
+			vt = valueTypeInt
+			v = int64(int8(bd))
+		case bd == mpStr8, bd == mpStr16, bd == mpStr32, bd >= mpFixStrMin && bd <= mpFixStrMax:
+			if d.h.RawToString {
+				var rvm string
+				vt = valueTypeString
+				v = &rvm
+			} else {
+				var rvm = []byte{}
+				vt = valueTypeBytes
+				v = &rvm
+			}
+			decodeFurther = true
+		case bd == mpBin8, bd == mpBin16, bd == mpBin32:
+			var rvm = []byte{}
+			vt = valueTypeBytes
+			v = &rvm
+			decodeFurther = true
+		case bd == mpArray16, bd == mpArray32, bd >= mpFixArrayMin && bd <= mpFixArrayMax:
+			vt = valueTypeArray
+			decodeFurther = true
+		case bd == mpMap16, bd == mpMap32, bd >= mpFixMapMin && bd <= mpFixMapMax:
+			vt = valueTypeMap
+			decodeFurther = true
+		case bd >= mpFixExt1 && bd <= mpFixExt16, bd >= mpExt8 && bd <= mpExt32:
+			clen := d.readExtLen()
+			var re RawExt
+			re.Tag = d.r.readn1() // wire order: length (if any), tag, then payload
+			re.Data = d.r.readn(clen)
+			v = &re
+			vt = valueTypeExt
+		default:
+			decErr("Nil-Deciphered DecodeValue: %s: hex: %x, dec: %d", msgBadDesc, bd, bd)
+		}
+	}
+	if !decodeFurther {
+		d.bdRead = false // value fully consumed: the next decode must read a new descriptor
+	}
+	return
+}
+
+// decodeInt reads a signed integer from any msgpack intXXX, uintXXX or fixnum encoding.
+func (d *msgpackDecDriver) decodeInt(bitsize uint8) (i int64) {
+	switch d.bd {
+	case mpUint8:
+		i = int64(uint64(d.r.readn1()))
+	case mpUint16:
+		i = int64(uint64(d.r.readUint16()))
+	case mpUint32:
+		i = int64(uint64(d.r.readUint32()))
+	case mpUint64:
+		i = int64(d.r.readUint64()) // NOTE(review): values above math.MaxInt64 wrap to negative here
+	case mpInt8:
+		i = int64(int8(d.r.readn1())) // go through the sized signed type to sign-extend
+	case mpInt16:
+		i = int64(int16(d.r.readUint16()))
+	case mpInt32:
+		i = int64(int32(d.r.readUint32()))
+	case mpInt64:
+		i = int64(d.r.readUint64())
+	default:
+		switch {
+		case d.bd >= mpPosFixNumMin && d.bd <= mpPosFixNumMax:
+			i = int64(int8(d.bd))
+		case d.bd >= mpNegFixNumMin && d.bd <= mpNegFixNumMax:
+			i = int64(int8(d.bd))
+		default:
+			decErr("Unhandled single-byte signed integer value: %s: %x", msgBadDesc, d.bd)
+		}
+	}
+	// check overflow (logic adapted from std pkg reflect/value.go OverflowInt());
+	// bitsize 0 skips the check.
+	if bitsize > 0 {
+		if trunc := (i << (64 - bitsize)) >> (64 - bitsize); i != trunc {
+			decErr("Overflow int value: %v", i)
+		}
+	}
+	d.bdRead = false
+	return
+}
+
+// decodeUint reads an unsigned integer from any msgpack uintXXX, intXXX or fixnum encoding.
+func (d *msgpackDecDriver) decodeUint(bitsize uint8) (ui uint64) {
+	switch d.bd {
+	case mpUint8:
+		ui = uint64(d.r.readn1())
+	case mpUint16:
+		ui = uint64(d.r.readUint16())
+	case mpUint32:
+		ui = uint64(d.r.readUint32())
+	case mpUint64:
+		ui = d.r.readUint64()
+	case mpInt8: // signed encodings are accepted as long as the value is not negative
+		if i := int64(int8(d.r.readn1())); i >= 0 {
+			ui = uint64(i)
+		} else {
+			decErr("Assigning negative signed value: %v, to unsigned type", i)
+		}
+	case mpInt16:
+		if i := int64(int16(d.r.readUint16())); i >= 0 {
+			ui = uint64(i)
+		} else {
+			decErr("Assigning negative signed value: %v, to unsigned type", i)
+		}
+	case mpInt32:
+		if i := int64(int32(d.r.readUint32())); i >= 0 {
+			ui = uint64(i)
+		} else {
+			decErr("Assigning negative signed value: %v, to unsigned type", i)
+		}
+	case mpInt64:
+		if i := int64(d.r.readUint64()); i >= 0 {
+			ui = uint64(i)
+		} else {
+			decErr("Assigning negative signed value: %v, to unsigned type", i)
+		}
+	default:
+		switch {
+		case d.bd >= mpPosFixNumMin && d.bd <= mpPosFixNumMax:
+			ui = uint64(d.bd)
+		case d.bd >= mpNegFixNumMin && d.bd <= mpNegFixNumMax:
+			decErr("Assigning negative signed value: %v, to unsigned type", int8(d.bd)) // int8 so the real negative value is reported
+		default:
+			decErr("Unhandled single-byte unsigned integer value: %s: %x", msgBadDesc, d.bd)
+		}
+	}
+	// check overflow (logic adapted from std pkg reflect/value.go OverflowUint());
+	// bitsize 0 skips the check.
+	if bitsize > 0 {
+		if trunc := (ui << (64 - bitsize)) >> (64 - bitsize); ui != trunc {
+			decErr("Overflow uint value: %v", ui)
+		}
+	}
+	d.bdRead = false
+	return
+}
+
+// decodeFloat reads a float from a msgpack float, double, or any integer encoding.
+func (d *msgpackDecDriver) decodeFloat(chkOverflow32 bool) (f float64) {
+	switch d.bd {
+	case mpFloat:
+		f = float64(math.Float32frombits(d.r.readUint32()))
+	case mpDouble:
+		f = math.Float64frombits(d.r.readUint64())
+	default:
+		f = float64(d.decodeInt(0)) // bitsize 0: decodeInt performs no overflow check
+	}
+	checkOverflowFloat32(f, chkOverflow32)
+	d.bdRead = false
+	return
+}
+
+// decodeBool reads a bool from a msgpack bool, or from the fixnum 0 (false) or 1 (true).
+func (d *msgpackDecDriver) decodeBool() (b bool) {
+	switch d.bd {
+	case mpFalse, 0:
+		// b is already false (zero value of the named result)
+	case mpTrue, 1:
+		b = true
+	default:
+		decErr("Invalid single-byte value for bool: %s: %x", msgBadDesc, d.bd)
+	}
+	d.bdRead = false
+	return
+}
+
+func (d *msgpackDecDriver) decodeString() (s string) {
+	clen := d.readContainerLen(msgpackContainerStr) // -1 for nil, 0 for empty: both yield ""
+	if clen > 0 {
+		s = string(d.r.readn(clen))
+	}
+	d.bdRead = false
+	return
+}
+
+// Callers must check if changed=true (to decide whether to replace the one they have)
+func (d *msgpackDecDriver) decodeBytes(bs []byte) (bsOut []byte, changed bool) {
+	// bytes can be decoded from msgpackContainerStr or msgpackContainerBin
+	var clen int
+	switch d.bd {
+	case mpBin8, mpBin16, mpBin32:
+		clen = d.readContainerLen(msgpackContainerBin)
+	default:
+		clen = d.readContainerLen(msgpackContainerStr)
+	}
+	// clen is -1 when the stream holds nil (see readContainerLen) and 0 for an
+	// empty value. In both cases nothing is read, bs is left untouched and
+	// changed stays false. When len(bs) already equals clen the data is read
+	// into bs in place and bsOut is left nil, hence the changed flag.
+	if clen > 0 {
+		// if no contents in stream, don't update the passed byteslice
+		if len(bs) != clen {
+			// Return changed=true if length of passed slice diff from length of bytes in stream
+			if len(bs) > clen {
+				bs = bs[:clen]
+			} else {
+				bs = make([]byte, clen)
+			}
+			bsOut = bs
+			changed = true
+		}
+		d.r.readb(bs)
+	}
+	d.bdRead = false
+	return
+}
+
+// Every top-level decode func (i.e. decodeValue, decode) must call this first.
+func (d *msgpackDecDriver) initReadNext() {
+	if d.bdRead { // a descriptor is still pending; do not consume another byte
+		return
+	}
+	d.bd = d.r.readn1()
+	d.bdRead = true
+	d.bdType = valueTypeUnset // invalidate the type cached by currentEncodedType
+}
+
+func (d *msgpackDecDriver) currentEncodedType() valueType {
+	if d.bdType == valueTypeUnset { // classify d.bd once, then serve the cached answer
+		bd := d.bd
+		switch bd {
+		case mpNil:
+			d.bdType = valueTypeNil
+		case mpFalse, mpTrue:
+			d.bdType = valueTypeBool
+		case mpFloat, mpDouble:
+			d.bdType = valueTypeFloat
+		case mpUint8, mpUint16, mpUint32, mpUint64:
+			d.bdType = valueTypeUint
+		case mpInt8, mpInt16, mpInt32, mpInt64:
+			d.bdType = valueTypeInt
+		default:
+			switch {
+			case bd >= mpPosFixNumMin && bd <= mpPosFixNumMax:
+				d.bdType = valueTypeInt // fixnums are reported as signed ints
+			case bd >= mpNegFixNumMin && bd <= mpNegFixNumMax:
+				d.bdType = valueTypeInt
+			case bd == mpStr8, bd == mpStr16, bd == mpStr32, bd >= mpFixStrMin && bd <= mpFixStrMax:
+				if d.h.RawToString {
+					d.bdType = valueTypeString
+				} else {
+					d.bdType = valueTypeBytes
+				}
+			case bd == mpBin8, bd == mpBin16, bd == mpBin32:
+				d.bdType = valueTypeBytes
+			case bd == mpArray16, bd == mpArray32, bd >= mpFixArrayMin && bd <= mpFixArrayMax:
+				d.bdType = valueTypeArray
+			case bd == mpMap16, bd == mpMap32, bd >= mpFixMapMin && bd <= mpFixMapMax:
+				d.bdType = valueTypeMap
+			case bd >= mpFixExt1 && bd <= mpFixExt16, bd >= mpExt8 && bd <= mpExt32:
+				d.bdType = valueTypeExt
+			default:
+				decErr("currentEncodedType: Undeciphered descriptor: %s: hex: %x, dec: %d", msgBadDesc, bd, bd)
+			}
+		}
+	}
+	return d.bdType
+}
+
+func (d *msgpackDecDriver) tryDecodeAsNil() bool {
+	if d.bd == mpNil {
+		d.bdRead = false // the nil is consumed; otherwise the descriptor stays pending
+		return true
+	}
+	return false
+}
+
+func (d *msgpackDecDriver) readContainerLen(ct msgpackContainerType) (clen int) {
+	bd := d.bd
+	switch {
+	case bd == mpNil:
+		clen = -1 // to represent nil
+	case ct.has8 && bd == ct.b8: // b8 is only a 0 placeholder when the container has no 8-bit form
+		clen = int(d.r.readn1())
+	case bd == ct.b16:
+		clen = int(d.r.readUint16())
+	case bd == ct.b32:
+		clen = int(d.r.readUint32())
+	case ct.hasFixMin && bd >= ct.bFixMin && int(bd-ct.bFixMin) < ct.fixCutoff: // exact fix range, not a bit mask
+		clen = int(bd - ct.bFixMin) // the length is stored in the low bits of the descriptor
+	default:
+		decErr("readContainerLen: %s: hex: %x, dec: %d", msgBadDesc, bd, bd)
+	}
+	d.bdRead = false
+	return
+}
+
+func (d *msgpackDecDriver) readMapLen() int {
+	return d.readContainerLen(msgpackContainerMap) // -1 when the stream holds nil
+}
+
+func (d *msgpackDecDriver) readArrayLen() int {
+	return d.readContainerLen(msgpackContainerList) // -1 when the stream holds nil
+}
+
+func (d *msgpackDecDriver) readExtLen() (clen int) { // unlike readContainerLen, bdRead is left for the caller to clear
+	switch d.bd {
+	case mpNil:
+		clen = -1 // to represent nil
+	case mpFixExt1: // fixext: the length is implied by the descriptor, nothing is read
+		clen = 1
+	case mpFixExt2:
+		clen = 2
+	case mpFixExt4:
+		clen = 4
+	case mpFixExt8:
+		clen = 8
+	case mpFixExt16:
+		clen = 16
+	case mpExt8: // ext8/16/32: the length follows the descriptor on the wire
+		clen = int(d.r.readn1())
+	case mpExt16:
+		clen = int(d.r.readUint16())
+	case mpExt32:
+		clen = int(d.r.readUint32())
+	default:
+		decErr("decoding ext bytes: found unexpected byte: %x", d.bd)
+	}
+	return
+}
+
+func (d *msgpackDecDriver) decodeExt(verifyTag bool, tag byte) (xtag byte, xbs []byte) {
+	switch xbd := d.bd; { // bin and str values are accepted as untagged extension payloads
+	case xbd == mpBin8, xbd == mpBin16, xbd == mpBin32:
+		xbs, _ = d.decodeBytes(nil)
+	case xbd == mpStr8, xbd == mpStr16, xbd == mpStr32,
+		xbd >= mpFixStrMin && xbd <= mpFixStrMax:
+		xbs = []byte(d.decodeString())
+	default:
+		if clen := d.readExtLen(); clen >= 0 { // -1 means nil: no tag or payload follows
+			xtag = d.r.readn1()
+			if verifyTag && xtag != tag {
+				decErr("Wrong extension tag. Got %v. Expecting: %v", xtag, tag)
+			}
+			xbs = d.r.readn(clen)
+		}
+	}
+	d.bdRead = false
+	return
+}
+
+//--------------------------------------------------
+
+// MsgpackHandle is a Handle for the Msgpack Schema-Free Encoding Format.
+type MsgpackHandle struct {
+	BasicHandle
+
+	// RawToString controls how raw bytes are decoded into a nil interface{}.
+	RawToString bool
+	// WriteExt flag supports encoding configured extensions with extension tags.
+	// It also controls whether other elements of the new spec are encoded (ie Str8).
+	//
+	// With WriteExt=false, configured extensions are serialized as raw bytes
+	// and Str8 is not encoded.
+	//
+	// A stream can still be decoded into a typed value, provided an appropriate value
+	// is provided, but the type cannot be inferred from the stream. If no appropriate
+	// type is provided (e.g. decoding into a nil interface{}), you get back
+	// a []byte or string based on the setting of RawToString.
+	WriteExt bool
+}
+
+func (h *MsgpackHandle) newEncDriver(w encWriter) encDriver {
+	return &msgpackEncDriver{w: w, h: h} // the driver keeps a pointer to h, not a copy
+}
+
+func (h *MsgpackHandle) newDecDriver(r decReader) decDriver {
+	return &msgpackDecDriver{r: r, h: h} // the driver keeps a pointer to h, not a copy
+}
+
+func (h *MsgpackHandle) writeExt() bool {
+	return h.WriteExt // plain accessor for the exported option
+}
+
+func (h *MsgpackHandle) getBasicHandle() *BasicHandle {
+	return &h.BasicHandle // pointer into h, so changes through it affect this handle
+}
+
+//--------------------------------------------------
+
+type msgpackSpecRpcCodec struct {
+	rpcCodec // embedded: supplies read, write, Close, ReadResponseBody and the buffered reader/writer
+}
+
+// /////////////// Spec RPC Codec ///////////////////
+func (c *msgpackSpecRpcCodec) WriteRequest(r *rpc.Request, body interface{}) error {
+	// WriteRequest can write to both a Go service, and other services that do
+	// not abide by the 1 argument rule of a Go service.
+	// We discriminate based on if the body is a MsgpackSpecRpcMultiArgs
+	var bodyArr []interface{}
+	if m, ok := body.(MsgpackSpecRpcMultiArgs); ok {
+		bodyArr = ([]interface{})(m)
+	} else {
+		bodyArr = []interface{}{body}
+	}
+	r2 := []interface{}{0, uint32(r.Seq), r.ServiceMethod, bodyArr}
+	return c.write(r2, nil, false, true)
+}
+
+func (c *msgpackSpecRpcCodec) WriteResponse(r *rpc.Response, body interface{}) error {
+	// Serialize writers, mirroring goRpcCodec.WriteResponse.
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	var moe interface{} // stays nil unless the response carries an error
+	if r.Error != "" {
+		moe, body = r.Error, nil // an error response never carries a result
+	}
+	r2 := []interface{}{1, uint32(r.Seq), moe, body} // [type=1, msgid, error, result]
+	return c.write(r2, nil, false, true)
+}
+
+func (c *msgpackSpecRpcCodec) ReadResponseHeader(r *rpc.Response) error {
+	return c.parseCustomHeader(1, &r.Seq, &r.Error) // 1 is the type byte WriteResponse emits
+}
+
+func (c *msgpackSpecRpcCodec) ReadRequestHeader(r *rpc.Request) error {
+	return c.parseCustomHeader(0, &r.Seq, &r.ServiceMethod) // 0 is the type byte WriteRequest emits
+}
+
+func (c *msgpackSpecRpcCodec) ReadRequestBody(body interface{}) error {
+	if body == nil { // read and discard
+		return c.read(nil)
+	}
+	bodyArr := []interface{}{body} // params arrive as an array; decode its first element into body
+	return c.read(&bodyArr)
+}
+
+func (c *msgpackSpecRpcCodec) parseCustomHeader(expectTypeByte byte, msgid *uint64, methodOrError *string) (err error) {
+
+	if c.cls {
+		return io.EOF
+	}
+
+	// We read the message header by hand (array descriptor, type, msgid, method-or-error)
+	// so that the body can be decoded on its own from the stream at a later time.
+
+	const fia byte = 0x94 // descriptor of a four-item array (fixarray base 0x90 | 4)
+	// The descriptor byte is taken straight from the buffered reader below.
+	// An earlier variant read it through the decoder instead:
+	// 	if bs1 := c.dec.r.readn1(); bs1 != fia { ... }
+	// but, per the original author's note, the EOF panic was then swallowed
+	// for reasons that were not understood.
+	var b byte
+	b, err = c.br.ReadByte()
+	if err != nil {
+		return
+	}
+	if b != fia {
+		err = fmt.Errorf("Unexpected value for array descriptor: Expecting %v. Received %v", fia, b)
+		return
+	}
+
+	if err = c.read(&b); err != nil { // b is reused to hold the decoded message type
+		return
+	}
+	if b != expectTypeByte {
+		err = fmt.Errorf("Unexpected byte descriptor in header. Expecting %v. Received %v", expectTypeByte, b)
+		return
+	}
+	if err = c.read(msgid); err != nil {
+		return
+	}
+	if err = c.read(methodOrError); err != nil {
+		return
+	}
+	return
+}
+
+//--------------------------------------------------
+
+// msgpackSpecRpc is the implementation of Rpc that uses custom communication protocol
+// as defined in the msgpack spec at https://github.com/msgpack-rpc/msgpack-rpc/blob/master/spec.md
+type msgpackSpecRpc struct{}
+
+// MsgpackSpecRpc implements Rpc using the communication protocol defined in
+// the msgpack spec at https://github.com/msgpack-rpc/msgpack-rpc/blob/master/spec.md .
+// Its methods (ServerCodec and ClientCodec) return values that implement RpcCodecBuffered.
+var MsgpackSpecRpc msgpackSpecRpc
+
+func (x msgpackSpecRpc) ServerCodec(conn io.ReadWriteCloser, h Handle) rpc.ServerCodec {
+	return &msgpackSpecRpcCodec{newRPCCodec(conn, h)} // the same codec type serves both server and client
+}
+
+func (x msgpackSpecRpc) ClientCodec(conn io.ReadWriteCloser, h Handle) rpc.ClientCodec {
+	return &msgpackSpecRpcCodec{newRPCCodec(conn, h)} // the same codec type serves both server and client
+}
+
+var _ decDriver = (*msgpackDecDriver)(nil)
+var _ encDriver = (*msgpackEncDriver)(nil)
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/msgpack_test.py b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/msgpack_test.py
new file mode 100644
index 00000000000..e933838c56a
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/msgpack_test.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+
+# This will create golden files in a directory passed to it.
+# A Test calls this internally to create the golden files
+# So it can process them (so we don't have to checkin the files).
+
+import msgpack, msgpackrpc, sys, os, threading
+
+def get_test_data_list():
+    # Returns the primitives of l0 one by one, then l0 itself as a list, then the maps of l1.
+    l0 = [
+        -8,
+         -1616,
+         -32323232,
+         -6464646464646464,
+         192,
+         1616,
+         32323232,
+         6464646464646464,
+         192,  # NOTE(review): repeats the 192 above - confirm whether intentional
+         -3232.0,
+         -6464646464.0,
+         3232.0,
+         6464646464.0,
+         False,
+         True,
+         None,
+         "someday",
+         "",
+         "bytestring",
+         1328176922000002000,
+         -2206187877999998000,
+         0,
+         -6795364578871345152
+         ]
+    l1 = [
+        { "true": True,
+          "false": False },
+        { "true": "True",
+          "false": False,
+          "uint16(1616)": 1616 },
+        { "list": [1616, 32323232, True, -3232.0, {"TRUE":True, "FALSE":False}, [True, False] ],
+          "int32":32323232, "bool": True,
+          "LONG STRING": "123456789012345678901234567890123456789012345678901234567890",
+          "SHORT STRING": "1234567890" },
+        { True: "true", 8: False, "false": 0 }  # non-string keys
+        ]
+
+    l = []
+    l.extend(l0)
+    l.append(l0)
+    l.extend(l1)
+    return l
+
+def build_test_data(destdir):
+    l = get_test_data_list()
+    for i in range(len(l)):
+        packer = msgpack.Packer()
+        serialized = packer.pack(l[i])
+        f = open(os.path.join(destdir, str(i) + '.golden'), 'wb')
+        f.write(serialized)
+        f.close()
+
+def doRpcServer(port, stopTimeSec):
+    class EchoHandler(object):
+        def Echo123(self, msg1, msg2, msg3):
+            return ("1:%s 2:%s 3:%s" % (msg1, msg2, msg3))
+        def EchoStruct(self, msg):
+            return ("%s" % msg)
+    
+    addr = msgpackrpc.Address('localhost', port)
+    server = msgpackrpc.Server(EchoHandler())
+    server.listen(addr)
+    # run thread to stop it after stopTimeSec seconds if > 0
+    if stopTimeSec > 0:
+        def myStopRpcServer():
+            server.stop()
+        t = threading.Timer(stopTimeSec, myStopRpcServer)
+        t.start()
+    server.start()
+
+def doRpcClientToPythonSvc(port):
+    address = msgpackrpc.Address('localhost', port)
+    client = msgpackrpc.Client(address, unpack_encoding='utf-8')
+    print client.call("Echo123", "A1", "B2", "C3")
+    print client.call("EchoStruct", {"A" :"Aa", "B":"Bb", "C":"Cc"})
+   
+def doRpcClientToGoSvc(port):
+    # print ">>>> port: ", port, " <<<<<"
+    address = msgpackrpc.Address('localhost', port)
+    client = msgpackrpc.Client(address, unpack_encoding='utf-8')
+    print client.call("TestRpcInt.Echo123", ["A1", "B2", "C3"])
+    print client.call("TestRpcInt.EchoStruct", {"A" :"Aa", "B":"Bb", "C":"Cc"})
+
+def doMain(args):
+    if len(args) == 2 and args[0] == "testdata":  # testdata <destdir>
+        build_test_data(args[1])
+    elif len(args) == 3 and args[0] == "rpc-server":  # rpc-server <port> <stopTimeSec>
+        doRpcServer(int(args[1]), int(args[2]))
+    elif len(args) == 2 and args[0] == "rpc-client-python-service":  # <port>
+        doRpcClientToPythonSvc(int(args[1]))
+    elif len(args) == 2 and args[0] == "rpc-client-go-service":  # <port>
+        doRpcClientToGoSvc(int(args[1]))
+    else:  # unknown command or wrong argument count
+        print("Usage: msgpack_test.py " + 
+              "[testdata|rpc-server|rpc-client-python-service|rpc-client-go-service] ...")
+    
+if __name__ == "__main__":
+    doMain(sys.argv[1:])
+
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/rpc.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/rpc.go
new file mode 100644
index 00000000000..d014dbdcc7d
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/rpc.go
@@ -0,0 +1,152 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+package codec
+
+import (
+	"bufio"
+	"io"
+	"net/rpc"
+	"sync"
+)
+
+// Rpc provides a rpc Server or Client Codec for rpc communication.
+type Rpc interface {
+	ServerCodec(conn io.ReadWriteCloser, h Handle) rpc.ServerCodec
+	ClientCodec(conn io.ReadWriteCloser, h Handle) rpc.ClientCodec
+}
+
+// RpcCodecBuffered allows access to the underlying bufio.Reader/Writer
+// used by the rpc connection. It accommodates use-cases where the connection
+// should be used by rpc and non-rpc functions, e.g. streaming a file after
+// sending an rpc response.
+type RpcCodecBuffered interface {
+	BufferedReader() *bufio.Reader
+	BufferedWriter() *bufio.Writer
+}
+
+// -------------------------------------
+
+// rpcCodec defines the struct members and common methods.
+type rpcCodec struct {
+	rwc io.ReadWriteCloser // underlying connection; closed by Close
+	dec *Decoder           // decodes from br
+	enc *Encoder           // encodes into bw
+	bw  *bufio.Writer      // buffered write side of rwc; flushed by write when asked
+	br  *bufio.Reader      // buffered read side of rwc
+	mu  sync.Mutex         // taken by goRpcCodec around WriteRequest/WriteResponse
+	cls bool               // set by Close; read, write and Close then return io.EOF
+}
+
+func newRPCCodec(conn io.ReadWriteCloser, h Handle) rpcCodec {
+	bw := bufio.NewWriter(conn) // both directions are buffered with default-sized buffers
+	br := bufio.NewReader(conn)
+	return rpcCodec{
+		rwc: conn,
+		bw:  bw,
+		br:  br,
+		enc: NewEncoder(bw, h), // the encoder and decoder work on the buffers, not on conn directly
+		dec: NewDecoder(br, h),
+	}
+}
+
+func (c *rpcCodec) BufferedReader() *bufio.Reader {
+	return c.br // the same reader the decoder consumes from
+}
+
+func (c *rpcCodec) BufferedWriter() *bufio.Writer {
+	return c.bw // the same writer the encoder produces into
+}
+
+func (c *rpcCodec) write(obj1, obj2 interface{}, writeObj2, doFlush bool) (err error) {
+	// A closed codec reports io.EOF instead of touching the connection.
+	if c.cls {
+		return io.EOF
+	}
+	// obj1 always goes out; obj2 only when requested and only if obj1 succeeded.
+	err = c.enc.Encode(obj1)
+	if err == nil && writeObj2 {
+		err = c.enc.Encode(obj2)
+	}
+	if err != nil || !doFlush || c.bw == nil {
+		return err
+	}
+	// Everything was encoded and a flush was requested:
+	// hand the buffered bytes to the connection.
+	return c.bw.Flush()
+}
+
+func (c *rpcCodec) read(obj interface{}) (err error) {
+	if c.cls {
+		return io.EOF
+	}
+	if obj == nil {
+		// Nothing to decode into: one value is still consumed from the
+		// stream, decoded into a throwaway interface{} and dropped.
+		var discard interface{}
+		obj = &discard
+	}
+	return c.dec.Decode(obj)
+}
+
+func (c *rpcCodec) Close() error {
+	if c.cls { // a second Close does not reach the connection; it reports io.EOF
+		return io.EOF
+	}
+	c.cls = true
+	return c.rwc.Close()
+}
+
+func (c *rpcCodec) ReadResponseBody(body interface{}) error {
+	return c.read(body) // a nil body makes read consume and discard the value
+}
+
+// -------------------------------------
+
+type goRpcCodec struct {
+	rpcCodec
+}
+
+func (c *goRpcCodec) WriteRequest(r *rpc.Request, body interface{}) error {
+	// Must protect for concurrent access as per API
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.write(r, body, true, true) // header, then body, then flush
+}
+
+func (c *goRpcCodec) WriteResponse(r *rpc.Response, body interface{}) error {
+	c.mu.Lock() // same locking as WriteRequest
+	defer c.mu.Unlock()
+	return c.write(r, body, true, true) // header, then body, then flush
+}
+
+func (c *goRpcCodec) ReadResponseHeader(r *rpc.Response) error {
+	return c.read(r) // the header is one encoded rpc.Response value
+}
+
+func (c *goRpcCodec) ReadRequestHeader(r *rpc.Request) error {
+	return c.read(r) // the header is one encoded rpc.Request value
+}
+
+func (c *goRpcCodec) ReadRequestBody(body interface{}) error {
+	return c.read(body) // a nil body makes read consume and discard the value
+}
+
+// -------------------------------------
+
+// goRpc is the implementation of Rpc that uses the communication protocol
+// as defined in net/rpc package.
+type goRpc struct{}
+
+// GoRpc implements Rpc using the communication protocol defined in net/rpc package.
+// Its methods (ServerCodec and ClientCodec) return values that implement RpcCodecBuffered.
+var GoRpc goRpc
+
+func (x goRpc) ServerCodec(conn io.ReadWriteCloser, h Handle) rpc.ServerCodec {
+	return &goRpcCodec{newRPCCodec(conn, h)} // the same codec type serves both server and client
+}
+
+func (x goRpc) ClientCodec(conn io.ReadWriteCloser, h Handle) rpc.ClientCodec {
+	return &goRpcCodec{newRPCCodec(conn, h)} // the same codec type serves both server and client
+}
+
+var _ RpcCodecBuffered = (*rpcCodec)(nil) // ensure *rpcCodec implements RpcCodecBuffered
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/simple.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/simple.go
new file mode 100644
index 00000000000..9e4d148a2a1
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/simple.go
@@ -0,0 +1,461 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+package codec
+
+import "math"
+
+const (
+	_               uint8 = iota
+	simpleVdNil           = 1
+	simpleVdFalse         = 2
+	simpleVdTrue          = 3
+	simpleVdFloat32       = 4
+	simpleVdFloat64       = 5
+
+	// each lasts for 4 (ie n, n+1, n+2, n+3)
+	simpleVdPosInt = 8
+	simpleVdNegInt = 12
+
+	// containers: each lasts for 8 (ie n, n+1, n+2, ... n+7)
+	simpleVdString    = 216
+	simpleVdByteArray = 224
+	simpleVdArray     = 232
+	simpleVdMap       = 240
+	simpleVdExt       = 248
+)
+
+type simpleEncDriver struct {
+	h *SimpleHandle
+	w encWriter
+	//b [8]byte
+}
+
+func (e *simpleEncDriver) isBuiltinType(rt uintptr) bool {
+	return false
+}
+
+func (e *simpleEncDriver) encodeBuiltin(rt uintptr, v interface{}) {
+}
+
+func (e *simpleEncDriver) encodeNil() {
+	e.w.writen1(simpleVdNil)
+}
+
+func (e *simpleEncDriver) encodeBool(b bool) {
+	if b {
+		e.w.writen1(simpleVdTrue)
+	} else {
+		e.w.writen1(simpleVdFalse)
+	}
+}
+
+func (e *simpleEncDriver) encodeFloat32(f float32) {
+	e.w.writen1(simpleVdFloat32)
+	e.w.writeUint32(math.Float32bits(f))
+}
+
+func (e *simpleEncDriver) encodeFloat64(f float64) {
+	e.w.writen1(simpleVdFloat64)
+	e.w.writeUint64(math.Float64bits(f))
+}
+
+func (e *simpleEncDriver) encodeInt(v int64) {
+	if v < 0 {
+		e.encUint(uint64(-v), simpleVdNegInt)
+	} else {
+		e.encUint(uint64(v), simpleVdPosInt)
+	}
+}
+
+func (e *simpleEncDriver) encodeUint(v uint64) {
+	e.encUint(v, simpleVdPosInt)
+}
+
+func (e *simpleEncDriver) encUint(v uint64, bd uint8) {
+	switch { // pick the narrowest width that holds v; the descriptor written is bd+0..bd+3
+	case v <= math.MaxUint8:
+		e.w.writen2(bd, uint8(v))
+	case v <= math.MaxUint16:
+		e.w.writen1(bd + 1)
+		e.w.writeUint16(uint16(v))
+	case v <= math.MaxUint32:
+		e.w.writen1(bd + 2)
+		e.w.writeUint32(uint32(v))
+	default: // every remaining uint64 value needs the full 8 bytes
+		e.w.writen1(bd + 3)
+		e.w.writeUint64(v)
+	}
+}
+
+func (e *simpleEncDriver) encLen(bd byte, length int) {
+	switch {
+	case length == 0:
+		e.w.writen1(bd)
+	case length <= math.MaxUint8:
+		e.w.writen1(bd + 1)
+		e.w.writen1(uint8(length))
+	case length <= math.MaxUint16:
+		e.w.writen1(bd + 2)
+		e.w.writeUint16(uint16(length))
+	case int64(length) <= math.MaxUint32:
+		e.w.writen1(bd + 3)
+		e.w.writeUint32(uint32(length))
+	default:
+		e.w.writen1(bd + 4)
+		e.w.writeUint64(uint64(length))
+	}
+}
+
+func (e *simpleEncDriver) encodeExtPreamble(xtag byte, length int) {
+	e.encLen(simpleVdExt, length)
+	e.w.writen1(xtag)
+}
+
+func (e *simpleEncDriver) encodeArrayPreamble(length int) {
+	e.encLen(simpleVdArray, length)
+}
+
+func (e *simpleEncDriver) encodeMapPreamble(length int) {
+	e.encLen(simpleVdMap, length)
+}
+
+func (e *simpleEncDriver) encodeString(c charEncoding, v string) {
+	e.encLen(simpleVdString, len(v))
+	e.w.writestr(v)
+}
+
+func (e *simpleEncDriver) encodeSymbol(v string) {
+	e.encodeString(c_UTF8, v)
+}
+
+func (e *simpleEncDriver) encodeStringBytes(c charEncoding, v []byte) {
+	e.encLen(simpleVdByteArray, len(v))
+	e.w.writeb(v)
+}
+
+//------------------------------------
+
+type simpleDecDriver struct {
+	h      *SimpleHandle
+	r      decReader
+	bdRead bool
+	bdType valueType
+	bd     byte
+	//b      [8]byte
+}
+
+func (d *simpleDecDriver) initReadNext() {
+	if d.bdRead {
+		return
+	}
+	d.bd = d.r.readn1()
+	d.bdRead = true
+	d.bdType = valueTypeUnset
+}
+
+func (d *simpleDecDriver) currentEncodedType() valueType {
+	if d.bdType == valueTypeUnset {
+		switch d.bd {
+		case simpleVdNil:
+			d.bdType = valueTypeNil
+		case simpleVdTrue, simpleVdFalse:
+			d.bdType = valueTypeBool
+		case simpleVdPosInt, simpleVdPosInt + 1, simpleVdPosInt + 2, simpleVdPosInt + 3:
+			d.bdType = valueTypeUint
+		case simpleVdNegInt, simpleVdNegInt + 1, simpleVdNegInt + 2, simpleVdNegInt + 3:
+			d.bdType = valueTypeInt
+		case simpleVdFloat32, simpleVdFloat64:
+			d.bdType = valueTypeFloat
+		case simpleVdString, simpleVdString + 1, simpleVdString + 2, simpleVdString + 3, simpleVdString + 4:
+			d.bdType = valueTypeString
+		case simpleVdByteArray, simpleVdByteArray + 1, simpleVdByteArray + 2, simpleVdByteArray + 3, simpleVdByteArray + 4:
+			d.bdType = valueTypeBytes
+		case simpleVdExt, simpleVdExt + 1, simpleVdExt + 2, simpleVdExt + 3, simpleVdExt + 4:
+			d.bdType = valueTypeExt
+		case simpleVdArray, simpleVdArray + 1, simpleVdArray + 2, simpleVdArray + 3, simpleVdArray + 4:
+			d.bdType = valueTypeArray
+		case simpleVdMap, simpleVdMap + 1, simpleVdMap + 2, simpleVdMap + 3, simpleVdMap + 4:
+			d.bdType = valueTypeMap
+		default:
+			decErr("currentEncodedType: Unrecognized d.vd: 0x%x", d.bd)
+		}
+	}
+	return d.bdType
+}
+
+func (d *simpleDecDriver) tryDecodeAsNil() bool {
+	if d.bd == simpleVdNil {
+		d.bdRead = false
+		return true
+	}
+	return false
+}
+
+func (d *simpleDecDriver) isBuiltinType(rt uintptr) bool {
+	return false
+}
+
+func (d *simpleDecDriver) decodeBuiltin(rt uintptr, v interface{}) {
+}
+
+func (d *simpleDecDriver) decIntAny() (ui uint64, i int64, neg bool) {
+	switch d.bd {
+	case simpleVdPosInt:
+		ui = uint64(d.r.readn1())
+		i = int64(ui)
+	case simpleVdPosInt + 1:
+		ui = uint64(d.r.readUint16())
+		i = int64(ui)
+	case simpleVdPosInt + 2:
+		ui = uint64(d.r.readUint32())
+		i = int64(ui)
+	case simpleVdPosInt + 3:
+		ui = uint64(d.r.readUint64())
+		i = int64(ui)
+	case simpleVdNegInt:
+		ui = uint64(d.r.readn1())
+		i = -(int64(ui))
+		neg = true
+	case simpleVdNegInt + 1:
+		ui = uint64(d.r.readUint16())
+		i = -(int64(ui))
+		neg = true
+	case simpleVdNegInt + 2:
+		ui = uint64(d.r.readUint32())
+		i = -(int64(ui))
+		neg = true
+	case simpleVdNegInt + 3:
+		ui = uint64(d.r.readUint64())
+		i = -(int64(ui))
+		neg = true
+	default:
+		decErr("decIntAny: Integer only valid from pos/neg integer1..8. Invalid descriptor: %v", d.bd)
+	}
+	// don't do this check, because callers may only want the unsigned value.
+	// if ui > math.MaxInt64 {
+	// 	decErr("decIntAny: Integer out of range for signed int64: %v", ui)
+	// }
+	return
+}
+
+func (d *simpleDecDriver) decodeInt(bitsize uint8) (i int64) {
+	_, i, _ = d.decIntAny()
+	checkOverflow(0, i, bitsize)
+	d.bdRead = false
+	return
+}
+
+func (d *simpleDecDriver) decodeUint(bitsize uint8) (ui uint64) {
+	ui, i, neg := d.decIntAny()
+	if neg {
+		decErr("Assigning negative signed value: %v, to unsigned type", i)
+	}
+	checkOverflow(ui, 0, bitsize)
+	d.bdRead = false
+	return
+}
+
+func (d *simpleDecDriver) decodeFloat(chkOverflow32 bool) (f float64) {
+	switch d.bd {
+	case simpleVdFloat32:
+		f = float64(math.Float32frombits(d.r.readUint32()))
+	case simpleVdFloat64:
+		f = math.Float64frombits(d.r.readUint64())
+	default:
+		if d.bd >= simpleVdPosInt && d.bd <= simpleVdNegInt+3 {
+			_, i, _ := d.decIntAny()
+			f = float64(i)
+		} else {
+			decErr("Float only valid from float32/64: Invalid descriptor: %v", d.bd)
+		}
+	}
+	checkOverflowFloat32(f, chkOverflow32)
+	d.bdRead = false
+	return
+}
+
+// bool can be decoded from bool only (single byte).
+func (d *simpleDecDriver) decodeBool() (b bool) {
+	switch d.bd {
+	case simpleVdTrue:
+		b = true
+	case simpleVdFalse:
+	default:
+		decErr("Invalid single-byte value for bool: %s: %x", msgBadDesc, d.bd)
+	}
+	d.bdRead = false
+	return
+}
+
+func (d *simpleDecDriver) readMapLen() (length int) {
+	d.bdRead = false
+	return d.decLen()
+}
+
+func (d *simpleDecDriver) readArrayLen() (length int) {
+	d.bdRead = false
+	return d.decLen()
+}
+
+func (d *simpleDecDriver) decLen() int {
+	switch d.bd % 8 { // low 3 bits of the descriptor select how the length is stored
+	case 0:
+		return 0 // no length bytes are read for this descriptor
+	case 1:
+		return int(d.r.readn1())
+	case 2:
+		return int(d.r.readUint16())
+	case 3:
+		ui := uint64(d.r.readUint32())
+		checkOverflow(ui, 0, intBitsize)
+		return int(ui)
+	case 4:
+		ui := d.r.readUint64()
+		checkOverflow(ui, 0, intBitsize)
+		return int(ui)
+	}
+	decErr("decLen: Cannot read length: bd%%8 must be in range 0..4. Got: %d", d.bd%8) // %% keeps "bd%8" literal
+	return -1
+}
+
+func (d *simpleDecDriver) decodeString() (s string) {
+	s = string(d.r.readn(d.decLen()))
+	d.bdRead = false
+	return
+}
+
+func (d *simpleDecDriver) decodeBytes(bs []byte) (bsOut []byte, changed bool) {
+	if clen := d.decLen(); clen > 0 {
+		// if no contents in stream, don't update the passed byteslice
+		if len(bs) != clen {
+			if len(bs) > clen {
+				bs = bs[:clen]
+			} else {
+				bs = make([]byte, clen)
+			}
+			bsOut = bs
+			changed = true
+		}
+		d.r.readb(bs)
+	}
+	d.bdRead = false
+	return
+}
+
+func (d *simpleDecDriver) decodeExt(verifyTag bool, tag byte) (xtag byte, xbs []byte) {
+	switch d.bd {
+	case simpleVdExt, simpleVdExt + 1, simpleVdExt + 2, simpleVdExt + 3, simpleVdExt + 4:
+		l := d.decLen()
+		xtag = d.r.readn1()
+		if verifyTag && xtag != tag {
+			decErr("Wrong extension tag. Got %b. Expecting: %v", xtag, tag)
+		}
+		xbs = d.r.readn(l)
+	case simpleVdByteArray, simpleVdByteArray + 1, simpleVdByteArray + 2, simpleVdByteArray + 3, simpleVdByteArray + 4:
+		xbs, _ = d.decodeBytes(nil)
+	default:
+		decErr("Invalid d.vd for extensions (Expecting extensions or byte array). Got: 0x%x", d.bd)
+	}
+	d.bdRead = false
+	return
+}
+
+func (d *simpleDecDriver) decodeNaked() (v interface{}, vt valueType, decodeFurther bool) {
+	d.initReadNext()
+
+	switch d.bd {
+	case simpleVdNil:
+		vt = valueTypeNil
+	case simpleVdFalse:
+		vt = valueTypeBool
+		v = false
+	case simpleVdTrue:
+		vt = valueTypeBool
+		v = true
+	case simpleVdPosInt, simpleVdPosInt + 1, simpleVdPosInt + 2, simpleVdPosInt + 3:
+		vt = valueTypeUint
+		ui, _, _ := d.decIntAny()
+		v = ui
+	case simpleVdNegInt, simpleVdNegInt + 1, simpleVdNegInt + 2, simpleVdNegInt + 3:
+		vt = valueTypeInt
+		_, i, _ := d.decIntAny()
+		v = i
+	case simpleVdFloat32:
+		vt = valueTypeFloat
+		v = d.decodeFloat(true)
+	case simpleVdFloat64:
+		vt = valueTypeFloat
+		v = d.decodeFloat(false)
+	case simpleVdString, simpleVdString + 1, simpleVdString + 2, simpleVdString + 3, simpleVdString + 4:
+		vt = valueTypeString
+		v = d.decodeString()
+	case simpleVdByteArray, simpleVdByteArray + 1, simpleVdByteArray + 2, simpleVdByteArray + 3, simpleVdByteArray + 4:
+		vt = valueTypeBytes
+		v, _ = d.decodeBytes(nil)
+	case simpleVdExt, simpleVdExt + 1, simpleVdExt + 2, simpleVdExt + 3, simpleVdExt + 4:
+		vt = valueTypeExt
+		l := d.decLen()
+		var re RawExt
+		re.Tag = d.r.readn1()
+		re.Data = d.r.readn(l)
+		v = &re
+		vt = valueTypeExt
+	case simpleVdArray, simpleVdArray + 1, simpleVdArray + 2, simpleVdArray + 3, simpleVdArray + 4:
+		vt = valueTypeArray
+		decodeFurther = true
+	case simpleVdMap, simpleVdMap + 1, simpleVdMap + 2, simpleVdMap + 3, simpleVdMap + 4:
+		vt = valueTypeMap
+		decodeFurther = true
+	default:
+		decErr("decodeNaked: Unrecognized d.vd: 0x%x", d.bd)
+	}
+
+	if !decodeFurther {
+		d.bdRead = false
+	}
+	return
+}
+
+//------------------------------------
+
+// SimpleHandle is a Handle for a very simple encoding format.
+//
+// simple is a simplistic codec similar to binc, but not as compact.
+//   - Encoding of a value is always preceded by the descriptor byte (bd)
+//   - True, false, nil are encoded fully in 1 byte (the descriptor)
+//   - Integers (intXXX, uintXXX) are encoded in 1, 2, 4 or 8 bytes (plus a descriptor byte).
+//     There are positive (uintXXX and intXXX >= 0) and negative (intXXX < 0) integers.
+//   - Floats are encoded in 4 or 8 bytes (plus a descriptor byte)
+//   - Lengths of containers (strings, bytes, array, map, extensions)
+//     are encoded in 0, 1, 2, 4 or 8 bytes.
+//     Zero-length containers have no length encoded.
+//     For others, the number of bytes is given by pow(2, bd%8 - 1)
+//   - maps are encoded as [bd] [length] [[key][value]]...
+//   - arrays are encoded as [bd] [length] [value]...
+//   - extensions are encoded as [bd] [length] [tag] [byte]...
+//   - strings/bytearrays are encoded as [bd] [length] [byte]...
+//
+// The full spec will be published soon.
+type SimpleHandle struct {
+	BasicHandle
+}
+
+func (h *SimpleHandle) newEncDriver(w encWriter) encDriver {
+	return &simpleEncDriver{w: w, h: h}
+}
+
+func (h *SimpleHandle) newDecDriver(r decReader) decDriver {
+	return &simpleDecDriver{r: r, h: h}
+}
+
+func (_ *SimpleHandle) writeExt() bool {
+	return true
+}
+
+func (h *SimpleHandle) getBasicHandle() *BasicHandle {
+	return &h.BasicHandle
+}
+
+var _ decDriver = (*simpleDecDriver)(nil)
+var _ encDriver = (*simpleEncDriver)(nil)
diff --git a/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/time.go b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/time.go
new file mode 100644
index 00000000000..c86d65328d7
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/go-msgpack/codec/time.go
@@ -0,0 +1,193 @@
+// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
+// Use of this source code is governed by a BSD-style license found in the LICENSE file.
+
+package codec
+
+import (
+	"time"
+)
+
+var (
+	timeDigits = [...]byte{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
+)
+
+// encodeTime encodes a time.Time as a []byte, including
+// information on the instant in time and UTC offset.
+//
+// Format Description
+//
+//   A timestamp is composed of 3 components:
+//
+//   - secs: signed integer representing seconds since unix epoch
+//   - nsecs: unsigned integer representing fractional seconds as a
+//     nanosecond offset within secs, in the range 0 <= nsecs < 1e9
+//   - tz: signed integer representing timezone offset in minutes east of UTC,
+//     and a dst (daylight savings time) flag
+//
+//   When encoding a timestamp, the first byte is the descriptor, which
+//   defines which components are encoded and how many bytes are used to
+//   encode secs and nsecs components. *If secs/nsecs is 0 or tz is UTC, it
+//   is not encoded in the byte array explicitly*.
+//
+//       Descriptor 8 bits are of the form `A B C DDD EE`:
+//           A:   Is secs component encoded? 1 = true
+//           B:   Is nsecs component encoded? 1 = true
+//           C:   Is tz component encoded? 1 = true
+//           DDD: Number of extra bytes for secs (range 0-7).
+//                If A = 1, secs encoded in DDD+1 bytes.
+//                    If A = 0, secs is not encoded, and is assumed to be 0.
+//                    If A = 1, then we need at least 1 byte to encode secs.
+//                    DDD says the number of extra bytes beyond that 1.
+//                    E.g. if DDD=0, then secs is represented in 1 byte.
+//                         if DDD=2, then secs is represented in 3 bytes.
+//           EE:  Number of extra bytes for nsecs (range 0-3).
+//                If B = 1, nsecs encoded in EE+1 bytes (similar to secs/DDD above)
+//
+//   Following the descriptor bytes, subsequent bytes are:
+//
+//       secs component encoded in `DDD + 1` bytes (if A == 1)
+//       nsecs component encoded in `EE + 1` bytes (if B == 1)
+//       tz component encoded in 2 bytes (if C == 1)
+//
+//   secs and nsecs components are integers encoded in a BigEndian
+//   2-complement encoding format.
+//
+//   tz component is encoded as 2 bytes (16 bits). Most significant bit 15 to
+//   Least significant bit 0 are described below:
+//
+//       Timezone offset has a range of -12:00 to +14:00 (ie -720 to +840 minutes).
+//       Bit 15 = have\_dst: set to 1 if we set the dst flag.
+//       Bit 14 = dst\_on: set to 1 if dst is in effect at the time, or 0 if not.
+//       Bits 13..0 = timezone offset in minutes. It is a signed integer in Big Endian format.
+//
+func encodeTime(t time.Time) []byte {
+	//t := rv.Interface().(time.Time)
+	tsecs, tnsecs := t.Unix(), t.Nanosecond()
+	var (
+		bd   byte
+		btmp [8]byte
+		bs   [16]byte
+		i    int = 1
+	)
+	l := t.Location()
+	if l == time.UTC {
+		l = nil
+	}
+	if tsecs != 0 {
+		bd = bd | 0x80
+		bigen.PutUint64(btmp[:], uint64(tsecs))
+		f := pruneSignExt(btmp[:], tsecs >= 0)
+		bd = bd | (byte(7-f) << 2)
+		copy(bs[i:], btmp[f:])
+		i = i + (8 - f)
+	}
+	if tnsecs != 0 {
+		bd = bd | 0x40
+		bigen.PutUint32(btmp[:4], uint32(tnsecs))
+		f := pruneSignExt(btmp[:4], true)
+		bd = bd | byte(3-f)
+		copy(bs[i:], btmp[f:4])
+		i = i + (4 - f)
+	}
+	if l != nil {
+		bd = bd | 0x20
+		// Note that Go Libs do not give access to dst flag.
+		_, zoneOffset := t.Zone()
+		//zoneName, zoneOffset := t.Zone()
+		zoneOffset /= 60
+		z := uint16(zoneOffset)
+		bigen.PutUint16(btmp[:2], z)
+		// clear dst flags
+		bs[i] = btmp[0] & 0x3f
+		bs[i+1] = btmp[1]
+		i = i + 2
+	}
+	bs[0] = bd
+	return bs[0:i]
+}
+
+// decodeTime decodes a []byte, laid out as described on encodeTime, into a time.Time.
+func decodeTime(bs []byte) (tt time.Time, err error) {
+	bd := bs[0] // NOTE(review): bs is indexed here and below without length checks (short input can panic); err is never assigned
+	var (
+		tsec  int64
+		tnsec uint32
+		tz    uint16
+		i     byte = 1
+		i2    byte
+		n     byte
+	)
+	if bd&(1<<7) != 0 { // bit 7 set: secs component is present
+		var btmp [8]byte
+		n = ((bd >> 2) & 0x7) + 1 // DDD+1 = number of bytes holding secs
+		i2 = i + n
+		copy(btmp[8-n:], bs[i:i2])
+		//if first bit of bs[i] is set, then fill btmp[0..8-n] with 0xff (ie sign extend it)
+		if bs[i]&(1<<7) != 0 {
+			copy(btmp[0:8-n], bsAll0xff)
+			//for j,k := byte(0), 8-n; j < k; j++ {	btmp[j] = 0xff }
+		}
+		i = i2
+		tsec = int64(bigen.Uint64(btmp[:]))
+	}
+	if bd&(1<<6) != 0 { // bit 6 set: nsecs component is present
+		var btmp [4]byte
+		n = (bd & 0x3) + 1 // EE+1 = number of bytes holding nsecs
+		i2 = i + n
+		copy(btmp[4-n:], bs[i:i2])
+		i = i2
+		tnsec = bigen.Uint32(btmp[:])
+	}
+	if bd&(1<<5) == 0 { // bit 5 clear: no tz component, so the result is in UTC
+		tt = time.Unix(tsec, int64(tnsec)).UTC()
+		return
+	}
+	// In stdlib time.Parse, when a date is parsed without a zone name, it uses "" as zone name.
+	// However, we need name here, so it can be shown when time is printed.
+	// Zone name is in form: UTC-08:00.
+	// Note that Go Libs do not give access to dst flag, so we ignore dst bits
+
+	i2 = i + 2
+	tz = bigen.Uint16(bs[i:i2])
+	i = i2
+	// sign extend sign bit into top 2 MSB (which were dst bits):
+	if tz&(1<<13) == 0 { // positive
+		tz = tz & 0x3fff //clear 2 MSBs: dst bits
+	} else { // negative
+		tz = tz | 0xc000 //set 2 MSBs: dst bits
+		//tzname[3] = '-' (TODO: verify. this works here)
+	}
+	tzint := int16(tz)
+	if tzint == 0 {
+		tt = time.Unix(tsec, int64(tnsec)).UTC()
+	} else {
+		// For Go Time, do not use a descriptive timezone.
+		// It's unnecessary, and makes it harder to do a reflect.DeepEqual.
+		// The Offset already tells what the offset should be, if not on UTC and unknown zone name.
+		// var zoneName = timeLocUTCName(tzint)
+		tt = time.Unix(tsec, int64(tnsec)).In(time.FixedZone("", int(tzint)*60))
+	}
+	return
+}
+
+func timeLocUTCName(tzint int16) string {
+	if tzint == 0 {
+		return "UTC"
+	}
+	var tzname = []byte("UTC+00:00")
+	//tzname := fmt.Sprintf("UTC%s%02d:%02d", tzsign, tz/60, tz%60) //perf issue using Sprintf. inline below.
+	//tzhr, tzmin := tz/60, tz%60 //faster if u convert to int first
+	var tzhr, tzmin int16
+	if tzint < 0 {
+		tzname[3] = '-' // (TODO: verify. this works here)
+		tzhr, tzmin = -tzint/60, (-tzint)%60
+	} else {
+		tzhr, tzmin = tzint/60, tzint%60
+	}
+	tzname[4] = timeDigits[tzhr/10]
+	tzname[5] = timeDigits[tzhr%10]
+	tzname[7] = timeDigits[tzmin/10]
+	tzname[8] = timeDigits[tzmin%10]
+	return string(tzname)
+	//return time.FixedZone(string(tzname), int(tzint)*60)
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/LICENSE b/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/LICENSE
new file mode 100644
index 00000000000..f0e5c79e181
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/LICENSE
@@ -0,0 +1,362 @@
+Mozilla Public License, version 2.0
+
+1. Definitions
+
+1.1. "Contributor"
+
+     means each individual or legal entity that creates, contributes to the
+     creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+
+     means the combination of the Contributions of others (if any) used by a
+     Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+
+     means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+
+     means Source Code Form to which the initial Contributor has attached the
+     notice in Exhibit A, the Executable Form of such Source Code Form, and
+     Modifications of such Source Code Form, in each case including portions
+     thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+     means
+
+     a. that the initial Contributor has attached the notice described in
+        Exhibit B to the Covered Software; or
+
+     b. that the Covered Software was made available under the terms of
+        version 1.1 or earlier of the License, but not also under the terms of
+        a Secondary License.
+
+1.6. "Executable Form"
+
+     means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+
+     means a work that combines Covered Software with other material, in a
+     separate file or files, that is not Covered Software.
+
+1.8. "License"
+
+     means this document.
+
+1.9. "Licensable"
+
+     means having the right to grant, to the maximum extent possible, whether
+     at the time of the initial grant or subsequently, any and all of the
+     rights conveyed by this License.
+
+1.10. "Modifications"
+
+     means any of the following:
+
+     a. any file in Source Code Form that results from an addition to,
+        deletion from, or modification of the contents of Covered Software; or
+
+     b. any new file in Source Code Form that contains any Covered Software.
+
+1.11. "Patent Claims" of a Contributor
+
+      means any patent claim(s), including without limitation, method,
+      process, and apparatus claims, in any patent Licensable by such
+      Contributor that would be infringed, but for the grant of the License,
+      by the making, using, selling, offering for sale, having made, import,
+      or transfer of either its Contributions or its Contributor Version.
+
+1.12. "Secondary License"
+
+      means either the GNU General Public License, Version 2.0, the GNU Lesser
+      General Public License, Version 2.1, the GNU Affero General Public
+      License, Version 3.0, or any later versions of those licenses.
+
+1.13. "Source Code Form"
+
+      means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+
+      means an individual or a legal entity exercising rights under this
+      License. For legal entities, "You" includes any entity that controls, is
+      controlled by, or is under common control with You. For purposes of this
+      definition, "control" means (a) the power, direct or indirect, to cause
+      the direction or management of such entity, whether by contract or
+      otherwise, or (b) ownership of more than fifty percent (50%) of the
+      outstanding shares or beneficial ownership of such entity.
+
+
+2. License Grants and Conditions
+
+2.1. Grants
+
+     Each Contributor hereby grants You a world-wide, royalty-free,
+     non-exclusive license:
+
+     a. under intellectual property rights (other than patent or trademark)
+        Licensable by such Contributor to use, reproduce, make available,
+        modify, display, perform, distribute, and otherwise exploit its
+        Contributions, either on an unmodified basis, with Modifications, or
+        as part of a Larger Work; and
+
+     b. under Patent Claims of such Contributor to make, use, sell, offer for
+        sale, have made, import, and otherwise transfer either its
+        Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+     The licenses granted in Section 2.1 with respect to any Contribution
+     become effective for each Contribution on the date the Contributor first
+     distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+     The licenses granted in this Section 2 are the only rights granted under
+     this License. No additional rights or licenses will be implied from the
+     distribution or licensing of Covered Software under this License.
+     Notwithstanding Section 2.1(b) above, no patent license is granted by a
+     Contributor:
+
+     a. for any code that a Contributor has removed from Covered Software; or
+
+     b. for infringements caused by: (i) Your and any other third party's
+        modifications of Covered Software, or (ii) the combination of its
+        Contributions with other software (except as part of its Contributor
+        Version); or
+
+     c. under Patent Claims infringed by Covered Software in the absence of
+        its Contributions.
+
+     This License does not grant any rights in the trademarks, service marks,
+     or logos of any Contributor (except as may be necessary to comply with
+     the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+     No Contributor makes additional grants as a result of Your choice to
+     distribute the Covered Software under a subsequent version of this
+     License (see Section 10.2) or under the terms of a Secondary License (if
+     permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+     Each Contributor represents that the Contributor believes its
+     Contributions are its original creation(s) or it has sufficient rights to
+     grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+     This License is not intended to limit any rights You have under
+     applicable copyright doctrines of fair use, fair dealing, or other
+     equivalents.
+
+2.7. Conditions
+
+     Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
+     Section 2.1.
+
+
+3. Responsibilities
+
+3.1. Distribution of Source Form
+
+     All distribution of Covered Software in Source Code Form, including any
+     Modifications that You create or to which You contribute, must be under
+     the terms of this License. You must inform recipients that the Source
+     Code Form of the Covered Software is governed by the terms of this
+     License, and how they can obtain a copy of this License. You may not
+     attempt to alter or restrict the recipients' rights in the Source Code
+     Form.
+
+3.2. Distribution of Executable Form
+
+     If You distribute Covered Software in Executable Form then:
+
+     a. such Covered Software must also be made available in Source Code Form,
+        as described in Section 3.1, and You must inform recipients of the
+        Executable Form how they can obtain a copy of such Source Code Form by
+        reasonable means in a timely manner, at a charge no more than the cost
+        of distribution to the recipient; and
+
+     b. You may distribute such Executable Form under the terms of this
+        License, or sublicense it under different terms, provided that the
+        license for the Executable Form does not attempt to limit or alter the
+        recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+     You may create and distribute a Larger Work under terms of Your choice,
+     provided that You also comply with the requirements of this License for
+     the Covered Software. If the Larger Work is a combination of Covered
+     Software with a work governed by one or more Secondary Licenses, and the
+     Covered Software is not Incompatible With Secondary Licenses, this
+     License permits You to additionally distribute such Covered Software
+     under the terms of such Secondary License(s), so that the recipient of
+     the Larger Work may, at their option, further distribute the Covered
+     Software under the terms of either this License or such Secondary
+     License(s).
+
+3.4. Notices
+
+     You may not remove or alter the substance of any license notices
+     (including copyright notices, patent notices, disclaimers of warranty, or
+     limitations of liability) contained within the Source Code Form of the
+     Covered Software, except that You may alter any license notices to the
+     extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+     You may choose to offer, and to charge a fee for, warranty, support,
+     indemnity or liability obligations to one or more recipients of Covered
+     Software. However, You may do so only on Your own behalf, and not on
+     behalf of any Contributor. You must make it absolutely clear that any
+     such warranty, support, indemnity, or liability obligation is offered by
+     You alone, and You hereby agree to indemnify every Contributor for any
+     liability incurred by such Contributor as a result of warranty, support,
+     indemnity or liability terms You offer. You may include additional
+     disclaimers of warranty and limitations of liability specific to any
+     jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+
+   If it is impossible for You to comply with any of the terms of this License
+   with respect to some or all of the Covered Software due to statute,
+   judicial order, or regulation then You must: (a) comply with the terms of
+   this License to the maximum extent possible; and (b) describe the
+   limitations and the code they affect. Such description must be placed in a
+   text file included with all distributions of the Covered Software under
+   this License. Except to the extent prohibited by statute or regulation,
+   such description must be sufficiently detailed for a recipient of ordinary
+   skill to be able to understand it.
+
+5. Termination
+
+5.1. The rights granted under this License will terminate automatically if You
+     fail to comply with any of its terms. However, if You become compliant,
+     then the rights granted under this License from a particular Contributor
+     are reinstated (a) provisionally, unless and until such Contributor
+     explicitly and finally terminates Your grants, and (b) on an ongoing
+     basis, if such Contributor fails to notify You of the non-compliance by
+     some reasonable means prior to 60 days after You have come back into
+     compliance. Moreover, Your grants from a particular Contributor are
+     reinstated on an ongoing basis if such Contributor notifies You of the
+     non-compliance by some reasonable means, this is the first time You have
+     received notice of non-compliance with this License from such
+     Contributor, and You become compliant prior to 30 days after Your receipt
+     of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+     infringement claim (excluding declaratory judgment actions,
+     counter-claims, and cross-claims) alleging that a Contributor Version
+     directly or indirectly infringes any patent, then the rights granted to
+     You by any and all Contributors for the Covered Software under Section
+     2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
+     license agreements (excluding distributors and resellers) which have been
+     validly granted by You or Your distributors under this License prior to
+     termination shall survive termination.
+
+6. Disclaimer of Warranty
+
+   Covered Software is provided under this License on an "as is" basis,
+   without warranty of any kind, either expressed, implied, or statutory,
+   including, without limitation, warranties that the Covered Software is free
+   of defects, merchantable, fit for a particular purpose or non-infringing.
+   The entire risk as to the quality and performance of the Covered Software
+   is with You. Should any Covered Software prove defective in any respect,
+   You (not any Contributor) assume the cost of any necessary servicing,
+   repair, or correction. This disclaimer of warranty constitutes an essential
+   part of this License. No use of  any Covered Software is authorized under
+   this License except under this disclaimer.
+
+7. Limitation of Liability
+
+   Under no circumstances and under no legal theory, whether tort (including
+   negligence), contract, or otherwise, shall any Contributor, or anyone who
+   distributes Covered Software as permitted above, be liable to You for any
+   direct, indirect, special, incidental, or consequential damages of any
+   character including, without limitation, damages for lost profits, loss of
+   goodwill, work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses, even if such party shall have been
+   informed of the possibility of such damages. This limitation of liability
+   shall not apply to liability for death or personal injury resulting from
+   such party's negligence to the extent applicable law prohibits such
+   limitation. Some jurisdictions do not allow the exclusion or limitation of
+   incidental or consequential damages, so this exclusion and limitation may
+   not apply to You.
+
+8. Litigation
+
+   Any litigation relating to this License may be brought only in the courts
+   of a jurisdiction where the defendant maintains its principal place of
+   business and such litigation shall be governed by laws of that
+   jurisdiction, without reference to its conflict-of-law provisions. Nothing
+   in this Section shall prevent a party's ability to bring cross-claims or
+   counter-claims.
+
+9. Miscellaneous
+
+   This License represents the complete agreement concerning the subject
+   matter hereof. If any provision of this License is held to be
+   unenforceable, such provision shall be reformed only to the extent
+   necessary to make it enforceable. Any law or regulation which provides that
+   the language of a contract shall be construed against the drafter shall not
+   be used to construe this License against a Contributor.
+
+
+10. Versions of the License
+
+10.1. New Versions
+
+      Mozilla Foundation is the license steward. Except as provided in Section
+      10.3, no one other than the license steward has the right to modify or
+      publish new versions of this License. Each version will be given a
+      distinguishing version number.
+
+10.2. Effect of New Versions
+
+      You may distribute the Covered Software under the terms of the version
+      of the License under which You originally received the Covered Software,
+      or under the terms of any subsequent version published by the license
+      steward.
+
+10.3. Modified Versions
+
+      If you create software not governed by this License, and you want to
+      create a new license for such software, you may create and use a
+      modified version of this License if you rename the license and remove
+      any references to the name of the license steward (except to note that
+      such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+      Licenses If You choose to distribute Source Code Form that is
+      Incompatible With Secondary Licenses under the terms of this version of
+      the License, the notice described in Exhibit B of this License must be
+      attached.
+
+Exhibit A - Source Code Form License Notice
+
+      This Source Code Form is subject to the
+      terms of the Mozilla Public License, v.
+      2.0. If a copy of the MPL was not
+      distributed with this file, You can
+      obtain one at
+      http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular file,
+then You may include the notice in a location (such as a LICENSE file in a
+relevant directory) where a recipient would be likely to look for such a
+notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+
+      This Source Code Form is "Incompatible
+      With Secondary Licenses", as defined by
+      the Mozilla Public License, v. 2.0.
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/README.md b/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/README.md
new file mode 100644
index 00000000000..5d7180ab9ec
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/README.md
@@ -0,0 +1,11 @@
+raft-boltdb
+===========
+
+This repository provides the `raftboltdb` package. The package exports the
+`BoltStore` which is an implementation of both a `LogStore` and `StableStore`.
+
+It is meant to be used as a backend for the `raft` [package
+here](https://github.com/hashicorp/raft).
+
+This implementation uses [BoltDB](https://github.com/boltdb/bolt). BoltDB is
+a simple key/value store implemented in pure Go, and inspired by LMDB.
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/bolt_store.go b/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/bolt_store.go
new file mode 100644
index 00000000000..ab6dd4803e6
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/bolt_store.go
@@ -0,0 +1,231 @@
+package raftboltdb
+
+import (
+	"errors"
+
+	"github.com/boltdb/bolt"
+	"github.com/hashicorp/raft"
+)
+
+const (
+	// dbFileMode is the permission mode handed to bolt.Open. It is only used
+	// if the database file does not exist and needs to be created.
+	dbFileMode = 0600
+)
+
+var (
+	// Bucket names: dbLogs holds the raft log entries, dbConf the Set/Get key/value pairs.
+	dbLogs = []byte("logs")
+	dbConf = []byte("conf")
+
+	// ErrKeyNotFound is returned by Get when the requested key does not exist.
+	ErrKeyNotFound = errors.New("not found")
+)
+
+// BoltStore persists Raft log entries in a BoltDB file and additionally
+// offers plain key/value storage, so one instance can be used as both a
+// LogStore and a StableStore.
+type BoltStore struct {
+	// conn is the underlying handle to the db, as opened by NewBoltStore.
+	conn *bolt.DB
+
+	// path is the location of the Bolt database file given to NewBoltStore.
+	path string
+}
+
+// NewBoltStore opens the BoltDB file at path and returns a store whose log
+// and conf buckets have been created if they were missing.
+func NewBoltStore(path string) (*BoltStore, error) {
+	db, err := bolt.Open(path, dbFileMode, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	s := &BoltStore{conn: db, path: path}
+
+	// Make sure both buckets exist before handing the store out. On failure
+	// the freshly opened handle is released again; the error from Close is
+	// dropped so that the caller sees the initialization error, which is
+	// the root cause.
+	initErr := s.initialize()
+	if initErr != nil {
+		s.Close()
+		return nil, initErr
+	}
+
+	return s, nil
+}
+
+// initialize creates the log and conf buckets, unless they already exist,
+// inside a single write transaction.
+func (b *BoltStore) initialize() error {
+	txn, err := b.conn.Begin(true)
+	if err != nil {
+		return err
+	}
+	defer txn.Rollback()
+
+	for _, name := range [][]byte{dbLogs, dbConf} {
+		_, err = txn.CreateBucketIfNotExists(name)
+		if err != nil {
+			return err
+		}
+	}
+
+	return txn.Commit()
+}
+
+// Close closes the underlying DB handle and returns the error from that call.
+func (b *BoltStore) Close() error {
+	return b.conn.Close()
+}
+
+// FirstIndex returns the first known index from the Raft log, or 0 when the
+// log bucket holds no entries.
+func (b *BoltStore) FirstIndex() (uint64, error) {
+	tx, err := b.conn.Begin(false)
+	if err != nil {
+		return 0, err
+	}
+	defer tx.Rollback()
+
+	first, _ := tx.Bucket(dbLogs).Cursor().First()
+	if first == nil {
+		return 0, nil
+	}
+	return bytesToUint64(first), nil
+}
+
+// LastIndex returns the last known index from the Raft log, or 0 when the
+// log bucket holds no entries.
+func (b *BoltStore) LastIndex() (uint64, error) {
+	tx, err := b.conn.Begin(false)
+	if err != nil {
+		return 0, err
+	}
+	defer tx.Rollback()
+
+	last, _ := tx.Bucket(dbLogs).Cursor().Last()
+	if last == nil {
+		return 0, nil
+	}
+	return bytesToUint64(last), nil
+}
+
+// GetLog decodes the entry stored under idx into log. It returns
+// raft.ErrLogNotFound when no entry exists at that index.
+func (b *BoltStore) GetLog(idx uint64, log *raft.Log) error {
+	txn, err := b.conn.Begin(false)
+	if err != nil {
+		return err
+	}
+	defer txn.Rollback()
+
+	// Entries are keyed by the big-endian encoding of their index.
+	raw := txn.Bucket(dbLogs).Get(uint64ToBytes(idx))
+	if raw == nil {
+		return raft.ErrLogNotFound
+	}
+	return decodeMsgPack(raw, log)
+}
+
+// StoreLog stores a single raft log by passing it to StoreLogs as a one-element slice.
+func (b *BoltStore) StoreLog(log *raft.Log) error {
+	return b.StoreLogs([]*raft.Log{log})
+}
+
+// StoreLogs stores a set of raft logs inside a single write transaction.
+func (b *BoltStore) StoreLogs(logs []*raft.Log) error {
+	tx, err := b.conn.Begin(true)
+	if err != nil {
+		return err
+	}
+	defer tx.Rollback()
+
+	bucket := tx.Bucket(dbLogs) // same bucket for every entry, so resolve it once
+	for _, log := range logs {
+		key := uint64ToBytes(log.Index)
+		val, err := encodeMsgPack(log)
+		if err != nil {
+			return err
+		}
+		if err := bucket.Put(key, val.Bytes()); err != nil {
+			return err
+		}
+	}
+
+	return tx.Commit()
+}
+
+// DeleteRange removes every log whose index lies between min and max, both
+// ends included, and commits the removal as one write transaction.
+func (b *BoltStore) DeleteRange(min, max uint64) error {
+	minKey := uint64ToBytes(min)
+
+	txn, err := b.conn.Begin(true)
+	if err != nil {
+		return err
+	}
+	defer txn.Rollback()
+
+	// Start at the position Seek reports for minKey and keep deleting until
+	// the cursor runs out of keys or reaches an index above max.
+	// NOTE(review): relies on the cursor yielding keys in ascending order.
+	cur := txn.Bucket(dbLogs).Cursor()
+	key, _ := cur.Seek(minKey)
+	for key != nil && bytesToUint64(key) <= max {
+		if err := cur.Delete(); err != nil {
+			return err
+		}
+		key, _ = cur.Next()
+	}
+
+	return txn.Commit()
+}
+
+// Set stores v under k in the conf bucket, which is separate from the
+// bucket that holds the raft log.
+func (b *BoltStore) Set(k, v []byte) error {
+	txn, err := b.conn.Begin(true)
+	if err != nil {
+		return err
+	}
+	defer txn.Rollback()
+
+	putErr := txn.Bucket(dbConf).Put(k, v)
+	if putErr != nil {
+		return putErr
+	}
+	return txn.Commit()
+}
+
+// Get returns a copy of the value stored under k in the conf bucket, or
+// ErrKeyNotFound when the key is absent.
+func (b *BoltStore) Get(k []byte) ([]byte, error) {
+	txn, err := b.conn.Begin(false)
+	if err != nil {
+		return nil, err
+	}
+	defer txn.Rollback()
+
+	stored := txn.Bucket(dbConf).Get(k)
+	if stored == nil {
+		return nil, ErrKeyNotFound
+	}
+	// Hand back a copy so the result does not alias the slice read in the transaction.
+	return append([]byte{}, stored...), nil
+}
+
+// SetUint64 is like Set, but stores val as its 8-byte big-endian encoding.
+func (b *BoltStore) SetUint64(key []byte, val uint64) error {
+	return b.Set(key, uint64ToBytes(val))
+}
+
+// GetUint64 is like Get, but decodes the stored bytes as a big-endian uint64.
+func (b *BoltStore) GetUint64(key []byte) (uint64, error) {
+	raw, getErr := b.Get(key)
+	if getErr != nil {
+		return 0, getErr
+	}
+	return bytesToUint64(raw), nil
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/util.go b/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/util.go
new file mode 100644
index 00000000000..68dd786b7ad
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft-boltdb/util.go
@@ -0,0 +1,37 @@
+package raftboltdb
+
+import (
+	"bytes"
+	"encoding/binary"
+
+	"github.com/hashicorp/go-msgpack/codec"
+)
+
+// decodeMsgPack decodes the msgpack-encoded bytes in buf into out.
+func decodeMsgPack(buf []byte, out interface{}) error {
+	var handle codec.MsgpackHandle
+	src := bytes.NewBuffer(buf)
+	decoder := codec.NewDecoder(src, &handle)
+	return decoder.Decode(out)
+}
+
+// encodeMsgPack msgpack-encodes in into a new bytes buffer. The buffer is
+// returned together with the encoder's error, even when that error is set.
+func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
+	var handle codec.MsgpackHandle
+	out := bytes.NewBuffer(nil)
+	encodeErr := codec.NewEncoder(out, &handle).Encode(in)
+	return out, encodeErr
+}
+
+// bytesToUint64 decodes the first 8 bytes of b as a big-endian uint64; b must hold at least 8 bytes.
+func bytesToUint64(b []byte) uint64 {
+	return binary.BigEndian.Uint64(b)
+}
+
+// uint64ToBytes returns the 8-byte big-endian encoding of u.
+func uint64ToBytes(u uint64) []byte {
+	var raw [8]byte
+	binary.BigEndian.PutUint64(raw[:], u)
+	return raw[:]
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/.gitignore b/Godeps/_workspace/src/github.com/hashicorp/raft/.gitignore
new file mode 100644
index 00000000000..836562412fe
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/.gitignore
@@ -0,0 +1,23 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/.travis.yml b/Godeps/_workspace/src/github.com/hashicorp/raft/.travis.yml
new file mode 100644
index 00000000000..5cf041d263a
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/.travis.yml
@@ -0,0 +1,14 @@
+language: go
+
+go:
+    - 1.2
+    - tip
+
+install: make deps
+script:
+    - make integ
+
+notifications:
+    flowdock:
+        secure: fZrcf9rlh2IrQrlch1sHkn3YI7SKvjGnAl/zyV5D6NROe1Bbr6d3QRMuCXWWdhJHzjKmXk5rIzbqJhUc0PNF7YjxGNKSzqWMQ56KcvN1k8DzlqxpqkcA3Jbs6fXCWo2fssRtZ7hj/wOP1f5n6cc7kzHDt9dgaYJ6nO2fqNPJiTc=
+
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/LICENSE b/Godeps/_workspace/src/github.com/hashicorp/raft/LICENSE
new file mode 100644
index 00000000000..c33dcc7c928
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/LICENSE
@@ -0,0 +1,354 @@
+Mozilla Public License, version 2.0
+
+1. Definitions
+
+1.1. “Contributor”
+
+     means each individual or legal entity that creates, contributes to the
+     creation of, or owns Covered Software.
+
+1.2. “Contributor Version”
+
+     means the combination of the Contributions of others (if any) used by a
+     Contributor and that particular Contributor’s Contribution.
+
+1.3. “Contribution”
+
+     means Covered Software of a particular Contributor.
+
+1.4. “Covered Software”
+
+     means Source Code Form to which the initial Contributor has attached the
+     notice in Exhibit A, the Executable Form of such Source Code Form, and
+     Modifications of such Source Code Form, in each case including portions
+     thereof.
+
+1.5. “Incompatible With Secondary Licenses”
+     means
+
+     a. that the initial Contributor has attached the notice described in
+        Exhibit B to the Covered Software; or
+
+     b. that the Covered Software was made available under the terms of version
+        1.1 or earlier of the License, but not also under the terms of a
+        Secondary License.
+
+1.6. “Executable Form”
+
+     means any form of the work other than Source Code Form.
+
+1.7. “Larger Work”
+
+     means a work that combines Covered Software with other material, in a separate
+     file or files, that is not Covered Software.
+
+1.8. “License”
+
+     means this document.
+
+1.9. “Licensable”
+
+     means having the right to grant, to the maximum extent possible, whether at the
+     time of the initial grant or subsequently, any and all of the rights conveyed by
+     this License.
+
+1.10. “Modifications”
+
+     means any of the following:
+
+     a. any file in Source Code Form that results from an addition to, deletion
+        from, or modification of the contents of Covered Software; or
+
+     b. any new file in Source Code Form that contains any Covered Software.
+
+1.11. “Patent Claims” of a Contributor
+
+      means any patent claim(s), including without limitation, method, process,
+      and apparatus claims, in any patent Licensable by such Contributor that
+      would be infringed, but for the grant of the License, by the making,
+      using, selling, offering for sale, having made, import, or transfer of
+      either its Contributions or its Contributor Version.
+
+1.12. “Secondary License”
+
+      means either the GNU General Public License, Version 2.0, the GNU Lesser
+      General Public License, Version 2.1, the GNU Affero General Public
+      License, Version 3.0, or any later versions of those licenses.
+
+1.13. “Source Code Form”
+
+      means the form of the work preferred for making modifications.
+
+1.14. “You” (or “Your”)
+
+      means an individual or a legal entity exercising rights under this
+      License. For legal entities, “You” includes any entity that controls, is
+      controlled by, or is under common control with You. For purposes of this
+      definition, “control” means (a) the power, direct or indirect, to cause
+      the direction or management of such entity, whether by contract or
+      otherwise, or (b) ownership of more than fifty percent (50%) of the
+      outstanding shares or beneficial ownership of such entity.
+
+
+2. License Grants and Conditions
+
+2.1. Grants
+
+     Each Contributor hereby grants You a world-wide, royalty-free,
+     non-exclusive license:
+
+     a. under intellectual property rights (other than patent or trademark)
+        Licensable by such Contributor to use, reproduce, make available,
+        modify, display, perform, distribute, and otherwise exploit its
+        Contributions, either on an unmodified basis, with Modifications, or as
+        part of a Larger Work; and
+
+     b. under Patent Claims of such Contributor to make, use, sell, offer for
+        sale, have made, import, and otherwise transfer either its Contributions
+        or its Contributor Version.
+
+2.2. Effective Date
+
+     The licenses granted in Section 2.1 with respect to any Contribution become
+     effective for each Contribution on the date the Contributor first distributes
+     such Contribution.
+
+2.3. Limitations on Grant Scope
+
+     The licenses granted in this Section 2 are the only rights granted under this
+     License. No additional rights or licenses will be implied from the distribution
+     or licensing of Covered Software under this License. Notwithstanding Section
+     2.1(b) above, no patent license is granted by a Contributor:
+
+     a. for any code that a Contributor has removed from Covered Software; or
+
+     b. for infringements caused by: (i) Your and any other third party’s
+        modifications of Covered Software, or (ii) the combination of its
+        Contributions with other software (except as part of its Contributor
+        Version); or
+
+     c. under Patent Claims infringed by Covered Software in the absence of its
+        Contributions.
+
+     This License does not grant any rights in the trademarks, service marks, or
+     logos of any Contributor (except as may be necessary to comply with the
+     notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+     No Contributor makes additional grants as a result of Your choice to
+     distribute the Covered Software under a subsequent version of this License
+     (see Section 10.2) or under the terms of a Secondary License (if permitted
+     under the terms of Section 3.3).
+
+2.5. Representation
+
+     Each Contributor represents that the Contributor believes its Contributions
+     are its original creation(s) or it has sufficient rights to grant the
+     rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+     This License is not intended to limit any rights You have under applicable
+     copyright doctrines of fair use, fair dealing, or other equivalents.
+
+2.7. Conditions
+
+     Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
+     Section 2.1.
+
+
+3. Responsibilities
+
+3.1. Distribution of Source Form
+
+     All distribution of Covered Software in Source Code Form, including any
+     Modifications that You create or to which You contribute, must be under the
+     terms of this License. You must inform recipients that the Source Code Form
+     of the Covered Software is governed by the terms of this License, and how
+     they can obtain a copy of this License. You may not attempt to alter or
+     restrict the recipients’ rights in the Source Code Form.
+
+3.2. Distribution of Executable Form
+
+     If You distribute Covered Software in Executable Form then:
+
+     a. such Covered Software must also be made available in Source Code Form,
+        as described in Section 3.1, and You must inform recipients of the
+        Executable Form how they can obtain a copy of such Source Code Form by
+        reasonable means in a timely manner, at a charge no more than the cost
+        of distribution to the recipient; and
+
+     b. You may distribute such Executable Form under the terms of this License,
+        or sublicense it under different terms, provided that the license for
+        the Executable Form does not attempt to limit or alter the recipients’
+        rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+     You may create and distribute a Larger Work under terms of Your choice,
+     provided that You also comply with the requirements of this License for the
+     Covered Software. If the Larger Work is a combination of Covered Software
+     with a work governed by one or more Secondary Licenses, and the Covered
+     Software is not Incompatible With Secondary Licenses, this License permits
+     You to additionally distribute such Covered Software under the terms of
+     such Secondary License(s), so that the recipient of the Larger Work may, at
+     their option, further distribute the Covered Software under the terms of
+     either this License or such Secondary License(s).
+
+3.4. Notices
+
+     You may not remove or alter the substance of any license notices (including
+     copyright notices, patent notices, disclaimers of warranty, or limitations
+     of liability) contained within the Source Code Form of the Covered
+     Software, except that You may alter any license notices to the extent
+     required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+     You may choose to offer, and to charge a fee for, warranty, support,
+     indemnity or liability obligations to one or more recipients of Covered
+     Software. However, You may do so only on Your own behalf, and not on behalf
+     of any Contributor. You must make it absolutely clear that any such
+     warranty, support, indemnity, or liability obligation is offered by You
+     alone, and You hereby agree to indemnify every Contributor for any
+     liability incurred by such Contributor as a result of warranty, support,
+     indemnity or liability terms You offer. You may include additional
+     disclaimers of warranty and limitations of liability specific to any
+     jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+
+   If it is impossible for You to comply with any of the terms of this License
+   with respect to some or all of the Covered Software due to statute, judicial
+   order, or regulation then You must: (a) comply with the terms of this License
+   to the maximum extent possible; and (b) describe the limitations and the code
+   they affect. Such description must be placed in a text file included with all
+   distributions of the Covered Software under this License. Except to the
+   extent prohibited by statute or regulation, such description must be
+   sufficiently detailed for a recipient of ordinary skill to be able to
+   understand it.
+
+5. Termination
+
+5.1. The rights granted under this License will terminate automatically if You
+     fail to comply with any of its terms. However, if You become compliant,
+     then the rights granted under this License from a particular Contributor
+     are reinstated (a) provisionally, unless and until such Contributor
+     explicitly and finally terminates Your grants, and (b) on an ongoing basis,
+     if such Contributor fails to notify You of the non-compliance by some
+     reasonable means prior to 60 days after You have come back into compliance.
+     Moreover, Your grants from a particular Contributor are reinstated on an
+     ongoing basis if such Contributor notifies You of the non-compliance by
+     some reasonable means, this is the first time You have received notice of
+     non-compliance with this License from such Contributor, and You become
+     compliant prior to 30 days after Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+     infringement claim (excluding declaratory judgment actions, counter-claims,
+     and cross-claims) alleging that a Contributor Version directly or
+     indirectly infringes any patent, then the rights granted to You by any and
+     all Contributors for the Covered Software under Section 2.1 of this License
+     shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
+     license agreements (excluding distributors and resellers) which have been
+     validly granted by You or Your distributors under this License prior to
+     termination shall survive termination.
+
+6. Disclaimer of Warranty
+
+   Covered Software is provided under this License on an “as is” basis, without
+   warranty of any kind, either expressed, implied, or statutory, including,
+   without limitation, warranties that the Covered Software is free of defects,
+   merchantable, fit for a particular purpose or non-infringing. The entire
+   risk as to the quality and performance of the Covered Software is with You.
+   Should any Covered Software prove defective in any respect, You (not any
+   Contributor) assume the cost of any necessary servicing, repair, or
+   correction. This disclaimer of warranty constitutes an essential part of this
+   License. No use of  any Covered Software is authorized under this License
+   except under this disclaimer.
+
+7. Limitation of Liability
+
+   Under no circumstances and under no legal theory, whether tort (including
+   negligence), contract, or otherwise, shall any Contributor, or anyone who
+   distributes Covered Software as permitted above, be liable to You for any
+   direct, indirect, special, incidental, or consequential damages of any
+   character including, without limitation, damages for lost profits, loss of
+   goodwill, work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses, even if such party shall have been
+   informed of the possibility of such damages. This limitation of liability
+   shall not apply to liability for death or personal injury resulting from such
+   party’s negligence to the extent applicable law prohibits such limitation.
+   Some jurisdictions do not allow the exclusion or limitation of incidental or
+   consequential damages, so this exclusion and limitation may not apply to You.
+
+8. Litigation
+
+   Any litigation relating to this License may be brought only in the courts of
+   a jurisdiction where the defendant maintains its principal place of business
+   and such litigation shall be governed by laws of that jurisdiction, without
+   reference to its conflict-of-law provisions. Nothing in this Section shall
+   prevent a party’s ability to bring cross-claims or counter-claims.
+
+9. Miscellaneous
+
+   This License represents the complete agreement concerning the subject matter
+   hereof. If any provision of this License is held to be unenforceable, such
+   provision shall be reformed only to the extent necessary to make it
+   enforceable. Any law or regulation which provides that the language of a
+   contract shall be construed against the drafter shall not be used to construe
+   this License against a Contributor.
+
+
+10. Versions of the License
+
+10.1. New Versions
+
+      Mozilla Foundation is the license steward. Except as provided in Section
+      10.3, no one other than the license steward has the right to modify or
+      publish new versions of this License. Each version will be given a
+      distinguishing version number.
+
+10.2. Effect of New Versions
+
+      You may distribute the Covered Software under the terms of the version of
+      the License under which You originally received the Covered Software, or
+      under the terms of any subsequent version published by the license
+      steward.
+
+10.3. Modified Versions
+
+      If you create software not governed by this License, and you want to
+      create a new license for such software, you may create and use a modified
+      version of this License if you rename the license and remove any
+      references to the name of the license steward (except to note that such
+      modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
+      If You choose to distribute Source Code Form that is Incompatible With
+      Secondary Licenses under the terms of this version of the License, the
+      notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+
+      This Source Code Form is subject to the
+      terms of the Mozilla Public License, v.
+      2.0. If a copy of the MPL was not
+      distributed with this file, You can
+      obtain one at
+      http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular file, then
+You may include the notice in a location (such as a LICENSE file in a relevant
+directory) where a recipient would be likely to look for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - “Incompatible With Secondary Licenses” Notice
+
+      This Source Code Form is “Incompatible
+      With Secondary Licenses”, as defined by
+      the Mozilla Public License, v. 2.0.
+
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/Makefile b/Godeps/_workspace/src/github.com/hashicorp/raft/Makefile
new file mode 100644
index 00000000000..c61b34a8f6c
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/Makefile
@@ -0,0 +1,17 @@
+DEPS = $(shell go list -f '{{range .TestImports}}{{.}} {{end}}' ./...)
+# DEPS needs $(shell ...): a bare $(go list ...) is a make variable reference and expands to nothing.
+test:
+	go test -timeout=5s ./...
+
+integ: test
+	INTEG_TESTS=yes go test -timeout=3s -run=Integ ./...
+
+deps:
+	go get -d -v ./...
+	echo $(DEPS) | xargs -n1 go get -d
+
+cov:
+	INTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html
+	open /tmp/coverage.html
+
+.PHONY: test cov integ deps
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/README.md b/Godeps/_workspace/src/github.com/hashicorp/raft/README.md
new file mode 100644
index 00000000000..ecb6c977eea
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/README.md
@@ -0,0 +1,89 @@
+raft [![Build Status](https://travis-ci.org/hashicorp/raft.png)](https://travis-ci.org/hashicorp/raft)
+====
+
+raft is a [Go](http://www.golang.org) library that manages a replicated
+log and can be used with an FSM to manage replicated state machines. It
+is a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)).
+
+The use cases for such a library are far-reaching as replicated state
+machines are a key component of many distributed systems. They enable
+building Consistent, Partition Tolerant (CP) systems, with limited
+fault tolerance as well.
+
+## Building
+
+If you wish to build raft you'll need Go version 1.2+ installed.
+
+Please check your installation with:
+
+```
+go version
+```
+
+## Documentation
+
+For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft).
+
+To prevent complications with cgo, the primary backend `MDBStore` is in a separate repository,
+called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation
+for the `LogStore` and `StableStore`.
+
+A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available called
+[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`
+and `StableStore`.
+
+## Protocol
+
+raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
+
+A high level overview of the Raft protocol is described below, but for details please read the full
+[Raft paper](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
+followed by the raft source. Any questions about the raft protocol should be sent to the
+[raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev).
+
+### Protocol Description
+
+Raft nodes are always in one of three states: follower, candidate or leader. All
+nodes initially start out as a follower. In this state, nodes can accept log entries
+from a leader and cast votes. If no entries are received for some time, nodes
+self-promote to the candidate state. In the candidate state nodes request votes from
+their peers. If a candidate receives a quorum of votes, then it is promoted to a leader.
+The leader must accept new log entries and replicate to all the other followers.
+In addition, if stale reads are not acceptable, all queries must also be performed on
+the leader.
+
+Once a cluster has a leader, it is able to accept new log entries. A client can
+request that a leader append a new log entry, which is an opaque binary blob to
+Raft. The leader then writes the entry to durable storage and attempts to replicate
+to a quorum of followers. Once the log entry is considered *committed*, it can be
+*applied* to a finite state machine. The finite state machine is application specific,
+and is implemented using an interface.
+
+An obvious question relates to the unbounded nature of a replicated log. Raft provides
+a mechanism by which the current state is snapshotted, and the log is compacted. Because
+of the FSM abstraction, restoring the state of the FSM must result in the same state
+as a replay of old logs. This allows Raft to capture the FSM state at a point in time,
+and then remove all the logs that were used to reach that state. This is performed automatically
+without user intervention, and prevents unbounded disk usage as well as minimizing
+time spent replaying logs.
+
+Lastly, there is the issue of updating the peer set when new servers are joining
+or existing servers are leaving. As long as a quorum of nodes is available, this
+is not an issue as Raft provides mechanisms to dynamically update the peer set.
+If a quorum of nodes is unavailable, then this becomes a very challenging issue.
+For example, suppose there are only 2 peers, A and B. The quorum size is also
+2, meaning both nodes must agree to commit a log entry. If either A or B fails,
+it is now impossible to reach quorum. This means the cluster is unable to add,
+or remove a node, or commit any additional log entries. This results in *unavailability*.
+At this point, manual intervention would be required to remove either A or B,
+and to restart the remaining node in bootstrap mode.
+
+A Raft cluster of 3 nodes can tolerate a single node failure, while a cluster
+of 5 can tolerate 2 node failures. The recommended configuration is to run
+either 3 or 5 raft servers. This maximizes availability without
+greatly sacrificing performance.
+
+In terms of performance, Raft is comparable to Paxos. Assuming stable leadership,
+committing a log entry requires a single round trip to half of the cluster.
+Thus performance is bound by disk I/O and network latency.
+
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/bench/bench.go b/Godeps/_workspace/src/github.com/hashicorp/raft/bench/bench.go
new file mode 100644
index 00000000000..d7a58f45f44
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/bench/bench.go
@@ -0,0 +1,171 @@
+package raftbench
+
+// raftbench provides common benchmarking functions which can be used by
+// anything which implements the raft.LogStore and raft.StableStore interfaces.
+// All functions accept these interfaces and perform benchmarking. This
+// makes comparing backend performance easier by sharing the tests.
+
+import (
+	"github.com/hashicorp/raft"
+	"testing"
+)
+
+// FirstIndex benchmarks store.FirstIndex over nine pre-stored logs, failing on any error.
+func FirstIndex(b *testing.B, store raft.LogStore) {
+	var logs []*raft.Log
+	for i := 1; i < 10; i++ {
+		logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
+	}
+	if err := store.StoreLogs(logs); err != nil {
+		b.Fatalf("err: %s", err)
+	}
+	b.ResetTimer()
+
+	// Run FirstIndex a number of times; a failing store must not be timed as a success
+	for n := 0; n < b.N; n++ {
+		if _, err := store.FirstIndex(); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
+
+// LastIndex benchmarks store.LastIndex over nine pre-stored logs, failing on any error.
+func LastIndex(b *testing.B, store raft.LogStore) {
+	var logs []*raft.Log
+	for i := 1; i < 10; i++ {
+		logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
+	}
+	if err := store.StoreLogs(logs); err != nil {
+		b.Fatalf("err: %s", err)
+	}
+	b.ResetTimer()
+
+	// Run LastIndex a number of times; a failing store must not be timed as a success
+	for n := 0; n < b.N; n++ {
+		if _, err := store.LastIndex(); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
+
+// GetLog benchmarks store.GetLog for index 5 after storing logs with indexes 1 through 9.
+func GetLog(b *testing.B, store raft.LogStore) {
+	var logs []*raft.Log
+	for i := 1; i < 10; i++ {
+		logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
+	}
+	if err := store.StoreLogs(logs); err != nil {
+		b.Fatalf("err: %s", err)
+	}
+	b.ResetTimer()
+
+	// Run GetLog a number of times, decoding into a fresh raft.Log each time
+	for n := 0; n < b.N; n++ {
+		if err := store.GetLog(5, new(raft.Log)); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
+
+// StoreLog benchmarks store.StoreLog, writing one log per iteration with Index n (from 0).
+func StoreLog(b *testing.B, store raft.LogStore) {
+	for n := 0; n < b.N; n++ {
+		log := &raft.Log{Index: uint64(n), Data: []byte("data")}
+		if err := store.StoreLog(log); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
+
+// StoreLogs benchmarks store.StoreLogs. Each iteration stores a batch of 3 logs
+// with incrementing indexes; the timer is stopped while the batch is built.
+func StoreLogs(b *testing.B, store raft.LogStore) {
+	for n := 0; n < b.N; n++ {
+		b.StopTimer()
+		offset := 3 * (n + 1)
+		logs := []*raft.Log{
+			&raft.Log{Index: uint64(offset - 2), Data: []byte("data")},
+			&raft.Log{Index: uint64(offset - 1), Data: []byte("data")},
+			&raft.Log{Index: uint64(offset), Data: []byte("data")},
+		}
+		b.StartTimer()
+
+		if err := store.StoreLogs(logs); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
+
+// DeleteRange benchmarks store.DeleteRange, deleting one 10-index range per iteration.
+func DeleteRange(b *testing.B, store raft.LogStore) {
+	// Create some fake data: 3 log entries per iteration, spaced in multiples
+	// of 10. This leaves room to test deleting ranges with "extra" indexes and
+	// to ensure we stop going to the database once our max index is hit.
+	var logs []*raft.Log
+	for n := 0; n < b.N; n++ {
+		offset := 10 * n
+		for i := offset; i < offset+3; i++ {
+			logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
+		}
+	}
+	if err := store.StoreLogs(logs); err != nil {
+		b.Fatalf("err: %s", err)
+	}
+	b.ResetTimer()
+
+	// Delete a range of the data
+	for n := 0; n < b.N; n++ {
+		offset := 10 * n
+		if err := store.DeleteRange(uint64(offset), uint64(offset+9)); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
+
+// Set benchmarks store.Set, writing "val" under the one-byte key byte(n) each iteration.
+func Set(b *testing.B, store raft.StableStore) {
+	for n := 0; n < b.N; n++ {
+		if err := store.Set([]byte{byte(n)}, []byte("val")); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
+
+// Get benchmarks store.Get on key 0x05 after seeding the one-byte keys 1 through 9.
+func Get(b *testing.B, store raft.StableStore) {
+	for i := 1; i < 10; i++ {
+		if err := store.Set([]byte{byte(i)}, []byte("val")); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+	b.ResetTimer()
+
+	// Run Get a number of times
+	for n := 0; n < b.N; n++ {
+		if _, err := store.Get([]byte{0x05}); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
+
+// SetUint64 benchmarks store.SetUint64, storing n under the one-byte key byte(n).
+func SetUint64(b *testing.B, store raft.StableStore) {
+	for n := 0; n < b.N; n++ {
+		if err := store.SetUint64([]byte{byte(n)}, uint64(n)); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
+
+// GetUint64 benchmarks store.GetUint64 on key 0x05 after seeding the one-byte keys 0 through 9.
+func GetUint64(b *testing.B, store raft.StableStore) {
+	for i := 0; i < 10; i++ {
+		if err := store.SetUint64([]byte{byte(i)}, uint64(i)); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+	b.ResetTimer()
+
+	// Run GetUint64 (not Get) a number of times so the named method is what gets timed
+	for n := 0; n < b.N; n++ {
+		if _, err := store.GetUint64([]byte{0x05}); err != nil {
+			b.Fatalf("err: %s", err)
+		}
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/commands.go b/Godeps/_workspace/src/github.com/hashicorp/raft/commands.go
new file mode 100644
index 00000000000..739775b3541
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/commands.go
@@ -0,0 +1,84 @@
+package raft
+
+// AppendEntriesRequest is the command used to append entries to the
+// replicated log.
+type AppendEntriesRequest struct {
+	// Provide the current term and leader
+	Term   uint64
+	Leader []byte
+
+	// Provide the previous entries for integrity checking
+	PrevLogEntry uint64
+	PrevLogTerm  uint64
+
+	// New entries to commit
+	Entries []*Log
+
+	// Commit index on the leader
+	LeaderCommitIndex uint64
+}
+
+// AppendEntriesResponse is the response returned from an
+// AppendEntriesRequest.
+type AppendEntriesResponse struct {
+	// Newer term if leader is out of date
+	Term uint64
+
+	// Last Log is a hint to help accelerate rebuilding slow nodes
+	LastLog uint64
+
+	// We may not succeed if we have a conflicting entry
+	Success bool
+
+	// There are scenarios where this request didn't succeed
+	// but there's no need to wait/back-off the next attempt.
+	NoRetryBackoff bool
+}
+
+// RequestVoteRequest is the command used by a candidate to ask a Raft peer
+// for a vote in an election.
+type RequestVoteRequest struct {
+	// Provide the term and our id
+	Term      uint64
+	Candidate []byte
+
+	// Used to ensure safety
+	LastLogIndex uint64
+	LastLogTerm  uint64
+}
+
+// RequestVoteResponse is the response returned from a RequestVoteRequest.
+type RequestVoteResponse struct {
+	// Newer term if leader is out of date
+	Term uint64
+
+	// Return the peers, so that a node can shutdown on removal
+	Peers []byte
+
+	// Is the vote granted
+	Granted bool
+}
+
+// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its
+// log (and state machine) from a snapshot on another peer.
+type InstallSnapshotRequest struct {
+	Term   uint64
+	Leader []byte
+
+	// These are the last index/term included in the snapshot
+	LastLogIndex uint64
+	LastLogTerm  uint64
+
+	// Peer Set in the snapshot
+	Peers []byte
+
+	// Size of the snapshot
+	Size int64
+}
+
+// InstallSnapshotResponse is the response returned from an
+// InstallSnapshotRequest.
+type InstallSnapshotResponse struct {
+	Term    uint64
+	Success bool
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/config.go b/Godeps/_workspace/src/github.com/hashicorp/raft/config.go
new file mode 100644
index 00000000000..6b3c0b59f0c
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/config.go
@@ -0,0 +1,134 @@
+package raft
+
+import (
+	"fmt"
+	"io"
+	"log"
+	"time"
+)
+
+// Config provides any necessary configuration to
+// the Raft server.
+type Config struct {
+	// Time in follower state without a leader before we attempt an election.
+	HeartbeatTimeout time.Duration
+
+	// Time in candidate state without a leader before we attempt an election.
+	ElectionTimeout time.Duration
+
+	// Time without an Apply() operation before we heartbeat to ensure
+	// a timely commit. Due to random staggering, may be delayed as much as
+	// 2x this value.
+	CommitTimeout time.Duration
+
+	// MaxAppendEntries controls the maximum number of append entries
+	// to send at once. We want to strike a balance between efficiency
+	// and avoiding waste if the follower is going to reject because of
+	// an inconsistent log.
+	MaxAppendEntries int
+
+	// If we are a member of a cluster, and RemovePeer is invoked for the
+	// local node, then we forget all peers and transition into the follower state.
+	// If ShutdownOnRemove is set, we additionally shut down Raft. Otherwise,
+	// we can become a leader of a cluster containing only this node.
+	ShutdownOnRemove bool
+
+	// DisableBootstrapAfterElect is used to turn off EnableSingleNode
+	// after the node is elected. This is used to prevent self-election
+	// if the node is removed from the Raft cluster via RemovePeer. Setting
+	// it to false will keep the bootstrap mode, allowing the node to self-elect
+	// and potentially bootstrap a separate cluster.
+	DisableBootstrapAfterElect bool
+
+	// TrailingLogs controls how many logs we leave after a snapshot. This is
+	// used so that we can quickly replay logs on a follower instead of being
+	// forced to send an entire snapshot.
+	TrailingLogs uint64
+
+	// SnapshotInterval controls how often we check if we should perform a snapshot.
+	// We randomly stagger between this value and 2x this value to keep the entire
+	// cluster from performing a snapshot at once.
+	SnapshotInterval time.Duration
+
+	// SnapshotThreshold controls how many outstanding logs there must be before
+	// we perform a snapshot. This is to prevent excessive snapshots when we can
+	// just replay a small set of logs.
+	SnapshotThreshold uint64
+
+	// EnableSingleNode allows for a single node mode of operation. This
+	// is false by default, which prevents a lone node from electing itself
+	// leader.
+	EnableSingleNode bool
+
+	// LeaderLeaseTimeout is used to control how long the "lease" lasts
+	// for being the leader without being able to contact a quorum
+	// of nodes. If we reach this interval without contact, we will
+	// step down as leader.
+	LeaderLeaseTimeout time.Duration
+
+	// StartAsLeader forces Raft to start in the leader state. This should
+	// never be used except for testing purposes, as it can cause a split-brain.
+	StartAsLeader bool
+
+	// NotifyCh is used to provide a channel that will be notified of leadership
+	// changes. Raft will block writing to this channel, so it should either be
+	// buffered or aggressively consumed.
+	NotifyCh chan<- bool
+
+	// LogOutput is used as a sink for logs, unless Logger is specified.
+	// Defaults to os.Stderr.
+	LogOutput io.Writer
+
+	// Logger is a user-provided logger. If nil, a logger writing to LogOutput
+	// is used.
+	Logger *log.Logger
+}
+
+// DefaultConfig returns a Config with usable defaults.
+func DefaultConfig() *Config {
+	return &Config{
+		HeartbeatTimeout:           1000 * time.Millisecond,
+		ElectionTimeout:            1000 * time.Millisecond,
+		CommitTimeout:              50 * time.Millisecond,
+		MaxAppendEntries:           64,
+		ShutdownOnRemove:           true,
+		DisableBootstrapAfterElect: true,
+		TrailingLogs:               10240,
+		SnapshotInterval:           120 * time.Second,
+		SnapshotThreshold:          8192,
+		EnableSingleNode:           false,
+		LeaderLeaseTimeout:         500 * time.Millisecond,
+	}
+}
+
+// ValidateConfig is used to validate a sane configuration
+func ValidateConfig(config *Config) error {
+	if config.HeartbeatTimeout < 5*time.Millisecond {
+		return fmt.Errorf("Heartbeat timeout is too low")
+	}
+	if config.ElectionTimeout < 5*time.Millisecond {
+		return fmt.Errorf("Election timeout is too low")
+	}
+	if config.CommitTimeout < time.Millisecond {
+		return fmt.Errorf("Commit timeout is too low")
+	}
+	if config.MaxAppendEntries <= 0 {
+		return fmt.Errorf("MaxAppendEntries must be positive")
+	}
+	if config.MaxAppendEntries > 1024 {
+		return fmt.Errorf("MaxAppendEntries is too large")
+	}
+	if config.SnapshotInterval < 5*time.Millisecond {
+		return fmt.Errorf("Snapshot interval is too low")
+	}
+	if config.LeaderLeaseTimeout < 5*time.Millisecond {
+		return fmt.Errorf("Leader lease timeout is too low")
+	}
+	if config.LeaderLeaseTimeout > config.HeartbeatTimeout {
+		return fmt.Errorf("Leader lease timeout cannot be larger than heartbeat timeout")
+	}
+	if config.ElectionTimeout < config.HeartbeatTimeout {
+		return fmt.Errorf("Election timeout must be equal or greater than Heartbeat Timeout")
+	}
+	return nil
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/discard_snapshot.go b/Godeps/_workspace/src/github.com/hashicorp/raft/discard_snapshot.go
new file mode 100644
index 00000000000..1b4611d559f
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/discard_snapshot.go
@@ -0,0 +1,48 @@
+package raft
+
+import (
+	"fmt"
+	"io"
+)
+
+// DiscardSnapshotStore is used to successfully snapshot while always
+// discarding the snapshot. This is useful when the log should be truncated
+// but no snapshot should be retained. Testing only; never use in production.
+type DiscardSnapshotStore struct{}
+
+// DiscardSnapshotSink is the sink returned by DiscardSnapshotStore.Create;
+// its Write reports success without retaining any data.
+type DiscardSnapshotSink struct{}
+
+// NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore.
+func NewDiscardSnapshotStore() *DiscardSnapshotStore {
+	return &DiscardSnapshotStore{}
+}
+
+func (d *DiscardSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
+	return &DiscardSnapshotSink{}, nil
+}
+
+func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {
+	return nil, nil
+}
+
+func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
+	return nil, nil, fmt.Errorf("open is not supported")
+}
+
+func (d *DiscardSnapshotSink) Write(b []byte) (int, error) {
+	return len(b), nil
+}
+
+func (d *DiscardSnapshotSink) Close() error {
+	return nil
+}
+
+func (d *DiscardSnapshotSink) ID() string {
+	return "discard"
+}
+
+func (d *DiscardSnapshotSink) Cancel() error {
+	return nil
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/file_snapshot.go b/Godeps/_workspace/src/github.com/hashicorp/raft/file_snapshot.go
new file mode 100644
index 00000000000..a8955373422
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/file_snapshot.go
@@ -0,0 +1,470 @@
+package raft
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"hash"
+	"hash/crc64"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+)
+
+const (
+	testPath      = "permTest"
+	snapPath      = "snapshots"
+	metaFilePath  = "meta.json"
+	stateFilePath = "state.bin"
+	tmpSuffix     = ".tmp"
+)
+
+// FileSnapshotStore implements the SnapshotStore interface and allows
+// snapshots to be made on the local disk.
+type FileSnapshotStore struct {
+	path   string
+	retain int
+	logger *log.Logger
+}
+
+type snapMetaSlice []*fileSnapshotMeta
+
+// FileSnapshotSink implements SnapshotSink with a file.
+type FileSnapshotSink struct {
+	store  *FileSnapshotStore
+	logger *log.Logger
+	dir    string
+	meta   fileSnapshotMeta
+
+	stateFile *os.File
+	stateHash hash.Hash64
+	buffered  *bufio.Writer
+
+	closed bool
+}
+
+// fileSnapshotMeta is stored on disk. We also put a CRC
+// on disk so that we can verify the snapshot.
+type fileSnapshotMeta struct {
+	SnapshotMeta
+	CRC []byte
+}
+
+// bufferedFile is returned when we open a snapshot. This way
+// reads are buffered and the file still gets closed.
+type bufferedFile struct {
+	bh *bufio.Reader
+	fh *os.File
+}
+
+func (b *bufferedFile) Read(p []byte) (n int, err error) {
+	return b.bh.Read(p)
+}
+
+func (b *bufferedFile) Close() error {
+	return b.fh.Close()
+}
+
+// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based
+// on a base directory. The `retain` parameter controls how many
+// snapshots are retained. Must be at least 1.
+func NewFileSnapshotStoreWithLogger(base string, retain int, logger *log.Logger) (*FileSnapshotStore, error) {
+	if retain < 1 {
+		return nil, fmt.Errorf("must retain at least one snapshot")
+	}
+	if logger == nil {
+		logger = log.New(os.Stderr, "", log.LstdFlags)
+	}
+
+	// Ensure our path exists
+	path := filepath.Join(base, snapPath)
+	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
+		return nil, fmt.Errorf("snapshot path not accessible: %v", err)
+	}
+
+	// Setup the store
+	store := &FileSnapshotStore{
+		path:   path,
+		retain: retain,
+		logger: logger,
+	}
+
+	// Do a permissions test
+	if err := store.testPermissions(); err != nil {
+		return nil, fmt.Errorf("permissions test failed: %v", err)
+	}
+	return store, nil
+}
+
+// NewFileSnapshotStore creates a new FileSnapshotStore based
+// on a base directory. The `retain` parameter controls how many
+// snapshots are retained. Must be at least 1.
+func NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) {
+	if logOutput == nil {
+		logOutput = os.Stderr
+	}
+	return NewFileSnapshotStoreWithLogger(base, retain, log.New(logOutput, "", log.LstdFlags))
+}
+
+// testPermissions tries to touch a file in our path to see if it works.
+func (f *FileSnapshotStore) testPermissions() error {
+	path := filepath.Join(f.path, testPath)
+	fh, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	// Report a failed Close; the deferred Remove runs after it, best effort.
+	defer os.Remove(path)
+	return fh.Close()
+}
+
+// snapshotName generates a name for the snapshot.
+func snapshotName(term, index uint64) string {
+	now := time.Now()
+	msec := now.UnixNano() / int64(time.Millisecond)
+	return fmt.Sprintf("%d-%d-%d", term, index, msec)
+}
+
+// Create is used to start a new snapshot
+func (f *FileSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
+	// Create a new path
+	name := snapshotName(term, index)
+	path := filepath.Join(f.path, name+tmpSuffix)
+	f.logger.Printf("[INFO] snapshot: Creating new snapshot at %s", path)
+
+	// Make the directory
+	if err := os.MkdirAll(path, 0755); err != nil {
+		f.logger.Printf("[ERR] snapshot: Failed to make snapshot directory: %v", err)
+		return nil, err
+	}
+
+	// Create the sink
+	sink := &FileSnapshotSink{
+		store:  f,
+		logger: f.logger,
+		dir:    path,
+		meta: fileSnapshotMeta{
+			SnapshotMeta: SnapshotMeta{
+				ID:    name,
+				Index: index,
+				Term:  term,
+				Peers: peers,
+			},
+			CRC: nil,
+		},
+	}
+
+	// Write out the meta data
+	if err := sink.writeMeta(); err != nil {
+		f.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
+		return nil, err
+	}
+
+	// Open the state file
+	statePath := filepath.Join(path, stateFilePath)
+	fh, err := os.Create(statePath)
+	if err != nil {
+		f.logger.Printf("[ERR] snapshot: Failed to create state file: %v", err)
+		return nil, err
+	}
+	sink.stateFile = fh
+
+	// Create a CRC64 hash
+	sink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA))
+
+	// Wrap both the hash and file in a MultiWriter with buffering
+	multi := io.MultiWriter(sink.stateFile, sink.stateHash)
+	sink.buffered = bufio.NewWriter(multi)
+
+	// Done
+	return sink, nil
+}
+
+// List returns available snapshots in the store.
+func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {
+	// Get the eligible snapshots
+	snapshots, err := f.getSnapshots()
+	if err != nil {
+		f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
+		return nil, err
+	}
+
+	var snapMeta []*SnapshotMeta
+	for _, meta := range snapshots {
+		snapMeta = append(snapMeta, &meta.SnapshotMeta)
+		if len(snapMeta) == f.retain {
+			break
+		}
+	}
+	return snapMeta, nil
+}
+
+// getSnapshots returns all the known snapshots, sorted newest first.
+func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {
+	// Scan the snapshot directory
+	snapshots, err := ioutil.ReadDir(f.path)
+	if err != nil {
+		f.logger.Printf("[ERR] snapshot: Failed to scan snapshot dir: %v", err)
+		return nil, err
+	}
+
+	// Populate the metadata
+	var snapMeta []*fileSnapshotMeta
+	for _, snap := range snapshots {
+		// Ignore any files
+		if !snap.IsDir() {
+			continue
+		}
+
+		// Ignore any temporary snapshots
+		dirName := snap.Name()
+		if strings.HasSuffix(dirName, tmpSuffix) {
+			f.logger.Printf("[WARN] snapshot: Found temporary snapshot: %v", dirName)
+			continue
+		}
+
+		// Try to read the meta data; unreadable snapshots are logged and skipped
+		meta, err := f.readMeta(dirName)
+		if err != nil {
+			f.logger.Printf("[WARN] snapshot: Failed to read metadata for %v: %v", dirName, err)
+			continue
+		}
+
+		// Keep every readable snapshot; List and ReapSnapshots apply the retain count
+		snapMeta = append(snapMeta, meta)
+	}
+
+	// Sort the snapshot, reverse so we get new -> old
+	sort.Sort(sort.Reverse(snapMetaSlice(snapMeta)))
+
+	return snapMeta, nil
+}
+
+// readMeta is used to read the meta data for a given named backup
+func (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) {
+	// Open the meta file
+	metaPath := filepath.Join(f.path, name, metaFilePath)
+	fh, err := os.Open(metaPath)
+	if err != nil {
+		return nil, err
+	}
+	defer fh.Close()
+
+	// Buffer the file IO
+	buffered := bufio.NewReader(fh)
+
+	// Read in the JSON
+	meta := &fileSnapshotMeta{}
+	dec := json.NewDecoder(buffered)
+	if err := dec.Decode(meta); err != nil {
+		return nil, err
+	}
+	return meta, nil
+}
+
+// Open verifies the stored CRC of snapshot id and returns its metadata and a ReadCloser.
+func (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
+	// Get the metadata
+	meta, err := f.readMeta(id)
+	if err != nil {
+		f.logger.Printf("[ERR] snapshot: Failed to get meta data to open snapshot: %v", err)
+		return nil, nil, err
+	}
+
+	// Open the state file
+	statePath := filepath.Join(f.path, id, stateFilePath)
+	fh, err := os.Open(statePath)
+	if err != nil {
+		f.logger.Printf("[ERR] snapshot: Failed to open state file: %v", err)
+		return nil, nil, err
+	}
+
+	// Create a CRC64 hash
+	stateHash := crc64.New(crc64.MakeTable(crc64.ECMA))
+
+	// Compute the hash over the whole state file
+	_, err = io.Copy(stateHash, fh)
+	if err != nil {
+		f.logger.Printf("[ERR] snapshot: Failed to read state file: %v", err)
+		fh.Close()
+		return nil, nil, err
+	}
+
+	// Verify the hash against the CRC recorded in the metadata
+	computed := stateHash.Sum(nil)
+	if !bytes.Equal(meta.CRC, computed) {
+		f.logger.Printf("[ERR] snapshot: CRC checksum failed (stored: %v computed: %v)",
+			meta.CRC, computed)
+		fh.Close()
+		return nil, nil, fmt.Errorf("CRC mismatch")
+	}
+
+	// Seek to the start, since hashing consumed the file
+	if _, err := fh.Seek(0, 0); err != nil {
+		f.logger.Printf("[ERR] snapshot: State file seek failed: %v", err)
+		fh.Close()
+		return nil, nil, err
+	}
+
+	// Return a buffered file; closing it closes the underlying state file
+	buffered := &bufferedFile{
+		bh: bufio.NewReader(fh),
+		fh: fh,
+	}
+
+	return &meta.SnapshotMeta, buffered, nil
+}
+
+// ReapSnapshots reaps any snapshots beyond the retain count.
+func (f *FileSnapshotStore) ReapSnapshots() error {
+	snapshots, err := f.getSnapshots()
+	if err != nil {
+		f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
+		return err
+	}
+
+	for i := f.retain; i < len(snapshots); i++ {
+		path := filepath.Join(f.path, snapshots[i].ID)
+		f.logger.Printf("[INFO] snapshot: reaping snapshot %v", path)
+		if err := os.RemoveAll(path); err != nil {
+			f.logger.Printf("[ERR] snapshot: Failed to reap snapshot %v: %v", path, err)
+			return err
+		}
+	}
+	return nil
+}
+
+// ID returns the ID of the snapshot, can be used with Open()
+// after the snapshot is finalized.
+func (s *FileSnapshotSink) ID() string {
+	return s.meta.ID
+}
+
+// Write is used to append to the state file. We write to the
+// buffered IO object to reduce the amount of context switches.
+func (s *FileSnapshotSink) Write(b []byte) (int, error) {
+	return s.buffered.Write(b)
+}
+
+// Close is used to indicate a successful end; it moves the snapshot into place.
+func (s *FileSnapshotSink) Close() error {
+	// Make sure close is idempotent; this also turns a later Cancel into a no-op
+	if s.closed {
+		return nil
+	}
+	s.closed = true
+
+	// Close the open handles and record the size and CRC in the metadata
+	if err := s.finalize(); err != nil {
+		s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
+		return err
+	}
+
+	// Write out the meta data again, now including the size and CRC
+	if err := s.writeMeta(); err != nil {
+		s.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
+		return err
+	}
+
+	// Move the directory into place by dropping the temporary suffix
+	newPath := strings.TrimSuffix(s.dir, tmpSuffix)
+	if err := os.Rename(s.dir, newPath); err != nil {
+		s.logger.Printf("[ERR] snapshot: Failed to move snapshot into place: %v", err)
+		return err
+	}
+
+	// Reap any old snapshots; failures are logged by ReapSnapshots and ignored here
+	s.store.ReapSnapshots()
+	return nil
+}
+
+// Cancel is used to indicate an unsuccessful end.
+func (s *FileSnapshotSink) Cancel() error {
+	// Make sure close is idempotent
+	if s.closed {
+		return nil
+	}
+	s.closed = true
+
+	// Close the open handles
+	if err := s.finalize(); err != nil {
+		s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
+		return err
+	}
+
+	// Attempt to remove all artifacts
+	return os.RemoveAll(s.dir)
+}
+
+// finalize flushes and closes the state file, then records its size and CRC.
+func (s *FileSnapshotSink) finalize() error {
+	// Flush any remaining data
+	if err := s.buffered.Flush(); err != nil {
+		s.stateFile.Close() // finalize is not retried, so release the handle now
+		return err
+	}
+
+	// Get the file size
+	stat, statErr := s.stateFile.Stat()
+	// Close the file
+	if err := s.stateFile.Close(); err != nil {
+		return err
+	}
+
+	// Set the file size, check after we close
+	if statErr != nil {
+		return statErr
+	}
+	s.meta.Size = stat.Size()
+
+	// Set the CRC
+	s.meta.CRC = s.stateHash.Sum(nil)
+	return nil
+}
+
+// writeMeta writes s.meta as JSON to the meta file inside the sink directory.
+func (s *FileSnapshotSink) writeMeta() error {
+	// Open the meta file
+	metaPath := filepath.Join(s.dir, metaFilePath)
+	fh, err := os.Create(metaPath)
+	if err != nil {
+		return err
+	}
+	defer fh.Close()
+
+	// Buffer the file IO
+	buffered := bufio.NewWriter(fh)
+
+	// Write out as JSON
+	enc := json.NewEncoder(buffered)
+	if err := enc.Encode(&s.meta); err != nil {
+		return err
+	}
+	// Flush explicitly: a deferred Flush would silently drop a failed write
+	return buffered.Flush()
+}
+
+// Implement the sort interface for []*fileSnapshotMeta.
+func (s snapMetaSlice) Len() int {
+	return len(s)
+}
+
+func (s snapMetaSlice) Less(i, j int) bool {
+	if s[i].Term != s[j].Term {
+		return s[i].Term < s[j].Term
+	}
+	if s[i].Index != s[j].Index {
+		return s[i].Index < s[j].Index
+	}
+	return s[i].ID < s[j].ID
+}
+
+func (s snapMetaSlice) Swap(i, j int) {
+	s[i], s[j] = s[j], s[i]
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/fsm.go b/Godeps/_workspace/src/github.com/hashicorp/raft/fsm.go
new file mode 100644
index 00000000000..ea8ab548dbc
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/fsm.go
@@ -0,0 +1,37 @@
+package raft
+
+import (
+	"io"
+)
+
+// FSM provides an interface that can be implemented by
+// clients to make use of the replicated log.
+type FSM interface {
+	// Apply log is invoked once a log entry is committed.
+	Apply(*Log) interface{}
+
+	// Snapshot is used to support log compaction. This call should
+	// return an FSMSnapshot which can be used to save a point-in-time
+	// snapshot of the FSM. Apply and Snapshot are not called in multiple
+	// threads, but Apply will be called concurrently with Persist. This means
+	// the FSM should be implemented in a fashion that allows for concurrent
+	// updates while a snapshot is happening.
+	Snapshot() (FSMSnapshot, error)
+
+	// Restore is used to restore an FSM from a snapshot. It is not called
+	// concurrently with any other command. The FSM must discard all previous
+	// state.
+	Restore(io.ReadCloser) error
+}
+
+// FSMSnapshot is returned by an FSM in response to a Snapshot
+// call. It must be safe to invoke FSMSnapshot methods concurrently
+// with calls to Apply.
+type FSMSnapshot interface {
+	// Persist should dump all necessary state to the WriteCloser 'sink',
+	// and call sink.Close() when finished or call sink.Cancel() on error.
+	Persist(sink SnapshotSink) error
+
+	// Release is invoked when we are finished with the snapshot.
+	Release()
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/future.go b/Godeps/_workspace/src/github.com/hashicorp/raft/future.go
new file mode 100644
index 00000000000..854e1ac927b
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/future.go
@@ -0,0 +1,182 @@
+package raft
+
+import (
+	"sync"
+	"time"
+)
+
+// Future is used to represent an action that may occur in the future.
+type Future interface {
+	Error() error
+}
+
+// ApplyFuture is used for Apply() and can return the FSM response.
+type ApplyFuture interface {
+	Future
+	Response() interface{}
+	Index() uint64
+}
+
+// errorFuture is used to return a static error.
+type errorFuture struct {
+	err error
+}
+
+func (e errorFuture) Error() error {
+	return e.err
+}
+
+func (e errorFuture) Response() interface{} {
+	return nil
+}
+
+func (e errorFuture) Index() uint64 {
+	return 0
+}
+
+// deferError can be embedded to allow a future
+// to provide an error in the future.
+type deferError struct {
+	err       error
+	errCh     chan error
+	responded bool
+}
+
+func (d *deferError) init() {
+	d.errCh = make(chan error, 1)
+}
+
+func (d *deferError) Error() error {
+	if d.err != nil {
+		return d.err
+	}
+	if d.errCh == nil {
+		panic("waiting for response on nil channel")
+	}
+	d.err = <-d.errCh
+	return d.err
+}
+
+func (d *deferError) respond(err error) {
+	if d.errCh == nil {
+		return
+	}
+	if d.responded {
+		return
+	}
+	d.errCh <- err
+	close(d.errCh)
+	d.responded = true
+}
+
+// logFuture is used to apply a log entry and waits until
+// the log is considered committed.
+type logFuture struct {
+	deferError
+	log      Log
+	policy   quorumPolicy
+	response interface{}
+	dispatch time.Time
+}
+
+func (l *logFuture) Response() interface{} {
+	return l.response
+}
+
+func (l *logFuture) Index() uint64 {
+	return l.log.Index
+}
+
+type peerFuture struct {
+	deferError
+	peers []string
+}
+
+type shutdownFuture struct {
+	raft *Raft
+}
+
+func (s *shutdownFuture) Error() error {
+	for s.raft.getRoutines() > 0 {
+		time.Sleep(5 * time.Millisecond)
+	}
+	return nil
+}
+
+// snapshotFuture is used for waiting on a snapshot to complete.
+type snapshotFuture struct {
+	deferError
+}
+
+// reqSnapshotFuture is used for requesting a snapshot start.
+// It is only used internally.
+type reqSnapshotFuture struct {
+	deferError
+
+	// snapshot details provided by the FSM runner before responding
+	index    uint64
+	term     uint64
+	peers    []string
+	snapshot FSMSnapshot
+}
+
+// restoreFuture is used for requesting an FSM to perform a
+// snapshot restore. Used internally only.
+type restoreFuture struct {
+	deferError
+	ID string
+}
+
+// verifyFuture is used to verify the current node is still
+// the leader. This is to prevent a stale read.
+type verifyFuture struct {
+	deferError
+	notifyCh   chan *verifyFuture
+	quorumSize int
+	votes      int
+	voteLock   sync.Mutex
+}
+
+// vote is used to respond to a verifyFuture.
+// This may block when responding on the notifyCh.
+func (v *verifyFuture) vote(leader bool) {
+	v.voteLock.Lock()
+	defer v.voteLock.Unlock()
+
+	// Guard against having notified already
+	if v.notifyCh == nil {
+		return
+	}
+
+	if leader {
+		v.votes++
+		if v.votes >= v.quorumSize {
+			v.notifyCh <- v
+			v.notifyCh = nil
+		}
+	} else {
+		v.notifyCh <- v
+		v.notifyCh = nil
+	}
+}
+
+// appendFuture is used for waiting on a pipelined append
+// entries RPC.
+type appendFuture struct {
+	deferError
+	start time.Time
+	args  *AppendEntriesRequest
+	resp  *AppendEntriesResponse
+}
+
+func (a *appendFuture) Start() time.Time {
+	return a.start
+}
+
+func (a *appendFuture) Request() *AppendEntriesRequest {
+	return a.args
+}
+
+func (a *appendFuture) Response() *AppendEntriesResponse {
+	return a.resp
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/inflight.go b/Godeps/_workspace/src/github.com/hashicorp/raft/inflight.go
new file mode 100644
index 00000000000..7014ff50394
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/inflight.go
@@ -0,0 +1,213 @@
+package raft
+
+import (
+	"container/list"
+	"sync"
+)
+
+// quorumPolicy allows individual logFutures to have different
+// commitment rules while still using the inflight mechanism.
+type quorumPolicy interface {
+	// Checks if a commit from a given peer is enough to
+	// satisfy the commitment rules
+	Commit() bool
+
+	// Checks if a commit is committed
+	IsCommitted() bool
+}
+
+// majorityQuorum is used by Apply transactions and requires
+// a simple majority of nodes.
+type majorityQuorum struct {
+	count       int
+	votesNeeded int
+}
+
+func newMajorityQuorum(clusterSize int) *majorityQuorum {
+	votesNeeded := (clusterSize / 2) + 1
+	return &majorityQuorum{count: 0, votesNeeded: votesNeeded}
+}
+
+func (m *majorityQuorum) Commit() bool {
+	m.count++
+	return m.count >= m.votesNeeded
+}
+
+func (m *majorityQuorum) IsCommitted() bool {
+	return m.count >= m.votesNeeded
+}
+
+// inflight is used to track operations that are still in-flight.
+type inflight struct {
+	sync.Mutex
+	committed  *list.List
+	commitCh   chan struct{}
+	minCommit  uint64
+	maxCommit  uint64
+	operations map[uint64]*logFuture
+	stopCh     chan struct{}
+}
+
+// newInflight returns an inflight struct that notifies
+// the provided channel when logs are finished committing.
+func newInflight(commitCh chan struct{}) *inflight {
+	return &inflight{
+		committed:  list.New(),
+		commitCh:   commitCh,
+		minCommit:  0,
+		maxCommit:  0,
+		operations: make(map[uint64]*logFuture),
+		stopCh:     make(chan struct{}),
+	}
+}
+
+// Start is used to mark a logFuture as being inflight. It
+// also commits the entry, as it is assumed the leader is
+// starting.
+func (i *inflight) Start(l *logFuture) {
+	i.Lock()
+	defer i.Unlock()
+	i.start(l)
+}
+
+// StartAll is used to mark a list of logFuture's as being
+// inflight. It also commits each entry as the leader is
+// assumed to be starting.
+func (i *inflight) StartAll(logs []*logFuture) {
+	i.Lock()
+	defer i.Unlock()
+	for _, l := range logs {
+		i.start(l)
+	}
+}
+
+// start is used to mark a single entry as inflight,
+// must be invoked with the lock held.
+func (i *inflight) start(l *logFuture) {
+	idx := l.log.Index
+	i.operations[idx] = l
+
+	if idx > i.maxCommit {
+		i.maxCommit = idx
+	}
+	if i.minCommit == 0 {
+		i.minCommit = idx
+	}
+	i.commit(idx)
+}
+
+// Cancel is used to cancel all in-flight operations.
+// This is done when the leader steps down, and all futures
+// are sent the given error.
+func (i *inflight) Cancel(err error) {
+	// Close the channel first to unblock any pending commits
+	close(i.stopCh)
+
+	// Lock after close to avoid deadlock
+	i.Lock()
+	defer i.Unlock()
+
+	// Respond to all inflight operations
+	for _, op := range i.operations {
+		op.respond(err)
+	}
+
+	// Clear all the committed but not processed
+	for e := i.committed.Front(); e != nil; e = e.Next() {
+		e.Value.(*logFuture).respond(err)
+	}
+
+	// Clear the map
+	i.operations = make(map[uint64]*logFuture)
+
+	// Clear the list of committed
+	i.committed = list.New()
+
+	// Close the commitCh
+	close(i.commitCh)
+
+	// Reset indexes
+	i.minCommit = 0
+	i.maxCommit = 0
+}
+
+// Committed returns all the committed operations in order.
+func (i *inflight) Committed() (l *list.List) {
+	i.Lock()
+	l, i.committed = i.committed, list.New()
+	i.Unlock()
+	return l
+}
+
+// Commit is used by leader replication routines to indicate that
+// a follower has finished committing a log to disk.
+func (i *inflight) Commit(index uint64) {
+	i.Lock()
+	defer i.Unlock()
+	i.commit(index)
+}
+
+// CommitRange is used to commit a range of indexes inclusively.
+// It is optimized to avoid commits for indexes that are not tracked.
+func (i *inflight) CommitRange(minIndex, maxIndex uint64) {
+	i.Lock()
+	defer i.Unlock()
+
+	// Update the minimum index
+	minIndex = max(i.minCommit, minIndex)
+
+	// Commit each index
+	for idx := minIndex; idx <= maxIndex; idx++ {
+		i.commit(idx)
+	}
+}
+
+// commit is used to commit a single index. Must be called with the lock held.
+func (i *inflight) commit(index uint64) {
+	op, ok := i.operations[index]
+	if !ok {
+		// Ignore if not in the map, as it may be committed already
+		return
+	}
+
+	// Check if we've satisfied the commit
+	if !op.policy.Commit() {
+		return
+	}
+
+	// Cannot commit if this is not the minimum inflight. This can happen
+	// if the quorum size changes, meaning a previous commit requires a larger
+	// quorum than this commit. We MUST block until the previous log is committed,
+	// otherwise logs will be applied out of order.
+	if index != i.minCommit {
+		return
+	}
+
+NOTIFY:
+	// Add the operation to the committed list
+	i.committed.PushBack(op)
+
+	// Stop tracking since it is committed
+	delete(i.operations, index)
+
+	// Update the indexes
+	if index == i.maxCommit {
+		i.minCommit = 0
+		i.maxCommit = 0
+
+	} else {
+		i.minCommit++
+	}
+
+	// Check if the next in-flight operation is ready
+	if i.minCommit != 0 {
+		op = i.operations[i.minCommit]
+		if op.policy.IsCommitted() {
+			index = i.minCommit
+			goto NOTIFY
+		}
+	}
+
+	// Async notify of ready operations
+	asyncNotifyCh(i.commitCh)
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/inmem_store.go b/Godeps/_workspace/src/github.com/hashicorp/raft/inmem_store.go
new file mode 100644
index 00000000000..6e4dfd020f7
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/inmem_store.go
@@ -0,0 +1,132 @@
+package raft
+
+import (
+	"sync"
+)
+
+// InmemStore implements the LogStore and StableStore interface.
+// It should NOT EVER be used for production. It is used only for
+// unit tests. Use the MDBStore implementation instead.
+type InmemStore struct {
+	l         sync.RWMutex
+	lowIndex  uint64
+	highIndex uint64
+	logs      map[uint64]*Log
+	kv        map[string][]byte
+	kvInt     map[string]uint64
+}
+
+// NewInmemStore returns a new in-memory backend. Do not ever
+// use for production. Only for testing.
+func NewInmemStore() *InmemStore {
+	i := &InmemStore{
+		logs:  make(map[uint64]*Log),
+		kv:    make(map[string][]byte),
+		kvInt: make(map[string]uint64),
+	}
+	return i
+}
+
+// FirstIndex implements the LogStore interface.
+func (i *InmemStore) FirstIndex() (uint64, error) {
+	i.l.RLock()
+	defer i.l.RUnlock()
+	return i.lowIndex, nil
+}
+
+// LastIndex implements the LogStore interface.
+func (i *InmemStore) LastIndex() (uint64, error) {
+	i.l.RLock()
+	defer i.l.RUnlock()
+	return i.highIndex, nil
+}
+
+// GetLog implements the LogStore interface.
+func (i *InmemStore) GetLog(index uint64, log *Log) error {
+	i.l.RLock()
+	defer i.l.RUnlock()
+	l, ok := i.logs[index]
+	if !ok {
+		return ErrLogNotFound
+	}
+	*log = *l
+	return nil
+}
+
+// StoreLog implements the LogStore interface.
+func (i *InmemStore) StoreLog(log *Log) error {
+	return i.StoreLogs([]*Log{log})
+}
+
+// StoreLogs implements the LogStore interface.
+func (i *InmemStore) StoreLogs(logs []*Log) error {
+	i.l.Lock()
+	defer i.l.Unlock()
+	for _, l := range logs {
+		i.logs[l.Index] = l
+		if i.lowIndex == 0 {
+			i.lowIndex = l.Index
+		}
+		if l.Index > i.highIndex {
+			i.highIndex = l.Index
+		}
+	}
+	return nil
+}
+
+// DeleteRange implements the LogStore interface. The range is inclusive.
+// lowIndex and highIndex are only moved when the deleted range reaches that
+// end of the log, and both are reset to 0 once no entries remain.
+func (i *InmemStore) DeleteRange(min, max uint64) error {
+	i.l.Lock()
+	defer i.l.Unlock()
+	for j := min; j <= max; j++ {
+		delete(i.logs, j)
+	}
+
+	// Nothing left: report 0 for both bounds, as for a fresh store.
+	if len(i.logs) == 0 {
+		i.lowIndex, i.highIndex = 0, 0
+		return nil
+	}
+
+	// Trimming the head moves the first index past the deleted range.
+	if min <= i.lowIndex && i.lowIndex <= max {
+		i.lowIndex = max + 1
+	}
+	// Truncating the tail moves the last index before the deleted range.
+	if min <= i.highIndex && i.highIndex <= max {
+		i.highIndex = min - 1
+	}
+	return nil
+}
+
+// Set implements the StableStore interface.
+func (i *InmemStore) Set(key []byte, val []byte) error {
+	i.l.Lock()
+	defer i.l.Unlock()
+	i.kv[string(key)] = val
+	return nil
+}
+
+// Get implements the StableStore interface.
+func (i *InmemStore) Get(key []byte) ([]byte, error) {
+	i.l.RLock()
+	defer i.l.RUnlock()
+	return i.kv[string(key)], nil
+}
+
+// SetUint64 implements the StableStore interface.
+func (i *InmemStore) SetUint64(key []byte, val uint64) error {
+	i.l.Lock()
+	defer i.l.Unlock()
+	i.kvInt[string(key)] = val
+	return nil
+}
+
+// GetUint64 implements the StableStore interface.
+func (i *InmemStore) GetUint64(key []byte) (uint64, error) {
+	i.l.RLock()
+	defer i.l.RUnlock()
+	return i.kvInt[string(key)], nil
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/inmem_transport.go b/Godeps/_workspace/src/github.com/hashicorp/raft/inmem_transport.go
new file mode 100644
index 00000000000..994d06d8fad
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/inmem_transport.go
@@ -0,0 +1,315 @@
+package raft
+
+import (
+	"fmt"
+	"io"
+	"sync"
+	"time"
+)
+
+// NewInmemAddr returns a new in-memory addr with
+// a randomly generated UUID as the ID.
+func NewInmemAddr() string {
+	return generateUUID()
+}
+
+// inmemPipeline is used to pipeline requests for the in-mem transport.
+type inmemPipeline struct {
+	trans    *InmemTransport
+	peer     *InmemTransport
+	peerAddr string
+
+	doneCh       chan AppendFuture
+	inprogressCh chan *inmemPipelineInflight
+
+	shutdown     bool
+	shutdownCh   chan struct{}
+	shutdownLock sync.Mutex
+}
+
+type inmemPipelineInflight struct {
+	future *appendFuture
+	respCh <-chan RPCResponse
+}
+
+// InmemTransport implements the Transport interface, to allow Raft to be
+// tested in-memory without going over a network.
+type InmemTransport struct {
+	sync.RWMutex
+	consumerCh chan RPC
+	localAddr  string
+	peers      map[string]*InmemTransport
+	pipelines  []*inmemPipeline
+	timeout    time.Duration
+}
+
+// NewInmemTransport is used to initialize a new transport
+// and generates a random local address.
+func NewInmemTransport() (string, *InmemTransport) {
+	addr := NewInmemAddr()
+	trans := &InmemTransport{
+		consumerCh: make(chan RPC, 16),
+		localAddr:  addr,
+		peers:      make(map[string]*InmemTransport),
+		timeout:    50 * time.Millisecond,
+	}
+	return addr, trans
+}
+
+// SetHeartbeatHandler is used to set optional fast-path for
+// heartbeats, not supported for this transport.
+func (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {
+}
+
+// Consumer implements the Transport interface.
+func (i *InmemTransport) Consumer() <-chan RPC {
+	return i.consumerCh
+}
+
+// LocalAddr implements the Transport interface.
+func (i *InmemTransport) LocalAddr() string {
+	return i.localAddr
+}
+
+// AppendEntriesPipeline returns an interface that can be used to pipeline
+// AppendEntries requests.
+func (i *InmemTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
+	i.RLock()
+	peer, ok := i.peers[target]
+	i.RUnlock()
+	if !ok {
+		return nil, fmt.Errorf("failed to connect to peer: %v", target)
+	}
+	pipeline := newInmemPipeline(i, peer, target)
+	i.Lock()
+	i.pipelines = append(i.pipelines, pipeline)
+	i.Unlock()
+	return pipeline, nil
+}
+
+// AppendEntries implements the Transport interface.
+func (i *InmemTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
+	rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
+	if err != nil {
+		return err
+	}
+
+	// Copy the result back
+	out := rpcResp.Response.(*AppendEntriesResponse)
+	*resp = *out
+	return nil
+}
+
+// RequestVote implements the Transport interface.
+func (i *InmemTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
+	rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
+	if err != nil {
+		return err
+	}
+
+	// Copy the result back
+	out := rpcResp.Response.(*RequestVoteResponse)
+	*resp = *out
+	return nil
+}
+
+// InstallSnapshot implements the Transport interface.
+func (i *InmemTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
+	rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout)
+	if err != nil {
+		return err
+	}
+
+	// Copy the result back
+	out := rpcResp.Response.(*InstallSnapshotResponse)
+	*resp = *out
+	return nil
+}
+
+// makeRPC enqueues args (and optional reader r) on target's consumer channel, then waits up to timeout for a reply.
+func (i *InmemTransport) makeRPC(target string, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) {
+	i.RLock()
+	peer, ok := i.peers[target]
+	i.RUnlock()
+	if !ok {
+		err = fmt.Errorf("failed to connect to peer: %v", target)
+		return
+	}
+
+	// Send the RPC over; respCh has one slot so a reply sent after we time out cannot block the responder
+	respCh := make(chan RPCResponse, 1)
+	peer.consumerCh <- RPC{
+		Command:  args,
+		Reader:   r,
+		RespChan: respCh,
+	}
+
+	// Wait for a response
+	select {
+	case rpcResp = <-respCh:
+		if rpcResp.Error != nil {
+			err = rpcResp.Error
+		}
+	case <-time.After(timeout):
+		err = fmt.Errorf("command timed out")
+	}
+	return
+}
+
+// EncodePeer implements the Transport interface. It uses the UUID as the
+// address directly.
+func (i *InmemTransport) EncodePeer(p string) []byte {
+	return []byte(p)
+}
+
+// DecodePeer implements the Transport interface. It returns the encoded
+// UUID bytes as a string address.
+func (i *InmemTransport) DecodePeer(buf []byte) string {
+	return string(buf)
+}
+
+// Connect is used to connect this transport to another transport for
+// a given peer name. This allows for local routing.
+func (i *InmemTransport) Connect(peer string, trans *InmemTransport) {
+	i.Lock()
+	defer i.Unlock()
+	i.peers[peer] = trans
+}
+
+// Disconnect is used to remove the ability to route to a given peer.
+func (i *InmemTransport) Disconnect(peer string) {
+	i.Lock()
+	defer i.Unlock()
+	delete(i.peers, peer)
+
+	// Disconnect any pipelines
+	n := len(i.pipelines)
+	for idx := 0; idx < n; idx++ {
+		if i.pipelines[idx].peerAddr == peer {
+			i.pipelines[idx].Close()
+			i.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil
+			idx--
+			n--
+		}
+	}
+	i.pipelines = i.pipelines[:n]
+}
+
+// DisconnectAll is used to remove all routes to peers.
+func (i *InmemTransport) DisconnectAll() {
+	i.Lock()
+	defer i.Unlock()
+	i.peers = make(map[string]*InmemTransport)
+
+	// Handle pipelines
+	for _, pipeline := range i.pipelines {
+		pipeline.Close()
+	}
+	i.pipelines = nil
+}
+
+func newInmemPipeline(trans *InmemTransport, peer *InmemTransport, addr string) *inmemPipeline {
+	i := &inmemPipeline{
+		trans:        trans,
+		peer:         peer,
+		peerAddr:     addr,
+		doneCh:       make(chan AppendFuture, 16),
+		inprogressCh: make(chan *inmemPipelineInflight, 16),
+		shutdownCh:   make(chan struct{}),
+	}
+	go i.decodeResponses()
+	return i
+}
+
+func (i *inmemPipeline) decodeResponses() {
+	timeout := i.trans.timeout
+	for {
+		select {
+		case inp := <-i.inprogressCh:
+			var timeoutCh <-chan time.Time
+			if timeout > 0 {
+				timeoutCh = time.After(timeout)
+			}
+
+			select {
+			case rpcResp := <-inp.respCh:
+				// Copy the result back
+				*inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse)
+				inp.future.respond(rpcResp.Error)
+
+				select {
+				case i.doneCh <- inp.future:
+				case <-i.shutdownCh:
+					return
+				}
+
+			case <-timeoutCh:
+				inp.future.respond(fmt.Errorf("command timed out"))
+				select {
+				case i.doneCh <- inp.future:
+				case <-i.shutdownCh:
+					return
+				}
+
+			case <-i.shutdownCh:
+				return
+			}
+		case <-i.shutdownCh:
+			return
+		}
+	}
+}
+
+func (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
+	// Create a new future
+	future := &appendFuture{
+		start: time.Now(),
+		args:  args,
+		resp:  resp,
+	}
+	future.init()
+
+	// Handle a timeout
+	var timeout <-chan time.Time
+	if i.trans.timeout > 0 {
+		timeout = time.After(i.trans.timeout)
+	}
+
+	// Send the RPC over
+	respCh := make(chan RPCResponse, 1)
+	rpc := RPC{
+		Command:  args,
+		RespChan: respCh,
+	}
+	select {
+	case i.peer.consumerCh <- rpc:
+	case <-timeout:
+		return nil, fmt.Errorf("command enqueue timeout")
+	case <-i.shutdownCh:
+		return nil, ErrPipelineShutdown
+	}
+
+	// Send to be decoded
+	select {
+	case i.inprogressCh <- &inmemPipelineInflight{future, respCh}:
+		return future, nil
+	case <-i.shutdownCh:
+		return nil, ErrPipelineShutdown
+	}
+}
+
+func (i *inmemPipeline) Consumer() <-chan AppendFuture {
+	return i.doneCh
+}
+
+func (i *inmemPipeline) Close() error {
+	i.shutdownLock.Lock()
+	defer i.shutdownLock.Unlock()
+	if i.shutdown {
+		return nil
+	}
+
+	i.shutdown = true
+	close(i.shutdownCh)
+	return nil
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/log.go b/Godeps/_workspace/src/github.com/hashicorp/raft/log.go
new file mode 100644
index 00000000000..a8c5a40eabf
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/log.go
@@ -0,0 +1,60 @@
+package raft
+
+// LogType describes various types of log entries.
+type LogType uint8
+
+const (
+	// LogCommand is applied to a user FSM.
+	LogCommand LogType = iota
+
+	// LogNoop is used to assert leadership.
+	LogNoop
+
+	// LogAddPeer is used to add a new peer.
+	LogAddPeer
+
+	// LogRemovePeer is used to remove an existing peer.
+	LogRemovePeer
+
+	// LogBarrier is used to ensure all preceding operations have been
+	// applied to the FSM. It is similar to LogNoop, but instead of returning
+	// once committed, it only returns once the FSM manager acks it. Otherwise
+	// it is possible there are operations committed but not yet applied to
+	// the FSM.
+	LogBarrier
+)
+
+// Log entries are replicated to all members of the Raft cluster
+// and form the heart of the replicated state machine.
+type Log struct {
+	Index uint64
+	Term  uint64
+	Type  LogType
+	Data  []byte
+
+	// peer is not exported since it is not transmitted, only used
+	// internally to construct the Data field.
+	peer string
+}
+
+// LogStore is used to provide an interface for storing
+// and retrieving logs in a durable fashion.
+type LogStore interface {
+	// Returns the first index written. 0 for no entries.
+	FirstIndex() (uint64, error)
+
+	// Returns the last index written. 0 for no entries.
+	LastIndex() (uint64, error)
+
+	// Gets a log entry at a given index.
+	GetLog(index uint64, log *Log) error
+
+	// Stores a log entry.
+	StoreLog(log *Log) error
+
+	// Stores multiple log entries.
+	StoreLogs(logs []*Log) error
+
+	// Deletes a range of log entries. The range is inclusive.
+	DeleteRange(min, max uint64) error
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/log_cache.go b/Godeps/_workspace/src/github.com/hashicorp/raft/log_cache.go
new file mode 100644
index 00000000000..952e98c2282
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/log_cache.go
@@ -0,0 +1,79 @@
+package raft
+
+import (
+	"fmt"
+	"sync"
+)
+
+// LogCache wraps any LogStore implementation to provide an
+// in-memory ring buffer. This is used to cache access to
+// the recently written entries. For implementations that do not
+// cache themselves, this can provide a substantial boost by
+// avoiding disk I/O on recent entries.
+type LogCache struct {
+	store LogStore
+
+	cache []*Log
+	l     sync.RWMutex
+}
+
+// NewLogCache is used to create a new LogCache with the
+// given capacity and backend store.
+func NewLogCache(capacity int, store LogStore) (*LogCache, error) {
+	if capacity <= 0 {
+		return nil, fmt.Errorf("capacity must be positive")
+	}
+	c := &LogCache{
+		store: store,
+		cache: make([]*Log, capacity),
+	}
+	return c, nil
+}
+
+func (c *LogCache) GetLog(idx uint64, log *Log) error {
+	// Check the buffer for an entry
+	c.l.RLock()
+	cached := c.cache[idx%uint64(len(c.cache))]
+	c.l.RUnlock()
+
+	// Check if entry is valid
+	if cached != nil && cached.Index == idx {
+		*log = *cached
+		return nil
+	}
+
+	// Forward request on cache miss
+	return c.store.GetLog(idx, log)
+}
+
+func (c *LogCache) StoreLog(log *Log) error {
+	return c.StoreLogs([]*Log{log})
+}
+
+func (c *LogCache) StoreLogs(logs []*Log) error {
+	// Insert the logs into the ring buffer
+	c.l.Lock()
+	for _, l := range logs {
+		c.cache[l.Index%uint64(len(c.cache))] = l
+	}
+	c.l.Unlock()
+
+	return c.store.StoreLogs(logs)
+}
+
+func (c *LogCache) FirstIndex() (uint64, error) {
+	return c.store.FirstIndex()
+}
+
+func (c *LogCache) LastIndex() (uint64, error) {
+	return c.store.LastIndex()
+}
+
+func (c *LogCache) DeleteRange(min, max uint64) error {
+	// Invalidate the cache on deletes
+	c.l.Lock()
+	c.cache = make([]*Log, len(c.cache))
+	c.l.Unlock()
+
+	return c.store.DeleteRange(min, max)
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/net_transport.go b/Godeps/_workspace/src/github.com/hashicorp/raft/net_transport.go
new file mode 100644
index 00000000000..9eb4fe054e8
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/net_transport.go
@@ -0,0 +1,622 @@
+package raft
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/hashicorp/go-msgpack/codec"
+)
+
+const (
+	rpcAppendEntries uint8 = iota
+	rpcRequestVote
+	rpcInstallSnapshot
+
+	// DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport.
+	DefaultTimeoutScale = 256 * 1024 // 256KB
+
+	// rpcMaxPipeline controls the maximum number of outstanding
+	// AppendEntries RPC calls.
+	rpcMaxPipeline = 128
+)
+
+var (
+	// ErrTransportShutdown is returned when operations on a transport are
+	// invoked after it's been terminated.
+	ErrTransportShutdown = errors.New("transport shutdown")
+
+	// ErrPipelineShutdown is returned when the pipeline is closed.
+	ErrPipelineShutdown = errors.New("append pipeline closed")
+)
+
+/*
+
+NetworkTransport provides a network based transport that can be
+used to communicate with Raft on remote machines. It requires
+an underlying stream layer to provide a stream abstraction, which can
+be simple TCP, TLS, etc.
+
+This transport is very simple and lightweight. Each RPC request is
+framed by sending a byte that indicates the message type, followed
+by the MsgPack encoded request.
+
+The response is an error string followed by the response object,
+both are encoded using MsgPack.
+
+InstallSnapshot is special, in that after the RPC request we stream
+the entire state. That socket is not re-used as the connection state
+is not known if there is an error.
+
+*/
+type NetworkTransport struct {
+	connPool     map[string][]*netConn
+	connPoolLock sync.Mutex
+
+	consumeCh chan RPC
+
+	heartbeatFn     func(RPC)
+	heartbeatFnLock sync.Mutex
+
+	logger *log.Logger
+
+	maxPool int
+
+	shutdown     bool
+	shutdownCh   chan struct{}
+	shutdownLock sync.Mutex
+
+	stream StreamLayer
+
+	timeout      time.Duration
+	TimeoutScale int
+}
+
+// StreamLayer is used with the NetworkTransport to provide
+// the low level stream abstraction.
+type StreamLayer interface {
+	net.Listener
+
+	// Dial is used to create a new outgoing connection
+	Dial(address string, timeout time.Duration) (net.Conn, error)
+}
+
+type netConn struct {
+	target string
+	conn   net.Conn
+	r      *bufio.Reader
+	w      *bufio.Writer
+	dec    *codec.Decoder
+	enc    *codec.Encoder
+}
+
+func (n *netConn) Release() error {
+	return n.conn.Close()
+}
+
+type netPipeline struct {
+	conn  *netConn
+	trans *NetworkTransport
+
+	doneCh       chan AppendFuture
+	inprogressCh chan *appendFuture
+
+	shutdown     bool
+	shutdownCh   chan struct{}
+	shutdownLock sync.Mutex
+}
+
+// NewNetworkTransport creates a new network transport with the given dialer
+// and listener. The maxPool controls how many connections we will pool. The
+// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
+// the timeout by (SnapshotSize / TimeoutScale).
+func NewNetworkTransport(
+	stream StreamLayer,
+	maxPool int,
+	timeout time.Duration,
+	logOutput io.Writer,
+) *NetworkTransport {
+	if logOutput == nil {
+		logOutput = os.Stderr
+	}
+	return NewNetworkTransportWithLogger(stream, maxPool, timeout, log.New(logOutput, "", log.LstdFlags))
+}
+
+// NewNetworkTransportWithLogger creates a new network transport with the given dialer
+// and listener. The maxPool controls how many connections we will pool. The
+// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
+// the timeout by (SnapshotSize / TimeoutScale).
+func NewNetworkTransportWithLogger(
+	stream StreamLayer,
+	maxPool int,
+	timeout time.Duration,
+	logger *log.Logger,
+) *NetworkTransport {
+	if logger == nil {
+		logger = log.New(os.Stderr, "", log.LstdFlags)
+	}
+	trans := &NetworkTransport{
+		connPool:     make(map[string][]*netConn),
+		consumeCh:    make(chan RPC),
+		logger:       logger,
+		maxPool:      maxPool,
+		shutdownCh:   make(chan struct{}),
+		stream:       stream,
+		timeout:      timeout,
+		TimeoutScale: DefaultTimeoutScale,
+	}
+	go trans.listen()
+	return trans
+}
+
+// SetHeartbeatHandler is used to setup a heartbeat handler
+// as a fast-path. This is to avoid head-of-line blocking from
+// disk IO.
+func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) {
+	n.heartbeatFnLock.Lock()
+	defer n.heartbeatFnLock.Unlock()
+	n.heartbeatFn = cb
+}
+
+// Close is used to stop the network transport.
+func (n *NetworkTransport) Close() error {
+	n.shutdownLock.Lock()
+	defer n.shutdownLock.Unlock()
+
+	if !n.shutdown {
+		close(n.shutdownCh)
+		n.stream.Close()
+		n.shutdown = true
+	}
+	return nil
+}
+
+// Consumer implements the Transport interface.
+func (n *NetworkTransport) Consumer() <-chan RPC {
+	return n.consumeCh
+}
+
+// LocalAddr implements the Transport interface.
+func (n *NetworkTransport) LocalAddr() string {
+	return n.stream.Addr().String()
+}
+
+// IsShutdown is used to check if the transport is shutdown.
+func (n *NetworkTransport) IsShutdown() bool {
+	select {
+	case <-n.shutdownCh:
+		return true
+	default:
+		return false
+	}
+}
+
+// getPooledConn is used to grab a pooled connection.
+func (n *NetworkTransport) getPooledConn(target string) *netConn {
+	n.connPoolLock.Lock()
+	defer n.connPoolLock.Unlock()
+
+	conns, ok := n.connPool[target]
+	if !ok || len(conns) == 0 {
+		return nil
+	}
+
+	var conn *netConn
+	num := len(conns)
+	conn, conns[num-1] = conns[num-1], nil
+	n.connPool[target] = conns[:num-1]
+	return conn
+}
+
+// getConn is used to get a connection from the pool.
+func (n *NetworkTransport) getConn(target string) (*netConn, error) {
+	// Check for a pooled conn
+	if conn := n.getPooledConn(target); conn != nil {
+		return conn, nil
+	}
+
+	// Dial a new connection
+	conn, err := n.stream.Dial(target, n.timeout)
+	if err != nil {
+		return nil, err
+	}
+
+	// Wrap the conn
+	netConn := &netConn{
+		target: target,
+		conn:   conn,
+		r:      bufio.NewReader(conn),
+		w:      bufio.NewWriter(conn),
+	}
+
+	// Setup encoder/decoders
+	netConn.dec = codec.NewDecoder(netConn.r, &codec.MsgpackHandle{})
+	netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{})
+
+	// Done
+	return netConn, nil
+}
+
+// returnConn returns a connection back to the pool.
+func (n *NetworkTransport) returnConn(conn *netConn) {
+	n.connPoolLock.Lock()
+	defer n.connPoolLock.Unlock()
+
+	key := conn.target
+	conns, _ := n.connPool[key]
+
+	if !n.IsShutdown() && len(conns) < n.maxPool {
+		n.connPool[key] = append(conns, conn)
+	} else {
+		conn.Release()
+	}
+}
+
+// AppendEntriesPipeline returns an interface that can be used to pipeline
+// AppendEntries requests.
+func (n *NetworkTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
+	// Get a connection
+	conn, err := n.getConn(target)
+	if err != nil {
+		return nil, err
+	}
+
+	// Create the pipeline
+	return newNetPipeline(n, conn), nil
+}
+
+// AppendEntries implements the Transport interface.
+func (n *NetworkTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
+	return n.genericRPC(target, rpcAppendEntries, args, resp)
+}
+
+// RequestVote implements the Transport interface.
+func (n *NetworkTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
+	return n.genericRPC(target, rpcRequestVote, args, resp)
+}
+
+// genericRPC handles a simple request/response RPC.
+func (n *NetworkTransport) genericRPC(target string, rpcType uint8, args interface{}, resp interface{}) error {
+	// Get a conn
+	conn, err := n.getConn(target)
+	if err != nil {
+		return err
+	}
+
+	// Set a deadline
+	if n.timeout > 0 {
+		conn.conn.SetDeadline(time.Now().Add(n.timeout))
+	}
+
+	// Send the RPC
+	if err := sendRPC(conn, rpcType, args); err != nil {
+		return err
+	}
+
+	// Decode the response
+	canReturn, err := decodeResponse(conn, resp)
+	if canReturn {
+		n.returnConn(conn)
+	}
+	return err
+}
+
+// InstallSnapshot implements the Transport interface.
+func (n *NetworkTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
+	// Get a conn, always close for InstallSnapshot
+	conn, err := n.getConn(target)
+	if err != nil {
+		return err
+	}
+	defer conn.Release()
+
+	// Set a deadline, scaled by request size
+	if n.timeout > 0 {
+		timeout := n.timeout * time.Duration(args.Size/int64(n.TimeoutScale))
+		if timeout < n.timeout {
+			timeout = n.timeout
+		}
+		conn.conn.SetDeadline(time.Now().Add(timeout))
+	}
+
+	// Send the RPC
+	if err := sendRPC(conn, rpcInstallSnapshot, args); err != nil {
+		return err
+	}
+
+	// Stream the state
+	if _, err := io.Copy(conn.w, data); err != nil {
+		return err
+	}
+
+	// Flush
+	if err := conn.w.Flush(); err != nil {
+		return err
+	}
+
+	// Decode the response, do not return conn
+	_, err = decodeResponse(conn, resp)
+	return err
+}
+
+// EncodePeer implements the Transport interface.
+func (n *NetworkTransport) EncodePeer(p string) []byte {
+	return []byte(p)
+}
+
+// DecodePeer implements the Transport interface.
+func (n *NetworkTransport) DecodePeer(buf []byte) string {
+	return string(buf)
+}
+
+// listen is used to handle incoming connections.
+func (n *NetworkTransport) listen() {
+	for {
+		// Accept incoming connections
+		conn, err := n.stream.Accept()
+		if err != nil {
+			if n.IsShutdown() {
+				return
+			}
+			n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err)
+			continue
+		}
+		n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr())
+
+		// Handle the connection in dedicated routine
+		go n.handleConn(conn)
+	}
+}
+
+// handleConn is used to handle an inbound connection for its lifespan.
+func (n *NetworkTransport) handleConn(conn net.Conn) {
+	defer conn.Close()
+	r := bufio.NewReader(conn)
+	w := bufio.NewWriter(conn)
+	dec := codec.NewDecoder(r, &codec.MsgpackHandle{})
+	enc := codec.NewEncoder(w, &codec.MsgpackHandle{})
+
+	for {
+		if err := n.handleCommand(r, dec, enc); err != nil {
+			if err != io.EOF {
+				n.logger.Printf("[ERR] raft-net: Failed to decode incoming command: %v", err)
+			}
+			return
+		}
+		if err := w.Flush(); err != nil {
+			n.logger.Printf("[ERR] raft-net: Failed to flush response: %v", err)
+			return
+		}
+	}
+}
+
+// handleCommand reads one RPC from the connection, dispatches it, and encodes the reply to enc.
+func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error {
+	// The first byte on the wire identifies the RPC type
+	rpcType, err := r.ReadByte()
+	if err != nil {
+		return err
+	}
+
+	// Create the RPC object; the response channel can hold one reply without a waiting receiver
+	respCh := make(chan RPCResponse, 1)
+	rpc := RPC{
+		RespChan: respCh,
+	}
+
+	// Decode the command body according to its type
+	isHeartbeat := false
+	switch rpcType {
+	case rpcAppendEntries:
+		var req AppendEntriesRequest
+		if err := dec.Decode(&req); err != nil {
+			return err
+		}
+		rpc.Command = &req
+
+		// A request carrying a term and leader but no log data is treated as a heartbeat
+		if req.Term != 0 && req.Leader != nil &&
+			req.PrevLogEntry == 0 && req.PrevLogTerm == 0 &&
+			len(req.Entries) == 0 && req.LeaderCommitIndex == 0 {
+			isHeartbeat = true
+		}
+
+	case rpcRequestVote:
+		var req RequestVoteRequest
+		if err := dec.Decode(&req); err != nil {
+			return err
+		}
+		rpc.Command = &req
+
+	case rpcInstallSnapshot:
+		var req InstallSnapshotRequest
+		if err := dec.Decode(&req); err != nil {
+			return err
+		}
+		rpc.Command = &req
+		rpc.Reader = io.LimitReader(r, req.Size) // exposes at most req.Size further bytes of the stream
+
+	default:
+		return fmt.Errorf("unknown rpc type %d", rpcType)
+	}
+
+	// Heartbeat fast-path: call the registered handler directly instead of queueing on consumeCh
+	if isHeartbeat {
+		n.heartbeatFnLock.Lock()
+		fn := n.heartbeatFn
+		n.heartbeatFnLock.Unlock()
+		if fn != nil {
+			fn(rpc)
+			goto RESP
+		}
+	}
+
+	// Dispatch the RPC to the consumer, giving up if the transport shuts down
+	select {
+	case n.consumeCh <- rpc:
+	case <-n.shutdownCh:
+		return ErrTransportShutdown
+	}
+
+	// Wait for the response on the RPC's own channel
+RESP:
+	select {
+	case resp := <-respCh:
+		// Send the error first; an empty string means no error
+		respErr := ""
+		if resp.Error != nil {
+			respErr = resp.Error.Error()
+		}
+		if err := enc.Encode(respErr); err != nil {
+			return err
+		}
+
+		// Send the response
+		if err := enc.Encode(resp.Response); err != nil {
+			return err
+		}
+	case <-n.shutdownCh:
+		return ErrTransportShutdown
+	}
+	return nil
+}
+
+// decodeResponse decodes an RPC response into resp and reports whether
+// the connection can be reused. On a decode failure the connection is released.
+func decodeResponse(conn *netConn, resp interface{}) (bool, error) {
+	// Decode the error string, which is read ahead of the response
+	var rpcError string
+	if err := conn.dec.Decode(&rpcError); err != nil {
+		conn.Release()
+		return false, err
+	}
+
+	// Decode the response
+	if err := conn.dec.Decode(resp); err != nil {
+		conn.Release()
+		return false, err
+	}
+
+	// Report a remote error verbatim; its text is data and must not be used as a format string
+	if rpcError != "" {
+		return true, fmt.Errorf("%s", rpcError)
+	}
+	return true, nil
+}
+
+// sendRPC writes the RPC type byte and the encoded args to the
+// connection and flushes them out. If any step fails the connection
+// is released and the error is returned.
+func sendRPC(conn *netConn, rpcType uint8, args interface{}) error {
+	// Write the request type
+	err := conn.w.WriteByte(rpcType)
+	// Send the request, then flush, skipping whatever follows a failure
+	if err == nil {
+		err = conn.enc.Encode(args)
+	}
+	if err == nil {
+		err = conn.w.Flush()
+	}
+
+	// Any failure gives up the connection
+	if err != nil {
+		conn.Release()
+		return err
+	}
+	return nil
+}
+
+// newNetPipeline builds a netPipeline over conn for the given transport
+// and starts the goroutine that decodes its responses.
+func newNetPipeline(trans *NetworkTransport, conn *netConn) *netPipeline {
+	pipeline := &netPipeline{
+		trans:        trans,
+		conn:         conn,
+		shutdownCh:   make(chan struct{}),
+		inprogressCh: make(chan *appendFuture, rpcMaxPipeline),
+		doneCh:       make(chan AppendFuture, rpcMaxPipeline),
+	}
+	go pipeline.decodeResponses()
+	return pipeline
+}
+
+// decodeResponses is a long running routine that decodes the responses
+// sent on the connection.
+func (n *netPipeline) decodeResponses() {
+	timeout := n.trans.timeout
+	for {
+		select {
+		case future := <-n.inprogressCh:
+			if timeout > 0 {
+				n.conn.conn.SetReadDeadline(time.Now().Add(timeout))
+			}
+
+			_, err := decodeResponse(n.conn, future.resp)
+			future.respond(err)
+			select {
+			case n.doneCh <- future:
+			case <-n.shutdownCh:
+				return
+			}
+		case <-n.shutdownCh:
+			return
+		}
+	}
+}
+
+// AppendEntries pipelines a new append entries request on the connection.
+func (n *netPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
+	pending := &appendFuture{
+		start: time.Now(),
+		args:  args,
+		resp:  resp,
+	}
+	pending.init()
+
+	// Bound the send with a write deadline when a timeout is configured
+	sendTimeout := n.trans.timeout
+	if sendTimeout > 0 {
+		n.conn.conn.SetWriteDeadline(time.Now().Add(sendTimeout))
+	}
+
+	err := sendRPC(n.conn, rpcAppendEntries, pending.args)
+	if err != nil {
+		return nil, err
+	}
+
+	// Queue the future for the decoder. The channel is bounded, so this
+	// blocks (applying back-pressure) while too many requests are in flight.
+	select {
+	case <-n.shutdownCh:
+		return nil, ErrPipelineShutdown
+	case n.inprogressCh <- pending:
+		return pending, nil
+	}
+}
+
+// Consumer returns the receive-only doneCh channel, on which decodeResponses delivers completed futures.
+func (n *netPipeline) Consumer() <-chan AppendFuture {
+	return n.doneCh
+}
+
+// Close shuts down the pipeline and releases its connection. Only the
+// first call has any effect; later calls return nil without doing
+// anything.
+func (n *netPipeline) Close() error {
+	n.shutdownLock.Lock()
+	defer n.shutdownLock.Unlock()
+
+	if !n.shutdown {
+		// Release the connection before signalling shutdown
+		n.conn.Release()
+		n.shutdown = true
+		close(n.shutdownCh)
+	}
+	return nil
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/peer.go b/Godeps/_workspace/src/github.com/hashicorp/raft/peer.go
new file mode 100644
index 00000000000..6f3bcf85645
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/peer.go
@@ -0,0 +1,122 @@
+package raft
+
+import (
+	"bytes"
+	"encoding/json"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"sync"
+)
+
+const (
+	jsonPeerPath = "peers.json"
+)
+
+// PeerStore provides an interface for persistent storage and
+// retrieval of peers. We use a separate interface from StableStore
+// since the peers may need to be edited by a human operator. For example,
+// in a two node cluster, the failure of either node requires human intervention
+// since consensus is impossible.
+type PeerStore interface {
+	// Peers returns the list of known peers.
+	Peers() ([]string, error)
+
+	// SetPeers sets the list of known peers. This is invoked when a peer is
+	// added or removed.
+	SetPeers([]string) error
+}
+
+// StaticPeers is used to provide a static, in-memory list of peers.
+type StaticPeers struct {
+	StaticPeers []string
+	l           sync.Mutex // held by Peers and SetPeers while they touch StaticPeers
+}
+
+// Peers implements the PeerStore interface. It returns the stored
+// slice itself, not a copy, and never returns an error.
+func (s *StaticPeers) Peers() ([]string, error) {
+	s.l.Lock()
+	defer s.l.Unlock()
+	return s.StaticPeers, nil
+}
+
+// SetPeers implements the PeerStore interface, storing p as given without copying it.
+func (s *StaticPeers) SetPeers(p []string) error {
+	s.l.Lock()
+	defer s.l.Unlock()
+	s.StaticPeers = p
+	return nil
+}
+
+// JSONPeers is used to provide peer persistence on disk in the form
+// of a JSON file. This allows human operators to manipulate the file.
+type JSONPeers struct {
+	l     sync.Mutex // held for the whole of Peers and SetPeers
+	path  string     // location of the JSON peers file
+	trans Transport  // encodes and decodes the individual peer addresses
+}
+
+// NewJSONPeers creates a new JSONPeers store whose file is kept in
+// the base directory. It requires a transport to handle the
+// serialization of network addresses.
+func NewJSONPeers(base string, trans Transport) *JSONPeers {
+	// The peers file has a fixed name inside base
+	return &JSONPeers{
+		path:  filepath.Join(base, jsonPeerPath),
+		trans: trans,
+	}
+}
+
+// Peers implements the PeerStore interface. It reads the peers file and
+// decodes every entry through the transport. A missing or empty file
+// yields a nil list and no error.
+func (j *JSONPeers) Peers() ([]string, error) {
+	j.l.Lock()
+	defer j.l.Unlock()
+
+	// Load the raw file; a file that does not exist is not an error
+	raw, err := ioutil.ReadFile(j.path)
+	switch {
+	case err != nil && !os.IsNotExist(err):
+		return nil, err
+	case len(raw) == 0:
+		return nil, nil
+	}
+
+	// Parse the JSON array of encoded peers
+	var encoded []string
+	parser := json.NewDecoder(bytes.NewReader(raw))
+	if err := parser.Decode(&encoded); err != nil {
+		return nil, err
+	}
+
+	// Let the transport turn each entry back into a peer address
+	var decoded []string
+	for i := range encoded {
+		decoded = append(decoded, j.trans.DecodePeer([]byte(encoded[i])))
+	}
+	return decoded, nil
+}
+
+// SetPeers implements the PeerStore interface. It encodes each peer through the transport and writes the list as JSON.
+func (j *JSONPeers) SetPeers(peers []string) error {
+	j.l.Lock()
+	defer j.l.Unlock()
+
+	// Encode each peer
+	var peerSet []string
+	for _, p := range peers {
+		peerSet = append(peerSet, string(j.trans.EncodePeer(p)))
+	}
+
+	// Convert to JSON
+	var buf bytes.Buffer
+	enc := json.NewEncoder(&buf)
+	if err := enc.Encode(peerSet); err != nil {
+		return err
+	}
+
+	// Write out as JSON; this is a plain data file, so create it without execute bits
+	return ioutil.WriteFile(j.path, buf.Bytes(), 0644)
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/raft.go b/Godeps/_workspace/src/github.com/hashicorp/raft/raft.go
new file mode 100644
index 00000000000..f7880ba9c97
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/raft.go
@@ -0,0 +1,1887 @@
+package raft
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strconv"
+	"sync"
+	"time"
+
+	"github.com/armon/go-metrics"
+)
+
+const (
+	minCheckInterval = 10 * time.Millisecond
+)
+
+var (
+	keyCurrentTerm  = []byte("CurrentTerm")
+	keyLastVoteTerm = []byte("LastVoteTerm")
+	keyLastVoteCand = []byte("LastVoteCand")
+
+	// ErrLeader is returned when an operation can't be completed on a
+	// leader node.
+	ErrLeader = errors.New("node is the leader")
+
+	// ErrNotLeader is returned when an operation can't be completed on a
+	// follower or candidate node.
+	ErrNotLeader = errors.New("node is not the leader")
+
+	// ErrLeadershipLost is returned when a leader fails to commit a log entry
+	// because it's been deposed in the process.
+	ErrLeadershipLost = errors.New("leadership lost while committing log")
+
+	// ErrRaftShutdown is returned when operations are requested against an
+	// inactive Raft.
+	ErrRaftShutdown = errors.New("raft is already shutdown")
+
+	// ErrEnqueueTimeout is returned when an operation could not be enqueued before its timeout elapsed.
+	ErrEnqueueTimeout = errors.New("timed out enqueuing operation")
+
+	// ErrKnownPeer is returned when trying to add a peer to the configuration
+	// that already exists.
+	ErrKnownPeer = errors.New("peer already known")
+
+	// ErrUnknownPeer is returned when trying to remove a peer from the
+	// configuration that doesn't exist.
+	ErrUnknownPeer = errors.New("peer is unknown")
+
+	// ErrNothingNewToSnapshot is returned when trying to create a snapshot
+	// but there's nothing new committed to the FSM since we started.
+	ErrNothingNewToSnapshot = errors.New("Nothing new to snapshot")
+)
+
+// commitTuple is used to send a committed log entry to the FSM goroutine,
+// with an optional associated future that should be invoked.
+type commitTuple struct {
+	log    *Log       // the committed entry; runFSM applies it to the FSM when it is a LogCommand
+	future *logFuture // may be nil; otherwise runFSM responds to it after handling the entry
+}
+
+// leaderState is state that is used while we are a leader. runLeader sets it up and clears it on step down.
+type leaderState struct {
+	commitCh  chan struct{}                   // signals leaderLoop that inflight logs have been committed
+	inflight  *inflight                       // created by newInflight with commitCh
+	replState map[string]*followerReplication // replication state keyed by peer
+	notify    map[*verifyFuture]struct{}      // pending verify requests, failed with ErrLeadershipLost on step down
+	stepDown  chan struct{}                   // a receive on this makes leaderLoop revert to Follower
+}
+
+// Raft implements a single Raft node. Instances are constructed with NewRaft.
+type Raft struct {
+	raftState
+
+	// applyCh is used to async send logs to the main thread to
+	// be committed and applied to the FSM.
+	applyCh chan *logFuture
+
+	// Configuration provided at Raft initialization
+	conf *Config
+
+	// FSM is the client state machine to apply commands to
+	fsm FSM
+
+	// fsmCommitCh is used to trigger async application of logs to the fsm
+	fsmCommitCh chan commitTuple
+
+	// fsmRestoreCh is used to trigger a restore from snapshot
+	fsmRestoreCh chan *restoreFuture
+
+	// fsmSnapshotCh is used to trigger a new snapshot being taken
+	fsmSnapshotCh chan *reqSnapshotFuture
+
+	// lastContact is the last time we had contact from the
+	// leader node. This can be used to gauge staleness. Guarded by lastContactLock.
+	lastContact     time.Time
+	lastContactLock sync.RWMutex
+
+	// leader is the address of the current cluster leader, guarded by leaderLock
+	leader     string
+	leaderLock sync.RWMutex
+
+	// leaderCh is used to notify of leadership changes (true on gaining, false on losing leadership)
+	leaderCh chan bool
+
+	// leaderState used only while state is leader
+	leaderState leaderState
+
+	// Stores our local addr
+	localAddr string
+
+	// Used for our logging
+	logger *log.Logger
+
+	// LogStore provides durable storage for logs
+	logs LogStore
+
+	// Track our known peers
+	peerCh    chan *peerFuture
+	peers     []string
+	peerStore PeerStore
+
+	// RPC chan comes from the transport layer
+	rpcCh <-chan RPC
+
+	// Shutdown channel to exit, protected to prevent concurrent exits
+	shutdown     bool
+	shutdownCh   chan struct{}
+	shutdownLock sync.Mutex
+
+	// snapshots is used to store and retrieve snapshots
+	snapshots SnapshotStore
+
+	// snapshotCh is used for user triggered snapshots
+	snapshotCh chan *snapshotFuture
+
+	// stable is a StableStore implementation for durable state
+	// It provides stable storage for many fields in raftState
+	stable StableStore
+
+	// The transport layer we use
+	trans Transport
+
+	// verifyCh is used to async send verify futures to the main thread
+	// to verify we are still the leader
+	verifyCh chan *verifyFuture
+}
+
+// NewRaft is used to construct a new Raft node. It takes a configuration, as well
+// as implementations of various interfaces that are required. If we have any old state,
+// such as snapshots, logs, peers, etc, all those will be restored when creating the
+// Raft node. On success the background goroutines (run, runFSM, runSnapshots) are already started.
+func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps SnapshotStore,
+	peerStore PeerStore, trans Transport) (*Raft, error) {
+	// Validate the configuration
+	if err := ValidateConfig(conf); err != nil {
+		return nil, err
+	}
+
+	// Use the configured Logger if given, otherwise build one on LogOutput (stderr when unset)
+	var logger *log.Logger
+	if conf.Logger != nil {
+		logger = conf.Logger
+	} else {
+		if conf.LogOutput == nil {
+			conf.LogOutput = os.Stderr
+		}
+		logger = log.New(conf.LogOutput, "", log.LstdFlags)
+	}
+
+	// Try to restore the current term; an error whose text is "not found" is tolerated
+	currentTerm, err := stable.GetUint64(keyCurrentTerm)
+	if err != nil && err.Error() != "not found" {
+		return nil, fmt.Errorf("failed to load current term: %v", err)
+	}
+
+	// Read the last log value
+	lastIdx, err := logs.LastIndex()
+	if err != nil {
+		return nil, fmt.Errorf("failed to find last log: %v", err)
+	}
+
+	// Get the log; with no logs stored, lastLog keeps its zero value
+	var lastLog Log
+	if lastIdx > 0 {
+		if err := logs.GetLog(lastIdx, &lastLog); err != nil {
+			return nil, fmt.Errorf("failed to get last log: %v", err)
+		}
+	}
+
+	// Construct the list of peers that excludes us
+	localAddr := trans.LocalAddr()
+	peers, err := peerStore.Peers()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get list of peers: %v", err)
+	}
+	peers = ExcludePeer(peers, localAddr)
+
+	// Create Raft struct
+	r := &Raft{
+		applyCh:       make(chan *logFuture),
+		conf:          conf,
+		fsm:           fsm,
+		fsmCommitCh:   make(chan commitTuple, 128),
+		fsmRestoreCh:  make(chan *restoreFuture),
+		fsmSnapshotCh: make(chan *reqSnapshotFuture),
+		leaderCh:      make(chan bool),
+		localAddr:     localAddr,
+		logger:        logger,
+		logs:          logs,
+		peerCh:        make(chan *peerFuture),
+		peers:         peers,
+		peerStore:     peerStore,
+		rpcCh:         trans.Consumer(),
+		snapshots:     snaps,
+		snapshotCh:    make(chan *snapshotFuture),
+		shutdownCh:    make(chan struct{}),
+		stable:        stable,
+		trans:         trans,
+		verifyCh:      make(chan *verifyFuture, 64),
+	}
+
+	// Initialize as a follower
+	r.setState(Follower)
+
+	// Start as leader if specified. This should only be used
+	// for testing purposes.
+	if conf.StartAsLeader {
+		r.setState(Leader)
+		r.setLeader(r.localAddr)
+	}
+
+	// Restore the current term and the last log
+	r.setCurrentTerm(currentTerm)
+	r.setLastLogIndex(lastLog.Index)
+	r.setLastLogTerm(lastLog.Term)
+
+	// Attempt to restore a snapshot if there are any
+	if err := r.restoreSnapshot(); err != nil {
+		return nil, err
+	}
+
+	// Setup a heartbeat fast-path to avoid head-of-line
+	// blocking where possible. It MUST be safe for this
+	// to be called concurrently with a blocking RPC.
+	trans.SetHeartbeatHandler(r.processHeartbeat)
+
+	// Start the background work
+	r.goFunc(r.run)
+	r.goFunc(r.runFSM)
+	r.goFunc(r.runSnapshots)
+	return r, nil
+}
+
+// Leader returns the current leader of the cluster. The result may
+// be the empty string if there is no current leader or the leader
+// is unknown.
+func (r *Raft) Leader() string {
+	// leaderLock guards r.leader
+	r.leaderLock.RLock()
+	defer r.leaderLock.RUnlock()
+	return r.leader
+}
+
+// setLeader records leader as the current leader of the cluster.
+func (r *Raft) setLeader(leader string) {
+	r.leaderLock.Lock()
+	defer r.leaderLock.Unlock()
+	r.leader = leader
+}
+
+// Apply submits a command to be applied to the FSM in a highly
+// consistent manner and returns a future that can be used to wait on
+// the application. A positive timeout limits how long we wait for the
+// command to be started; otherwise there is no limit. This must be run
+// on the leader or it will fail.
+func (r *Raft) Apply(cmd []byte, timeout time.Duration) ApplyFuture {
+	metrics.IncrCounter([]string{"raft", "apply"}, 1)
+
+	// Without a timeout the channel stays nil, and a nil channel is
+	// never ready, so the timeout case below can never be chosen
+	var expired <-chan time.Time
+	if timeout > 0 {
+		expired = time.After(timeout)
+	}
+
+	// Create a log future, no index or term yet
+	pending := &logFuture{
+		log: Log{Type: LogCommand, Data: cmd},
+	}
+	pending.init()
+
+	select {
+	case r.applyCh <- pending:
+		return pending
+	case <-r.shutdownCh:
+		return errorFuture{ErrRaftShutdown}
+	case <-expired:
+		return errorFuture{ErrEnqueueTimeout}
+	}
+}
+
+// Barrier issues a command that blocks until all preceding operations
+// have been applied to the FSM. It can be used to ensure the FSM
+// reflects all queued writes. A positive timeout limits how long we
+// wait for the command to be started; otherwise there is no limit.
+// This must be run on the leader or it will fail.
+func (r *Raft) Barrier(timeout time.Duration) Future {
+	metrics.IncrCounter([]string{"raft", "barrier"}, 1)
+
+	// Without a timeout the channel stays nil and its case is never chosen
+	var expired <-chan time.Time
+	if timeout > 0 {
+		expired = time.After(timeout)
+	}
+
+	// Create a log future, no index or term yet
+	pending := &logFuture{
+		log: Log{Type: LogBarrier},
+	}
+	pending.init()
+
+	select {
+	case r.applyCh <- pending:
+		return pending
+	case <-r.shutdownCh:
+		return errorFuture{ErrRaftShutdown}
+	case <-expired:
+		return errorFuture{ErrEnqueueTimeout}
+	}
+}
+
+// VerifyLeader checks that the current node is still the leader.
+// Callers can use it to prevent stale reads when a new leader has
+// potentially been elected.
+func (r *Raft) VerifyLeader() Future {
+	metrics.IncrCounter([]string{"raft", "verify_leader"}, 1)
+	pending := &verifyFuture{}
+	pending.init()
+	select {
+	case r.verifyCh <- pending:
+		return pending
+	case <-r.shutdownCh:
+		return errorFuture{ErrRaftShutdown}
+	}
+}
+
+// AddPeer queues a request to add a new peer to the cluster. This
+// must be run on the leader or it will fail.
+func (r *Raft) AddPeer(peer string) Future {
+	// Wrap the change in a log future, no index or term yet
+	pending := &logFuture{
+		log: Log{Type: LogAddPeer, peer: peer},
+	}
+	pending.init()
+
+	// Hand it over on applyCh unless Raft has been shut down
+	select {
+	case <-r.shutdownCh:
+		return errorFuture{ErrRaftShutdown}
+	case r.applyCh <- pending:
+		return pending
+	}
+}
+
+// RemovePeer queues a request to remove a peer from the cluster.
+// Removing the current leader will cause a new election to occur.
+// This must be run on the leader or it will fail.
+func (r *Raft) RemovePeer(peer string) Future {
+	// Wrap the change in a log future, no index or term yet
+	pending := &logFuture{
+		log: Log{Type: LogRemovePeer, peer: peer},
+	}
+	pending.init()
+
+	// Hand it over on applyCh unless Raft has been shut down
+	select {
+	case <-r.shutdownCh:
+		return errorFuture{ErrRaftShutdown}
+	case r.applyCh <- pending:
+		return pending
+	}
+}
+
+// SetPeers forcibly replaces the set of internal peers and the
+// peerstore with the ones specified. This can be considered
+// unsafe.
+func (r *Raft) SetPeers(p []string) Future {
+	pending := &peerFuture{peers: p}
+	pending.init()
+
+	// Hand the request over on peerCh unless Raft has been shut down
+	select {
+	case <-r.shutdownCh:
+		return errorFuture{ErrRaftShutdown}
+	case r.peerCh <- pending:
+		return pending
+	}
+}
+
+// Shutdown stops the Raft background routines. This is not a graceful
+// operation. The returned future can be used to block until all
+// background routines have exited. Repeated calls are harmless.
+func (r *Raft) Shutdown() Future {
+	r.shutdownLock.Lock()
+	defer r.shutdownLock.Unlock()
+	done := &shutdownFuture{r}
+	if r.shutdown {
+		return done
+	}
+	close(r.shutdownCh)
+	r.shutdown = true
+	r.setState(Shutdown)
+	return done
+}
+
+// Snapshot manually forces Raft to take a snapshot. The returned
+// future can be used to block until the snapshot is complete.
+func (r *Raft) Snapshot() Future {
+	pending := &snapshotFuture{}
+	pending.init()
+	// Hand the request over on snapshotCh unless Raft has been shut down
+	select {
+	case <-r.shutdownCh:
+		return errorFuture{ErrRaftShutdown}
+	case r.snapshotCh <- pending:
+		return pending
+	}
+}
+
+// State returns the current raft state, as reported by getState.
+func (r *Raft) State() RaftState {
+	return r.getState()
+}
+
+// LeaderCh is used to get a channel which delivers signals on
+// acquiring or losing leadership. It sends true if we become
+// the leader, and false if we lose it. The channel is not buffered,
+// and does not block on writes. NOTE(review): presumably a signal is dropped when no receiver is ready; confirm in asyncNotifyBool.
+func (r *Raft) LeaderCh() <-chan bool {
+	return r.leaderCh
+}
+
+func (r *Raft) String() string { // String describes the node by its local address and current state
+	return fmt.Sprintf("Node at %s [%v]", r.localAddr, r.getState())
+}
+
+// LastContact reports when we last had contact from a leader. The
+// value is only meaningful while we are a follower.
+func (r *Raft) LastContact() time.Time {
+	// lastContactLock guards r.lastContact
+	r.lastContactLock.RLock()
+	defer r.lastContactLock.RUnlock()
+	return r.lastContact
+}
+
+// Stats returns a map of various internal stats, all rendered as
+// strings. Use it only for informative purposes or debugging.
+func (r *Raft) Stats() map[string]string {
+	fmtUint := func(v uint64) string {
+		return strconv.FormatUint(v, 10)
+	}
+	stats := map[string]string{
+		"state":               r.getState().String(),
+		"term":                fmtUint(r.getCurrentTerm()),
+		"last_log_index":      fmtUint(r.getLastLogIndex()),
+		"last_log_term":       fmtUint(r.getLastLogTerm()),
+		"commit_index":        fmtUint(r.getCommitIndex()),
+		"applied_index":       fmtUint(r.getLastApplied()),
+		"fsm_pending":         fmtUint(uint64(len(r.fsmCommitCh))),
+		"last_snapshot_index": fmtUint(r.getLastSnapshotIndex()),
+		"last_snapshot_term":  fmtUint(r.getLastSnapshotTerm()),
+		"num_peers":           fmtUint(uint64(len(r.peers))),
+	}
+	switch contact := r.LastContact(); {
+	case contact.IsZero():
+		stats["last_contact"] = "never"
+	case r.getState() == Leader:
+		stats["last_contact"] = "0"
+	default:
+		stats["last_contact"] = fmt.Sprintf("%v", time.Since(contact))
+	}
+	return stats
+}
+
+// LastIndex returns the last index in stable storage,
+// either from the last log or from the last snapshot. It simply delegates to getLastIndex.
+func (r *Raft) LastIndex() uint64 {
+	return r.getLastIndex()
+}
+
+// AppliedIndex returns the last index applied to the FSM, as reported by getLastApplied.
+// This is generally lagging behind the last index, especially
+// for indexes that are persisted but have not yet been considered
+// committed by the leader.
+func (r *Raft) AppliedIndex() uint64 {
+	return r.getLastApplied()
+}
+
+// runFSM is a long running goroutine responsible for applying logs
+// to the FSM. This is done async of other logs since we don't want
+// the FSM to block our internal operations. It also serves restore and snapshot requests, and exits on shutdown.
+func (r *Raft) runFSM() {
+	var lastIndex, lastTerm uint64
+	for {
+		select {
+		case req := <-r.fsmRestoreCh:
+			// Open the snapshot
+			meta, source, err := r.snapshots.Open(req.ID)
+			if err != nil {
+				req.respond(fmt.Errorf("failed to open snapshot %v: %v", req.ID, err))
+				continue
+			}
+
+			// Attempt to restore; the source is closed whether or not this succeeds
+			start := time.Now()
+			if err := r.fsm.Restore(source); err != nil {
+				req.respond(fmt.Errorf("failed to restore snapshot %v: %v", req.ID, err))
+				source.Close()
+				continue
+			}
+			source.Close()
+			metrics.MeasureSince([]string{"raft", "fsm", "restore"}, start)
+
+			// Update the last index and term to those recorded in the snapshot metadata
+			lastIndex = meta.Index
+			lastTerm = meta.Term
+			req.respond(nil)
+
+		case req := <-r.fsmSnapshotCh:
+			// Nothing has been applied or restored by this goroutine yet, so there is nothing to snapshot
+			if lastIndex == 0 {
+				req.respond(ErrNothingNewToSnapshot)
+				continue
+			}
+
+			// Get our peers
+			peers, err := r.peerStore.Peers()
+			if err != nil {
+				req.respond(err)
+				continue
+			}
+
+			// Start a snapshot
+			start := time.Now()
+			snap, err := r.fsm.Snapshot()
+			metrics.MeasureSince([]string{"raft", "fsm", "snapshot"}, start)
+
+			// Respond to the request, passing along any error from fsm.Snapshot
+			req.index = lastIndex
+			req.term = lastTerm
+			req.peers = peers
+			req.snapshot = snap
+			req.respond(err)
+
+		case commitTuple := <-r.fsmCommitCh:
+			// Apply the log if a command; other log types leave resp nil
+			var resp interface{}
+			if commitTuple.log.Type == LogCommand {
+				start := time.Now()
+				resp = r.fsm.Apply(commitTuple.log)
+				metrics.MeasureSince([]string{"raft", "fsm", "apply"}, start)
+			}
+
+			// Update the indexes
+			lastIndex = commitTuple.log.Index
+			lastTerm = commitTuple.log.Term
+
+			// Invoke the future if given
+			if commitTuple.future != nil {
+				commitTuple.future.response = resp
+				commitTuple.future.respond(nil)
+			}
+		case <-r.shutdownCh:
+			return
+		}
+	}
+}
+
+// run is a long running goroutine that drives the Raft state machine by
+// repeatedly entering the loop for whichever state we are currently in.
+func (r *Raft) run() {
+	for {
+		// Stop looping once a shutdown has been signalled
+		select {
+		case <-r.shutdownCh:
+			// Clear the leader to prevent forwarding
+			r.setLeader("")
+			return
+		default:
+		}
+
+		// Enter into a sub-FSM; any other state just loops again
+		if state := r.getState(); state == Follower {
+			r.runFollower()
+		} else if state == Candidate {
+			r.runCandidate()
+		} else if state == Leader {
+			r.runLeader()
+		}
+	}
+}
+
+// runFollower runs the follower loop. It returns when we become a candidate or on shutdown.
+func (r *Raft) runFollower() {
+	didWarn := false
+	r.logger.Printf("[INFO] raft: %v entering Follower state", r)
+	metrics.IncrCounter([]string{"raft", "state", "follower"}, 1)
+	heartbeatTimer := randomTimeout(r.conf.HeartbeatTimeout)
+	for {
+		select {
+		case rpc := <-r.rpcCh:
+			r.processRPC(rpc)
+
+		case a := <-r.applyCh:
+			// Reject any operations since we are not the leader
+			a.respond(ErrNotLeader)
+
+		case v := <-r.verifyCh:
+			// Reject any operations since we are not the leader
+			v.respond(ErrNotLeader)
+
+		case p := <-r.peerCh:
+			// Set the peers: our own list excludes us, the peer store gets the list as given
+			r.peers = ExcludePeer(p.peers, r.localAddr)
+			p.respond(r.peerStore.SetPeers(p.peers))
+
+		case <-heartbeatTimer:
+			// Restart the heartbeat timer
+			heartbeatTimer = randomTimeout(r.conf.HeartbeatTimeout)
+
+			// Keep waiting if the last contact is more recent than the heartbeat timeout
+			lastContact := r.LastContact()
+			if time.Now().Sub(lastContact) < r.conf.HeartbeatTimeout {
+				continue
+			}
+
+			// Heartbeat failed! Transition to the candidate state, unless we have no peers and may not run alone
+			r.setLeader("")
+			if len(r.peers) == 0 && !r.conf.EnableSingleNode {
+				if !didWarn {
+					r.logger.Printf("[WARN] raft: EnableSingleNode disabled, and no known peers. Aborting election.")
+					didWarn = true
+				}
+			} else {
+				r.logger.Printf("[WARN] raft: Heartbeat timeout reached, starting election")
+
+				metrics.IncrCounter([]string{"raft", "transition", "heartbeat_timout"}, 1)
+				r.setState(Candidate)
+				return
+			}
+
+		case <-r.shutdownCh:
+			return
+		}
+	}
+}
+
+// runCandidate runs one election round. It returns when the state changes, the election times out, or on shutdown.
+func (r *Raft) runCandidate() {
+	r.logger.Printf("[INFO] raft: %v entering Candidate state", r)
+	metrics.IncrCounter([]string{"raft", "state", "candidate"}, 1)
+
+	// Start vote for us, and set a timeout
+	voteCh := r.electSelf()
+	electionTimer := randomTimeout(r.conf.ElectionTimeout)
+
+	// Tally the votes, need a simple majority
+	grantedVotes := 0
+	votesNeeded := r.quorumSize()
+	r.logger.Printf("[DEBUG] raft: Votes needed: %d", votesNeeded)
+
+	for r.getState() == Candidate {
+		select {
+		case rpc := <-r.rpcCh:
+			r.processRPC(rpc)
+
+		case vote := <-voteCh:
+			// Check if the term is greater than ours, bail
+			if vote.Term > r.getCurrentTerm() {
+				r.logger.Printf("[DEBUG] raft: Newer term discovered, fallback to follower")
+				r.setState(Follower)
+				r.setCurrentTerm(vote.Term)
+				return
+			}
+
+			// Check if the vote is granted
+			if vote.Granted {
+				grantedVotes++
+				r.logger.Printf("[DEBUG] raft: Vote granted from %s. Tally: %d", vote.voter, grantedVotes)
+			}
+
+			// Check if we've become the leader
+			if grantedVotes >= votesNeeded {
+				r.logger.Printf("[INFO] raft: Election won. Tally: %d", grantedVotes)
+				r.setState(Leader)
+				r.setLeader(r.localAddr)
+				return
+			}
+
+		case a := <-r.applyCh:
+			// Reject any operations since we are not the leader
+			a.respond(ErrNotLeader)
+
+		case v := <-r.verifyCh:
+			// Reject any operations since we are not the leader
+			v.respond(ErrNotLeader)
+
+		case p := <-r.peerCh:
+			// Set the peers: our own list excludes us, the peer store gets the list as given
+			r.peers = ExcludePeer(p.peers, r.localAddr)
+			p.respond(r.peerStore.SetPeers(p.peers))
+			// Become a follower again
+			r.setState(Follower)
+			return
+
+		case <-electionTimer:
+			// Election failed! Restart the election. We simply return,
+			// which will kick us back into runCandidate
+			r.logger.Printf("[WARN] raft: Election timeout reached, restarting election")
+			return
+
+		case <-r.shutdownCh:
+			return
+		}
+	}
+}
+
+// runLeader runs the FSM for a leader. Do the setup here and drop into
+// the leaderLoop for the hot loop. The deferred cleanup runs once leaderLoop returns.
+func (r *Raft) runLeader() {
+	r.logger.Printf("[INFO] raft: %v entering Leader state", r)
+	metrics.IncrCounter([]string{"raft", "state", "leader"}, 1)
+
+	// Notify that we are the leader
+	asyncNotifyBool(r.leaderCh, true)
+
+	// Push to the notify channel if given; this blocks until it is received or we shut down
+	if notify := r.conf.NotifyCh; notify != nil {
+		select {
+		case notify <- true:
+		case <-r.shutdownCh:
+		}
+	}
+
+	// Setup leader state
+	r.leaderState.commitCh = make(chan struct{}, 1)
+	r.leaderState.inflight = newInflight(r.leaderState.commitCh)
+	r.leaderState.replState = make(map[string]*followerReplication)
+	r.leaderState.notify = make(map[*verifyFuture]struct{})
+	r.leaderState.stepDown = make(chan struct{}, 1)
+
+	// Cleanup state on step down
+	defer func() {
+		// Since we were the leader previously, we update our
+		// last contact time when we step down, so that we are not
+		// reporting a last contact time from before we were the
+		// leader. Otherwise, to a client it would seem our data
+		// is extremely stale.
+		r.setLastContact()
+
+		// Stop replication by closing each peer's stop channel
+		for _, p := range r.leaderState.replState {
+			close(p.stopCh)
+		}
+
+		// Cancel inflight requests
+		r.leaderState.inflight.Cancel(ErrLeadershipLost)
+
+		// Respond to any pending verify requests
+		for future := range r.leaderState.notify {
+			future.respond(ErrLeadershipLost)
+		}
+
+		// Clear all the state
+		r.leaderState.commitCh = nil
+		r.leaderState.inflight = nil
+		r.leaderState.replState = nil
+		r.leaderState.notify = nil
+		r.leaderState.stepDown = nil
+
+		// If we are stepping down for some reason, no known leader.
+		// We may have stepped down due to an RPC call, which would
+		// provide the leader, so we cannot always blank this out.
+		r.leaderLock.Lock()
+		if r.leader == r.localAddr {
+			r.leader = ""
+		}
+		r.leaderLock.Unlock()
+
+		// Notify that we are not the leader
+		asyncNotifyBool(r.leaderCh, false)
+
+		// Push to the notify channel if given
+		if notify := r.conf.NotifyCh; notify != nil {
+			select {
+			case notify <- false:
+			case <-r.shutdownCh:
+				// On shutdown, make a best effort but do not block
+				select {
+				case notify <- false:
+				default:
+				}
+			}
+		}
+	}()
+
+	// Start a replication routine for each peer
+	for _, peer := range r.peers {
+		r.startReplication(peer)
+	}
+
+	// Dispatch a no-op log first. Instead of LogNoop,
+	// we use a LogAddPeer with our peerset. This acts like
+	// a no-op as well, but when doing an initial bootstrap, ensures
+	// that all nodes share a common peerset.
+	peerSet := append([]string{r.localAddr}, r.peers...)
+	noop := &logFuture{
+		log: Log{
+			Type: LogAddPeer,
+			Data: encodePeers(peerSet, r.trans),
+		},
+	}
+	r.dispatchLogs([]*logFuture{noop})
+
+	// Disable EnableSingleNode after we've been elected leader.
+	// This is to prevent a split brain in the future, if we are removed
+	// from the cluster and then elect ourself as leader.
+	if r.conf.DisableBootstrapAfterElect && r.conf.EnableSingleNode {
+		r.logger.Printf("[INFO] raft: Disabling EnableSingleNode (bootstrap)")
+		r.conf.EnableSingleNode = false
+	}
+
+	// Sit in the leader loop until we step down
+	r.leaderLoop()
+}
+
+// startReplication sets up replication state for peer and starts replicating to it asynchronously.
+func (r *Raft) startReplication(peer string) {
+	next := r.getLastIndex() + 1
+	repl := &followerReplication{
+		peer:        peer,
+		inflight:    r.leaderState.inflight,
+		stopCh:      make(chan uint64, 1),
+		triggerCh:   make(chan struct{}, 1),
+		currentTerm: r.getCurrentTerm(),
+		matchIndex:  0,
+		nextIndex:   next,
+		lastContact: time.Now(),
+		notifyCh:    make(chan struct{}, 1),
+		stepDown:    r.leaderState.stepDown,
+	}
+	r.leaderState.replState[peer] = repl
+	r.goFunc(func() { r.replicate(repl) })
+	asyncNotifyCh(repl.triggerCh)
+}
+
+// leaderLoop is the hot loop for a leader. It is invoked after all the
+// various leader setup is done and returns once we stop leading or shut down.
+func (r *Raft) leaderLoop() {
+	// stepDown is used to track if there is an inflight log that
+	// would cause us to lose leadership (specifically a RemovePeer of
+	// ourselves). If this is the case, we must not allow any logs to
+	// be processed in parallel, otherwise we are basing commit on
+	// only a single peer (ourself) and replicating to an undefined set
+	// of peers.
+	stepDown := false
+
+	lease := time.After(r.conf.LeaderLeaseTimeout)
+	for r.getState() == Leader {
+		select {
+		case rpc := <-r.rpcCh:
+			r.processRPC(rpc)
+
+		case <-r.leaderState.stepDown:
+			r.setState(Follower)
+
+		case <-r.leaderState.commitCh:
+			// Get the committed messages
+			committed := r.leaderState.inflight.Committed()
+			for e := committed.Front(); e != nil; e = e.Next() {
+				// Measure the commit time
+				commitLog := e.Value.(*logFuture)
+				metrics.MeasureSince([]string{"raft", "commitTime"}, commitLog.dispatch)
+
+				// Increment the commit index
+				idx := commitLog.log.Index
+				r.setCommitIndex(idx)
+				r.processLogs(idx, commitLog)
+			}
+
+		case v := <-r.verifyCh:
+			if v.quorumSize == 0 {
+				// Just dispatched, start the verification
+				r.verifyLeader(v)
+			} else if v.votes < v.quorumSize {
+				// Early return, means there must be a new leader
+				r.logger.Printf("[WARN] raft: New leader elected, stepping down")
+				r.setState(Follower)
+				delete(r.leaderState.notify, v)
+				v.respond(ErrNotLeader)
+			} else {
+				// Quorum of members agree, we are still leader
+				delete(r.leaderState.notify, v)
+				v.respond(nil)
+			}
+
+		case p := <-r.peerCh:
+			p.respond(ErrLeader)
+
+		case newLog := <-r.applyCh:
+			// Group commit, gather all the ready commits. Stop at the first
+			// empty poll; a bare break would only leave the select, not the loop.
+			ready := []*logFuture{newLog}
+		gather:
+			for i := 0; i < r.conf.MaxAppendEntries; i++ {
+				select {
+				case newLog := <-r.applyCh:
+					ready = append(ready, newLog)
+				default:
+					break gather
+				}
+			}
+
+			// Handle any peer set changes
+			n := len(ready)
+			for i := 0; i < n; i++ {
+				// Fail all future transactions once stepDown is on
+				if stepDown {
+					ready[i].respond(ErrNotLeader)
+					ready[i], ready[n-1] = ready[n-1], nil
+					n--
+					i--
+					continue
+				}
+
+				// Special case AddPeer and RemovePeer
+				log := ready[i]
+				if log.log.Type != LogAddPeer && log.log.Type != LogRemovePeer {
+					continue
+				}
+
+				// Check if this log should be ignored. The logs can be
+				// reordered here since we have not yet assigned an index
+				// and are not violating any promises.
+				if !r.preparePeerChange(log) {
+					ready[i], ready[n-1] = ready[n-1], nil
+					n--
+					i--
+					continue
+				}
+
+				// Apply peer set changes early and check if we will step
+				// down after the commit of this log. If so, we must not
+				// allow any future entries to make progress to avoid undefined
+				// behavior.
+				if ok := r.processLog(&log.log, nil, true); ok {
+					stepDown = true
+				}
+			}
+
+			// Nothing to do if all logs are invalid
+			if n == 0 {
+				continue
+			}
+
+			// Dispatch the logs
+			ready = ready[:n]
+			r.dispatchLogs(ready)
+
+		case <-lease:
+			// Check if we've exceeded the lease, potentially stepping down
+			maxDiff := r.checkLeaderLease()
+
+			// Next check interval should adjust for the last node we've
+			// contacted, without going negative
+			checkInterval := r.conf.LeaderLeaseTimeout - maxDiff
+			if checkInterval < minCheckInterval {
+				checkInterval = minCheckInterval
+			}
+
+			// Renew the lease timer
+			lease = time.After(checkInterval)
+
+		case <-r.shutdownCh:
+			return
+		}
+	}
+}
+
+// verifyLeader starts a leadership verification round for v by asking every
+// follower replicator to heartbeat now. Must run on the main raft goroutine.
+func (r *Raft) verifyLeader(v *verifyFuture) {
+	// Seed the tally with our own vote and record how many are needed
+	quorum := r.quorumSize()
+	v.votes, v.quorumSize = 1, quorum
+
+	// A single-node cluster needs no confirmation from anyone else
+	if quorum == 1 {
+		v.respond(nil)
+		return
+	}
+
+	// Track the future and point its notifications at verifyCh
+	r.leaderState.notify[v] = struct{}{}
+	v.notifyCh = r.verifyCh
+
+	// Queue the future on each follower and poke its notify channel
+	for _, follower := range r.leaderState.replState {
+		follower.notifyLock.Lock()
+		follower.notify = append(follower.notify, v)
+		follower.notifyLock.Unlock()
+		asyncNotifyCh(follower.notifyCh)
+	}
+}
+
+// checkLeaderLease verifies that a quorum of nodes has been in contact
+// within the leader lease interval, and steps down to follower otherwise
+// since connectivity may have been lost. Returns the largest time since
+// last contact among the followers that are still within the lease.
+func (r *Raft) checkLeaderLease() time.Duration {
+	// We can always reach ourself, so start the tally at one
+	reachable := 1
+
+	// Examine every follower's last contact time
+	var worst time.Duration
+	start := time.Now()
+	for addr, repl := range r.leaderState.replState {
+		elapsed := start.Sub(repl.LastContact())
+		switch {
+		case elapsed <= r.conf.LeaderLeaseTimeout:
+			// Heard from this follower within the lease
+			reachable++
+			if elapsed > worst {
+				worst = elapsed
+			}
+		case elapsed <= 3*r.conf.LeaderLeaseTimeout:
+			// Log at least once at high value, then debug. Otherwise it gets very verbose.
+			r.logger.Printf("[WARN] raft: Failed to contact %v in %v", addr, elapsed)
+		default:
+			r.logger.Printf("[DEBUG] raft: Failed to contact %v in %v", addr, elapsed)
+		}
+		metrics.AddSample([]string{"raft", "leader", "lastContact"}, float32(elapsed/time.Millisecond))
+	}
+
+	// Step down when fewer than a quorum of nodes (ourself included)
+	// were reachable within the lease
+	if needed := r.quorumSize(); reachable < needed {
+		r.logger.Printf("[WARN] raft: Failed to contact quorum of nodes, stepping down")
+		r.setState(Follower)
+		metrics.IncrCounter([]string{"raft", "transition", "leader_lease_timeout"}, 1)
+	}
+	return worst
+}
+
+// quorumSize returns the number of nodes forming a majority of the cluster (peers plus ourself)
+func (r *Raft) quorumSize() int {
+	return (len(r.peers)+1)/2 + 1
+}
+
+// preparePeerChange checks if a LogAddPeer or LogRemovePeer should be performed,
+// and properly formats the data field on the log before dispatching it.
+func (r *Raft) preparePeerChange(l *logFuture) bool {
+	// Check if this is a known peer
+	p := l.log.peer
+	knownPeer := PeerContained(r.peers, p) || r.localAddr == p
+
+	// Ignore known peers on add
+	if l.log.Type == LogAddPeer && knownPeer {
+		l.respond(ErrKnownPeer)
+		return false
+	}
+
+	// Ignore unknown peers on remove
+	if l.log.Type == LogRemovePeer && !knownPeer {
+		l.respond(ErrUnknownPeer)
+		return false
+	}
+
+	// Construct the peer set
+	var peerSet []string
+	if l.log.Type == LogAddPeer {
+		peerSet = append([]string{p, r.localAddr}, r.peers...)
+	} else {
+		peerSet = ExcludePeer(append([]string{r.localAddr}, r.peers...), p)
+	}
+
+	// Setup the log
+	l.log.Data = encodePeers(peerSet, r.trans)
+	return true
+}
+
+// dispatchLogs is called to push a batch of logs to disk, mark them
+// as inflight and begin replication of them.
+func (r *Raft) dispatchLogs(applyLogs []*logFuture) {
+	start := time.Now()
+	defer metrics.MeasureSince([]string{"raft", "leader", "dispatchLog"}, start)
+
+	term := r.getCurrentTerm()
+	base := r.getLastIndex()
+	entries := make([]*Log, 0, len(applyLogs))
+
+	for i, f := range applyLogs {
+		f.dispatch = start
+		f.log.Index = base + uint64(i) + 1 // indexes continue after the current last index
+		f.log.Term = term
+		f.policy = newMajorityQuorum(len(r.peers) + 1)
+		entries = append(entries, &f.log)
+	}
+
+	// Persist locally first; on failure fail every future and give up leadership
+	if err := r.logs.StoreLogs(entries); err != nil {
+		r.logger.Printf("[ERR] raft: Failed to commit logs: %v", err)
+		for _, f := range applyLogs {
+			f.respond(err)
+		}
+		r.setState(Follower)
+		return
+	}
+
+	// Register the batch with the inflight tracker
+	r.leaderState.inflight.StartAll(applyLogs)
+
+	// The entries are on disk now, so advance the last log index and term
+	r.setLastLogIndex(base + uint64(len(applyLogs)))
+	r.setLastLogTerm(term)
+
+	// Signal every replicator that new logs are available
+	for _, repl := range r.leaderState.replState {
+		asyncNotifyCh(repl.triggerCh)
+	}
+}
+
+// processLogs applies every log after lastApplied up to and including
+// index; future, when non-nil, supplies the entry whose index matches it.
+func (r *Raft) processLogs(index uint64, future *logFuture) {
+	// Nothing to do when the index has already been applied
+	applied := r.getLastApplied()
+	if index <= applied {
+		r.logger.Printf("[WARN] raft: Skipping application of old log: %d", index)
+		return
+	}
+
+	// Walk forward from the first unapplied index
+	for next := r.getLastApplied() + 1; next <= index; next++ {
+		if future == nil || future.log.Index != next {
+			// Not the entry carried by the future, so read it back from
+			// the log store; failing to do so is treated as fatal
+			entry := new(Log)
+			if err := r.logs.GetLog(next, entry); err != nil {
+				r.logger.Printf("[ERR] raft: Failed to get log at %d: %v", next, err)
+				panic(err)
+			}
+			r.processLog(entry, nil, false)
+		} else {
+			r.processLog(&future.log, future, false)
+		}
+
+		// Record progress after each entry
+		r.setLastApplied(next)
+	}
+}
+
+// processLog applies a single log entry. With precommit set, peer set changes (other than a self-removal) take effect early and the future is left unanswered.
+// Returns true if this log entry would cause us to step down after it commits, i.e. it is a RemovePeer that removes ourself.
+func (r *Raft) processLog(l *Log, future *logFuture, precommit bool) (stepDown bool) {
+	switch l.Type {
+	case LogBarrier:
+		// Barrier is handled by the FSM
+		fallthrough
+
+	case LogCommand:
+		// Forward to the fsm handler
+		select {
+		case r.fsmCommitCh <- commitTuple{l, future}:
+		case <-r.shutdownCh:
+			if future != nil {
+				future.respond(ErrRaftShutdown)
+			}
+		}
+
+		// Return so that the future is only responded to
+		// by the FSM handler when the application is done
+		return
+
+	case LogAddPeer:
+		fallthrough
+	case LogRemovePeer:
+		peers := decodePeers(l.Data, r.trans)
+		r.logger.Printf("[DEBUG] raft: Node %v updated peer set (%v): %v", r.localAddr, l.Type, peers)
+
+		// A RemovePeer whose resulting peer set no longer includes us is a self-removal
+		removeSelf := !PeerContained(peers, r.localAddr) && l.Type == LogRemovePeer
+		if removeSelf {
+			// Mark that this operation will cause us to step down as
+			// leader. This prevents the future logs from being Applied
+			// from this leader.
+			stepDown = true
+
+			// We only modify the peers after the commit, otherwise we
+			// would be using a quorum size of 1 for the RemovePeer operation.
+			// This is used with the stepDown guard to prevent any other logs.
+			if !precommit {
+				r.peers = nil
+				r.peerStore.SetPeers([]string{r.localAddr})
+			}
+		} else {
+			r.peers = ExcludePeer(peers, r.localAddr)
+			r.peerStore.SetPeers(peers)
+		}
+
+		// If we are the leader, start replication to any peer that has no replication state yet
+		if r.getState() == Leader {
+			for _, p := range r.peers {
+				if _, ok := r.leaderState.replState[p]; !ok {
+					r.logger.Printf("[INFO] raft: Added peer %v, starting replication", p)
+					r.startReplication(p)
+				}
+			}
+		}
+
+		// Stop replication for nodes no longer in the peer set (leader only, and never during precommit)
+		if r.getState() == Leader && !precommit {
+			var toDelete []string
+			for _, repl := range r.leaderState.replState {
+				if !PeerContained(r.peers, repl.peer) {
+					r.logger.Printf("[INFO] raft: Removed peer %v, stopping replication (Index: %d)", repl.peer, l.Index)
+
+					// Replicate up to this index and stop
+					repl.stopCh <- l.Index
+					close(repl.stopCh)
+					toDelete = append(toDelete, repl.peer)
+				}
+			}
+			for _, name := range toDelete {
+				delete(r.leaderState.replState, name)
+			}
+		}
+
+		// Handle removing ourself: shut down or fall back to follower, depending on ShutdownOnRemove
+		if removeSelf && !precommit {
+			if r.conf.ShutdownOnRemove {
+				r.logger.Printf("[INFO] raft: Removed ourself, shutting down")
+				r.Shutdown()
+			} else {
+				r.logger.Printf("[INFO] raft: Removed ourself, transitioning to follower")
+				r.setState(Follower)
+			}
+		}
+
+	case LogNoop:
+		// Ignore the no-op
+	default:
+		r.logger.Printf("[ERR] raft: Got unrecognized log type: %#v", l)
+	}
+
+	// Respond to the future if given; precommit calls leave it unanswered
+	if future != nil && !precommit {
+		future.respond(nil)
+	}
+	return
+}
+
+// processRPC dispatches an incoming RPC to the handler matching its command type.
+func (r *Raft) processRPC(rpc RPC) {
+	switch req := rpc.Command.(type) {
+	case *AppendEntriesRequest:
+		r.appendEntries(rpc, req)
+	case *RequestVoteRequest:
+		r.requestVote(rpc, req)
+	case *InstallSnapshotRequest:
+		r.installSnapshot(rpc, req)
+	default: // any other command is logged and answered with an error
+		r.logger.Printf("[ERR] raft: Got unexpected command: %#v", rpc.Command)
+		rpc.Respond(nil, fmt.Errorf("unexpected command"))
+	}
+}
+
+// processHeartbeat is a special handler used just for heartbeat requests
+// so that they can be fast-pathed if a transport supports it.
+func (r *Raft) processHeartbeat(rpc RPC) {
+	defer metrics.MeasureSince([]string{"raft", "rpc", "processHeartbeat"}, time.Now())
+
+	// Check if we are shutdown, just ignore the RPC
+	select {
+	case <-r.shutdownCh:
+		return
+	default:
+	}
+
+	// Ensure we are only handling a heartbeat
+	switch cmd := rpc.Command.(type) {
+	case *AppendEntriesRequest:
+		r.appendEntries(rpc, cmd)
+	default:
+		r.logger.Printf("[ERR] raft: Expected heartbeat, got command: %#v", rpc.Command)
+		rpc.Respond(nil, fmt.Errorf("unexpected command"))
+	}
+}
+
+// appendEntries is invoked when we get an append entries RPC call; the reply is always sent by the deferred rpc.Respond.
+func (r *Raft) appendEntries(rpc RPC, a *AppendEntriesRequest) {
+	defer metrics.MeasureSince([]string{"raft", "rpc", "appendEntries"}, time.Now())
+	// Setup a response; Success stays false unless every step below passes
+	resp := &AppendEntriesResponse{
+		Term:           r.getCurrentTerm(),
+		LastLog:        r.getLastIndex(),
+		Success:        false,
+		NoRetryBackoff: false,
+	}
+	var rpcErr error
+	defer func() {
+		rpc.Respond(resp, rpcErr)
+	}()
+
+	// Ignore an older term (the reply still carries our current term)
+	if a.Term < r.getCurrentTerm() {
+		return
+	}
+
+	// Increase the term if we see a newer one, also transition to follower
+	// if we ever get an appendEntries call
+	if a.Term > r.getCurrentTerm() || r.getState() != Follower {
+		// Ensure transition to follower
+		r.setState(Follower)
+		r.setCurrentTerm(a.Term)
+		resp.Term = a.Term
+	}
+
+	// Save the current leader
+	r.setLeader(r.trans.DecodePeer(a.Leader))
+
+	// Verify the previous log entry's term matches ours (skipped when PrevLogEntry is 0)
+	if a.PrevLogEntry > 0 {
+		lastIdx, lastTerm := r.getLastEntry()
+
+		var prevLogTerm uint64
+		if a.PrevLogEntry == lastIdx {
+			prevLogTerm = lastTerm
+
+		} else {
+			var prevLog Log
+			if err := r.logs.GetLog(a.PrevLogEntry, &prevLog); err != nil {
+				r.logger.Printf("[WARN] raft: Failed to get previous log: %d %v (last: %d)",
+					a.PrevLogEntry, err, lastIdx)
+				resp.NoRetryBackoff = true
+				return
+			}
+			prevLogTerm = prevLog.Term
+		}
+
+		if a.PrevLogTerm != prevLogTerm {
+			r.logger.Printf("[WARN] raft: Previous log term mis-match: ours: %d remote: %d",
+				prevLogTerm, a.PrevLogTerm)
+			resp.NoRetryBackoff = true
+			return
+		}
+	}
+
+	// Process any new entries
+	if n := len(a.Entries); n > 0 {
+		start := time.Now()
+		first := a.Entries[0]
+		last := a.Entries[n-1]
+
+		// Delete our existing entries from the first incoming index onward, if any overlap
+		lastLogIdx := r.getLastLogIndex()
+		if first.Index <= lastLogIdx {
+			r.logger.Printf("[WARN] raft: Clearing log suffix from %d to %d", first.Index, lastLogIdx)
+			if err := r.logs.DeleteRange(first.Index, lastLogIdx); err != nil {
+				r.logger.Printf("[ERR] raft: Failed to clear log suffix: %v", err)
+				return
+			}
+		}
+
+		// Append the entries
+		if err := r.logs.StoreLogs(a.Entries); err != nil {
+			r.logger.Printf("[ERR] raft: Failed to append to logs: %v", err)
+			return
+		}
+
+		// Update the lastLog
+		r.setLastLogIndex(last.Index)
+		r.setLastLogTerm(last.Term)
+		metrics.MeasureSince([]string{"raft", "rpc", "appendEntries", "storeLogs"}, start)
+	}
+
+	// Update the commit index, capped at our last index, and apply the newly committed logs
+	if a.LeaderCommitIndex > 0 && a.LeaderCommitIndex > r.getCommitIndex() {
+		start := time.Now()
+		idx := min(a.LeaderCommitIndex, r.getLastIndex())
+		r.setCommitIndex(idx)
+		r.processLogs(idx, nil)
+		metrics.MeasureSince([]string{"raft", "rpc", "appendEntries", "processLogs"}, start)
+	}
+
+	// Everything went well, set success
+	resp.Success = true
+	r.setLastContact()
+	return
+}
+
+// requestVote handles a RequestVote RPC, granting at most one vote per term, and only to a candidate whose log is at least as up-to-date as ours.
+func (r *Raft) requestVote(rpc RPC, req *RequestVoteRequest) {
+	defer metrics.MeasureSince([]string{"raft", "rpc", "requestVote"}, time.Now())
+	// Setup a response; Granted stays false unless every check below passes
+	resp := &RequestVoteResponse{
+		Term:    r.getCurrentTerm(),
+		Peers:   encodePeers(r.peers, r.trans),
+		Granted: false,
+	}
+	var rpcErr error
+	defer func() {
+		rpc.Respond(resp, rpcErr)
+	}()
+
+	// Check if we have an existing leader [who's not the candidate]
+	candidate := r.trans.DecodePeer(req.Candidate)
+	if leader := r.Leader(); leader != "" && leader != candidate {
+		r.logger.Printf("[WARN] raft: Rejecting vote request from %v since we have a leader: %v",
+			candidate, leader)
+		return
+	}
+
+	// Ignore an older term
+	if req.Term < r.getCurrentTerm() {
+		return
+	}
+
+	// Increase the term if we see a newer one
+	if req.Term > r.getCurrentTerm() {
+		// Ensure transition to follower
+		r.setState(Follower)
+		r.setCurrentTerm(req.Term)
+		resp.Term = req.Term
+	}
+
+	// Check if we have voted yet
+	lastVoteTerm, err := r.stable.GetUint64(keyLastVoteTerm)
+	if err != nil && err.Error() != "not found" {
+		r.logger.Printf("[ERR] raft: Failed to get last vote term: %v", err)
+		return
+	}
+	lastVoteCandBytes, err := r.stable.Get(keyLastVoteCand)
+	if err != nil && err.Error() != "not found" {
+		r.logger.Printf("[ERR] raft: Failed to get last vote candidate: %v", err)
+		return
+	}
+
+	// Check if we've voted in this election before; only a repeat request from the same candidate is re-granted
+	if lastVoteTerm == req.Term && lastVoteCandBytes != nil {
+		r.logger.Printf("[INFO] raft: Duplicate RequestVote for same term: %d", req.Term)
+		if bytes.Equal(lastVoteCandBytes, req.Candidate) {
+			r.logger.Printf("[WARN] raft: Duplicate RequestVote from candidate: %s", req.Candidate)
+			resp.Granted = true
+		}
+		return
+	}
+
+	// Reject if their log is less up-to-date than ours: a later last
+	// term wins outright, the index only matters when the terms tie
+	lastIdx, lastTerm := r.getLastEntry()
+	if lastTerm > req.LastLogTerm {
+		r.logger.Printf("[WARN] raft: Rejecting vote request from %v since our last term is greater (%d, %d)",
+			candidate, lastTerm, req.LastLogTerm)
+		return
+	}
+
+	if lastTerm == req.LastLogTerm && lastIdx > req.LastLogIndex {
+		r.logger.Printf("[WARN] raft: Rejecting vote request from %v since our last index is greater (%d, %d)",
+			candidate, lastIdx, req.LastLogIndex)
+		return
+	}
+
+	// Persist a vote for safety
+	if err := r.persistVote(req.Term, req.Candidate); err != nil {
+		r.logger.Printf("[ERR] raft: Failed to persist vote: %v", err)
+		return
+	}
+
+	resp.Granted = true
+}
+
+// installSnapshot is invoked when we get a InstallSnapshot RPC call.
+// We must be in the follower state for this, since it means we are
+// too far behind a leader for log replay. The reply is sent by the deferred rpc.Respond.
+func (r *Raft) installSnapshot(rpc RPC, req *InstallSnapshotRequest) {
+	defer metrics.MeasureSince([]string{"raft", "rpc", "installSnapshot"}, time.Now())
+	// Setup a response; Success stays false unless the whole install completes
+	resp := &InstallSnapshotResponse{
+		Term:    r.getCurrentTerm(),
+		Success: false,
+	}
+	var rpcErr error
+	defer func() {
+		rpc.Respond(resp, rpcErr)
+	}()
+
+	// Ignore an older term
+	if req.Term < r.getCurrentTerm() {
+		return
+	}
+
+	// Increase the term if we see a newer one
+	if req.Term > r.getCurrentTerm() {
+		// Ensure transition to follower
+		r.setState(Follower)
+		r.setCurrentTerm(req.Term)
+		resp.Term = req.Term
+	}
+
+	// Save the current leader
+	r.setLeader(r.trans.DecodePeer(req.Leader))
+
+	// Create a new snapshot
+	sink, err := r.snapshots.Create(req.LastLogIndex, req.LastLogTerm, req.Peers)
+	if err != nil {
+		r.logger.Printf("[ERR] raft: Failed to create snapshot to install: %v", err)
+		rpcErr = fmt.Errorf("failed to create snapshot: %v", err)
+		return
+	}
+
+	// Spill the remote snapshot to disk
+	n, err := io.Copy(sink, rpc.Reader)
+	if err != nil {
+		sink.Cancel()
+		r.logger.Printf("[ERR] raft: Failed to copy snapshot: %v", err)
+		rpcErr = err
+		return
+	}
+
+	// Check that we received it all (byte count must equal the announced size)
+	if n != req.Size {
+		sink.Cancel()
+		r.logger.Printf("[ERR] raft: Failed to receive whole snapshot: %d / %d", n, req.Size)
+		rpcErr = fmt.Errorf("short read")
+		return
+	}
+
+	// Finalize the snapshot
+	if err := sink.Close(); err != nil {
+		r.logger.Printf("[ERR] raft: Failed to finalize snapshot: %v", err)
+		rpcErr = err
+		return
+	}
+	r.logger.Printf("[INFO] raft: Copied %d bytes to local snapshot", n)
+
+	// Restore snapshot by handing a restore request to the FSM channel
+	future := &restoreFuture{ID: sink.ID()}
+	future.init()
+	select {
+	case r.fsmRestoreCh <- future:
+	case <-r.shutdownCh:
+		future.respond(ErrRaftShutdown)
+		return // rpcErr is left nil here, so the reply has Success false and no error
+	}
+
+	// Wait for the restore to happen
+	if err := future.Error(); err != nil {
+		r.logger.Printf("[ERR] raft: Failed to restore snapshot: %v", err)
+		rpcErr = err
+		return
+	}
+
+	// Update the lastApplied so we don't replay old logs
+	r.setLastApplied(req.LastLogIndex)
+
+	// Update the last stable snapshot info
+	r.setLastSnapshotIndex(req.LastLogIndex)
+	r.setLastSnapshotTerm(req.LastLogTerm)
+
+	// Restore the peer set carried by the snapshot
+	peers := decodePeers(req.Peers, r.trans)
+	r.peers = ExcludePeer(peers, r.localAddr)
+	r.peerStore.SetPeers(peers)
+
+	// Compact logs, continue even if this fails
+	if err := r.compactLogs(req.LastLogIndex); err != nil {
+		r.logger.Printf("[ERR] raft: Failed to compact logs: %v", err)
+	}
+
+	r.logger.Printf("[INFO] raft: Installed remote snapshot")
+	resp.Success = true
+	r.setLastContact()
+	return
+}
+
+// setLastContact records the current time as the last contact, under lastContactLock
+func (r *Raft) setLastContact() {
+	r.lastContactLock.Lock()
+	defer r.lastContactLock.Unlock()
+	r.lastContact = time.Now()
+}
+
+type voteResult struct {
+	RequestVoteResponse
+	voter string // address of the node this vote result is attributed to
+}
+
+// electSelf is used to send a RequestVote RPC to all peers,
+// and vote for ourself. This has the side effect of incrementing
+// the current term. The response channel returned is used to wait
+// for all the responses (including a vote for ourself).
+func (r *Raft) electSelf() <-chan *voteResult {
+	// Room for one reply per peer plus our own vote, so senders never block
+	respCh := make(chan *voteResult, len(r.peers)+1)
+
+	// Move to the next term
+	r.setCurrentTerm(r.getCurrentTerm() + 1)
+
+	// Build the request shared by every peer
+	lastIdx, lastTerm := r.getLastEntry()
+	req := &RequestVoteRequest{
+		Term:         r.getCurrentTerm(),
+		Candidate:    r.trans.EncodePeer(r.localAddr),
+		LastLogIndex: lastIdx,
+		LastLogTerm:  lastTerm,
+	}
+
+	// voteFrom asks a single peer for its vote in a background goroutine
+	voteFrom := func(peer string) {
+		r.goFunc(func() {
+			defer metrics.MeasureSince([]string{"raft", "candidate", "electSelf"}, time.Now())
+			result := &voteResult{voter: peer}
+			err := r.trans.RequestVote(peer, req, &result.RequestVoteResponse)
+			if err != nil {
+				// A failed RPC is reported as a refusal in the
+				// term we asked for
+				r.logger.Printf("[ERR] raft: Failed to make RequestVote RPC to %v: %v", peer, err)
+				result.Term = req.Term
+				result.Granted = false
+			} else {
+				// If we are not a peer, we could have been removed but failed
+				// to receive the log message. OR it could mean an improperly configured
+				// cluster. Either way, we should warn
+				peerSet := decodePeers(result.Peers, r.trans)
+				if !PeerContained(peerSet, r.localAddr) {
+					r.logger.Printf("[WARN] raft: Remote peer %v does not have local node %v as a peer",
+						peer, r.localAddr)
+				}
+			}
+
+			respCh <- result
+		})
+	}
+
+	// Fan the request out to every peer
+	for _, peer := range r.peers {
+		voteFrom(peer)
+	}
+
+	// Record our own vote durably before counting it
+	if err := r.persistVote(req.Term, req.Candidate); err != nil {
+		r.logger.Printf("[ERR] raft: Failed to persist vote : %v", err)
+		return nil
+	}
+
+	// Count our own vote
+	respCh <- &voteResult{
+		RequestVoteResponse: RequestVoteResponse{
+			Term:    req.Term,
+			Granted: true,
+		},
+		voter: r.localAddr,
+	}
+	return respCh
+}
+
+// persistVote durably records the term and candidate of the vote we cast.
+// The term is written first; the candidate is only stored if that succeeds.
+func (r *Raft) persistVote(term uint64, candidate []byte) error {
+	err := r.stable.SetUint64(keyLastVoteTerm, term)
+	if err == nil {
+		// Term is saved, now record who received the vote
+		err = r.stable.Set(keyLastVoteCand, candidate)
+	}
+	return err
+}
+
+// setCurrentTerm durably stores the new term and then updates the in-memory state.
+func (r *Raft) setCurrentTerm(t uint64) {
+	err := r.stable.SetUint64(keyCurrentTerm, t) // disk first, a failed write is fatal
+	if err != nil {
+		panic(fmt.Errorf("failed to save current term: %v", err))
+	}
+	r.raftState.setCurrentTerm(t)
+}
+
+// setState is used to update the current state. The known leader is
+// cleared on every call, before the new state is stored. This means
+// that leader should be set only after updating the state.
+func (r *Raft) setState(state RaftState) {
+	r.setLeader("")
+	r.raftState.setState(state)
+}
+
+// runSnapshots is the long-lived goroutine that decides when to snapshot
+// the FSM. Running it separately from the FSM and main goroutines keeps
+// snapshotting from blocking normal operation.
+func (r *Raft) runSnapshots() {
+	for {
+		select {
+		case <-randomTimeout(r.conf.SnapshotInterval):
+			// Periodic check: only take a snapshot when
+			// shouldSnapshot reports that the threshold of
+			// new log entries has been reached
+			if r.shouldSnapshot() {
+				err := r.takeSnapshot()
+				if err != nil {
+					r.logger.Printf("[ERR] raft: Failed to take snapshot: %v", err)
+				}
+			}
+
+		case req := <-r.snapshotCh:
+			// Explicit request: snapshot right away and report the outcome
+			err := r.takeSnapshot()
+			if err != nil {
+				r.logger.Printf("[ERR] raft: Failed to take snapshot: %v", err)
+			}
+			req.respond(err)
+
+		case <-r.shutdownCh:
+			return
+		}
+	}
+}
+
+// shouldSnapshot reports whether the number of log entries written since
+// the last snapshot has reached the configured SnapshotThreshold.
+func (r *Raft) shouldSnapshot() bool {
+	// Index covered by the most recent snapshot
+	snapIdx := r.getLastSnapshotIndex()
+
+	// Index of the newest entry in the log store
+	logIdx, err := r.logs.LastIndex()
+	if err != nil {
+		r.logger.Printf("[ERR] raft: Failed to get last log index: %v", err)
+		return false
+	}
+
+	// Snapshot once the gap between the two reaches the threshold.
+	// NOTE(review): presumably unsigned, so this wraps if snapIdx > logIdx — confirm
+	return logIdx-snapIdx >= r.conf.SnapshotThreshold
+}
+
+// takeSnapshot captures the FSM state in a new snapshot and compacts the logs it covers.
+func (r *Raft) takeSnapshot() error {
+	defer metrics.MeasureSince([]string{"raft", "snapshot", "takeSnapshot"}, time.Now())
+	// Build the request future for the FSM
+	snapReq := new(reqSnapshotFuture)
+	snapReq.init()
+
+	// Hand it over, unless we are shutting down
+	select {
+	case r.fsmSnapshotCh <- snapReq:
+	case <-r.shutdownCh:
+		return ErrRaftShutdown
+	}
+
+	// Wait for the answer. ErrNothingNewToSnapshot is passed through
+	// as-is, any other failure is wrapped with context
+	if err := snapReq.Error(); err == ErrNothingNewToSnapshot {
+		return err
+	} else if err != nil {
+		return fmt.Errorf("failed to start snapshot: %v", err)
+	}
+	defer snapReq.snapshot.Release()
+
+	// Announce the snapshot before doing the work
+	r.logger.Printf("[INFO] raft: Starting snapshot up to %d", snapReq.index)
+
+	// Serialize the peer set that goes with the snapshot
+	encodedPeers := encodePeers(snapReq.peers, r.trans)
+
+	// Open a sink in the snapshot store, timing the creation
+	began := time.Now()
+	sink, err := r.snapshots.Create(snapReq.index, snapReq.term, encodedPeers)
+	if err != nil {
+		return fmt.Errorf("failed to create snapshot: %v", err)
+	}
+	metrics.MeasureSince([]string{"raft", "snapshot", "create"}, began)
+
+	// Write the FSM snapshot into the sink, cancelling the sink on failure
+	began = time.Now()
+	if err := snapReq.snapshot.Persist(sink); err != nil {
+		sink.Cancel()
+		return fmt.Errorf("failed to persist snapshot: %v", err)
+	}
+	metrics.MeasureSince([]string{"raft", "snapshot", "persist"}, began)
+
+	// Closing the sink can still fail, so check it
+	if err := sink.Close(); err != nil {
+		return fmt.Errorf("failed to close snapshot: %v", err)
+	}
+
+	// Remember what the newest stable snapshot covers
+	r.setLastSnapshotIndex(snapReq.index)
+	r.setLastSnapshotTerm(snapReq.term)
+
+	// Trim the logs now covered by the snapshot
+	if err := r.compactLogs(snapReq.index); err != nil {
+		return err
+	}
+
+	// Report completion
+	r.logger.Printf("[INFO] raft: Snapshot to %d complete", snapReq.index)
+	return nil
+}
+
+// compactLogs trims log entries that a snapshot ending at snapIdx
+// (inclusive) has made unnecessary, keeping TrailingLogs entries behind.
+func (r *Raft) compactLogs(snapIdx uint64) error {
+	defer metrics.MeasureSince([]string{"raft", "compactLogs"}, time.Now())
+	// The range to delete starts at the oldest stored entry
+	first, err := r.logs.FirstIndex()
+	if err != nil {
+		return fmt.Errorf("failed to get first log index: %v", err)
+	}
+
+	// With no more than TrailingLogs entries there is nothing to trim
+	if r.getLastLogIndex() <= r.conf.TrailingLogs {
+		return nil
+	}
+
+	// The range ends at the snapshot index or TrailingLogs entries
+	// before the head of the log, whichever comes first. That keeps
+	// at least TrailingLogs entries around and never removes an entry
+	// newer than the snapshot.
+	last := min(snapIdx, r.getLastLogIndex()-r.conf.TrailingLogs)
+
+	// Record the range being removed
+	r.logger.Printf("[INFO] raft: Compacting logs from %d to %d", first, last)
+
+	// Drop the range from the log store
+	if err := r.logs.DeleteRange(first, last); err != nil {
+		return fmt.Errorf("log compaction failed: %v", err)
+	}
+	return nil
+}
+
+// restoreSnapshot attempts to restore the latest snapshots, and fails
+// if none of them can be restored. This is called at initialization time,
+// and is completely unsafe to call at any other time.
+func (r *Raft) restoreSnapshot() error {
+	snapshots, err := r.snapshots.List()
+	if err != nil {
+		r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err)
+		return err
+	}
+
+	// Try to load in order of newest to oldest
+	for _, snapshot := range snapshots {
+		_, source, err := r.snapshots.Open(snapshot.ID)
+		if err != nil {
+			r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapshot.ID, err)
+			continue
+		}
+		err = r.fsm.Restore(source)
+		source.Close() // close now; a defer in this loop would hold it open until return
+		if err != nil {
+			r.logger.Printf("[ERR] raft: Failed to restore snapshot %v: %v", snapshot.ID, err)
+			continue
+		}
+
+		// Log success
+		r.logger.Printf("[INFO] raft: Restored from snapshot %v", snapshot.ID)
+
+		// Update the lastApplied so we don't replay old logs
+		r.setLastApplied(snapshot.Index)
+
+		// Update the last stable snapshot info
+		r.setLastSnapshotIndex(snapshot.Index)
+		r.setLastSnapshotTerm(snapshot.Term)
+
+		// Success! The first snapshot that restores cleanly wins
+		return nil
+	}
+
+	// If we had snapshots and failed to load them, its an error
+	if len(snapshots) > 0 {
+		return fmt.Errorf("failed to load any existing snapshots")
+	}
+	return nil
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/replication.go b/Godeps/_workspace/src/github.com/hashicorp/raft/replication.go
new file mode 100644
index 00000000000..6a01631d237
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/replication.go
@@ -0,0 +1,517 @@
+package raft
+
+import (
+	"errors"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/armon/go-metrics"
+)
+
+const (
+	maxFailureScale = 12
+	failureWait     = 10 * time.Millisecond
+)
+
+var (
+	// ErrLogNotFound indicates a given log entry is not available.
+	ErrLogNotFound = errors.New("log not found")
+
+	// ErrPipelineReplicationNotSupported can be returned by the transport to
+	// signal that pipeline replication is not supported in general, and that
+	// no error message should be produced.
+	ErrPipelineReplicationNotSupported = errors.New("pipeline replication not supported")
+)
+
+type followerReplication struct {
+	peer     string
+	inflight *inflight
+
+	stopCh    chan uint64
+	triggerCh chan struct{}
+
+	currentTerm uint64
+	matchIndex  uint64
+	nextIndex   uint64
+
+	lastContact     time.Time
+	lastContactLock sync.RWMutex
+
+	failures uint64
+
+	notifyCh   chan struct{}
+	notify     []*verifyFuture
+	notifyLock sync.Mutex
+
+	// stepDown is used to indicate to the leader that we
+	// should step down based on information from a follower.
+	stepDown chan struct{}
+
+	// allowPipeline records whether replication looks healthy enough
+	// that pipeline replication should be enabled.
+	allowPipeline bool
+}
+
+// notifyAll is used to notify all the waiting verify futures
+// if the follower believes we are still the leader.
+func (s *followerReplication) notifyAll(leader bool) {
+	// Clear the waiting notifies minimizing lock time
+	s.notifyLock.Lock()
+	n := s.notify
+	s.notify = nil
+	s.notifyLock.Unlock()
+
+	// Submit our votes
+	for _, v := range n {
+		v.vote(leader)
+	}
+}
+
+// LastContact returns the time of last contact.
+func (s *followerReplication) LastContact() time.Time {
+	s.lastContactLock.RLock()
+	last := s.lastContact
+	s.lastContactLock.RUnlock()
+	return last
+}
+
+// setLastContact sets the last contact to the current time.
+func (s *followerReplication) setLastContact() {
+	s.lastContactLock.Lock()
+	s.lastContact = time.Now()
+	s.lastContactLock.Unlock()
+}
+
+// replicate is a long running routine that is used to manage
+// the process of replicating logs to our followers.
+func (r *Raft) replicate(s *followerReplication) {
+	// Start an async heartbeating routine
+	stopHeartbeat := make(chan struct{})
+	defer close(stopHeartbeat)
+	r.goFunc(func() { r.heartbeat(s, stopHeartbeat) })
+
+RPC:
+	shouldStop := false
+	for !shouldStop {
+		select {
+		case maxIndex := <-s.stopCh:
+			// Make a best effort to replicate up to this index
+			if maxIndex > 0 {
+				r.replicateTo(s, maxIndex)
+			}
+			return
+		case <-s.triggerCh:
+			shouldStop = r.replicateTo(s, r.getLastLogIndex())
+		case <-randomTimeout(r.conf.CommitTimeout):
+			shouldStop = r.replicateTo(s, r.getLastLogIndex())
+		}
+
+		// If things look healthy, switch to pipeline mode
+		if !shouldStop && s.allowPipeline {
+			goto PIPELINE
+		}
+	}
+	return
+
+PIPELINE:
+	// Disable until re-enabled
+	s.allowPipeline = false
+
+	// Replicates using a pipeline for high performance. This method
+	// is not able to gracefully recover from errors, and so we fall back
+	// to standard mode on failure.
+	if err := r.pipelineReplicate(s); err != nil {
+		if err != ErrPipelineReplicationNotSupported {
+			r.logger.Printf("[ERR] raft: Failed to start pipeline replication to %s: %s", s.peer, err)
+		}
+	}
+	goto RPC
+}
+
+// replicateTo is used to replicate the logs up to a given last index.
+// If the follower log is behind, we take care to bring them up to date.
+func (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) {
+	// Create the base request
+	var req AppendEntriesRequest
+	var resp AppendEntriesResponse
+	var start time.Time
+START:
+	// Prevent an excessive retry rate on errors
+	if s.failures > 0 {
+		select {
+		case <-time.After(backoff(failureWait, s.failures, maxFailureScale)):
+		case <-r.shutdownCh:
+		}
+	}
+
+	// Setup the request
+	if err := r.setupAppendEntries(s, &req, s.nextIndex, lastIndex); err == ErrLogNotFound {
+		goto SEND_SNAP
+	} else if err != nil {
+		return
+	}
+
+	// Make the RPC call
+	start = time.Now()
+	if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
+		r.logger.Printf("[ERR] raft: Failed to AppendEntries to %v: %v", s.peer, err)
+		s.failures++
+		return
+	}
+	appendStats(s.peer, start, float32(len(req.Entries)))
+
+	// Check for a newer term, stop running
+	if resp.Term > req.Term {
+		r.handleStaleTerm(s)
+		return true
+	}
+
+	// Update the last contact
+	s.setLastContact()
+
+	// Update s based on success
+	if resp.Success {
+		// Update our replication state
+		updateLastAppended(s, &req)
+
+		// Clear any failures, allow pipelining
+		s.failures = 0
+		s.allowPipeline = true
+	} else {
+		s.nextIndex = max(min(s.nextIndex-1, resp.LastLog+1), 1)
+		s.matchIndex = s.nextIndex - 1
+		if resp.NoRetryBackoff {
+			s.failures = 0
+		} else {
+			s.failures++
+		}
+		r.logger.Printf("[WARN] raft: AppendEntries to %v rejected, sending older logs (next: %d)", s.peer, s.nextIndex)
+	}
+
+CHECK_MORE:
+	// Check if there are more logs to replicate
+	if s.nextIndex <= lastIndex {
+		goto START
+	}
+	return
+
+	// SEND_SNAP is used when we fail to get a log, usually because the follower
+	// is too far behind, and we must ship a snapshot down instead
+SEND_SNAP:
+	if stop, err := r.sendLatestSnapshot(s); stop {
+		return true
+	} else if err != nil {
+		r.logger.Printf("[ERR] raft: Failed to send snapshot to %v: %v", s.peer, err)
+		return
+	}
+
+	// Check if there is more to replicate
+	goto CHECK_MORE
+}
+
+// sendLatestSnapshot is used to send the latest snapshot we have
+// down to our follower.
+func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) {
+	// Get the snapshots
+	snapshots, err := r.snapshots.List()
+	if err != nil {
+		r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err)
+		return false, err
+	}
+
+	// Check we have at least a single snapshot
+	if len(snapshots) == 0 {
+		return false, fmt.Errorf("no snapshots found")
+	}
+
+	// Open the most recent snapshot
+	snapID := snapshots[0].ID
+	meta, snapshot, err := r.snapshots.Open(snapID)
+	if err != nil {
+		r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapID, err)
+		return false, err
+	}
+	defer snapshot.Close()
+
+	// Setup the request
+	req := InstallSnapshotRequest{
+		Term:         s.currentTerm,
+		Leader:       r.trans.EncodePeer(r.localAddr),
+		LastLogIndex: meta.Index,
+		LastLogTerm:  meta.Term,
+		Peers:        meta.Peers,
+		Size:         meta.Size,
+	}
+
+	// Make the call
+	start := time.Now()
+	var resp InstallSnapshotResponse
+	if err := r.trans.InstallSnapshot(s.peer, &req, &resp, snapshot); err != nil {
+		r.logger.Printf("[ERR] raft: Failed to install snapshot %v: %v", snapID, err)
+		s.failures++
+		return false, err
+	}
+	metrics.MeasureSince([]string{"raft", "replication", "installSnapshot", s.peer}, start)
+
+	// Check for a newer term, stop running
+	if resp.Term > req.Term {
+		r.handleStaleTerm(s)
+		return true, nil
+	}
+
+	// Update the last contact
+	s.setLastContact()
+
+	// Check for success
+	if resp.Success {
+		// Mark any inflight logs as committed
+		s.inflight.CommitRange(s.matchIndex+1, meta.Index)
+
+		// Update the indexes
+		s.matchIndex = meta.Index
+		s.nextIndex = s.matchIndex + 1
+
+		// Clear any failures
+		s.failures = 0
+
+		// Notify we are still leader
+		s.notifyAll(true)
+	} else {
+		s.failures++
+		r.logger.Printf("[WARN] raft: InstallSnapshot to %v rejected", s.peer)
+	}
+	return false, nil
+}
+
+// heartbeat is used to periodically invoke AppendEntries on a peer
+// to ensure they don't time out. This is done async of replicate(),
+// since that routine could potentially be blocked on disk IO.
+func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) {
+	var failures uint64
+	req := AppendEntriesRequest{
+		Term:   s.currentTerm,
+		Leader: r.trans.EncodePeer(r.localAddr),
+	}
+	var resp AppendEntriesResponse
+	for {
+		// Wait for the next heartbeat interval or forced notify
+		select {
+		case <-s.notifyCh:
+		case <-randomTimeout(r.conf.HeartbeatTimeout / 10):
+		case <-stopCh:
+			return
+		}
+
+		start := time.Now()
+		if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
+			r.logger.Printf("[ERR] raft: Failed to heartbeat to %v: %v", s.peer, err)
+			failures++
+			select {
+			case <-time.After(backoff(failureWait, failures, maxFailureScale)):
+			case <-stopCh:
+			}
+		} else {
+			s.setLastContact()
+			failures = 0
+			metrics.MeasureSince([]string{"raft", "replication", "heartbeat", s.peer}, start)
+			s.notifyAll(resp.Success)
+		}
+	}
+}
+
+// pipelineReplicate is used when we have synchronized our state with the follower,
+// and want to switch to a higher performance pipeline mode of replication.
+// We only pipeline AppendEntries commands, and if we ever hit an error, we fall
+// back to the standard replication which can handle more complex situations.
+func (r *Raft) pipelineReplicate(s *followerReplication) error {
+	// Create a new pipeline
+	pipeline, err := r.trans.AppendEntriesPipeline(s.peer)
+	if err != nil {
+		return err
+	}
+	defer pipeline.Close()
+
+	// Log start and stop of pipeline
+	r.logger.Printf("[INFO] raft: pipelining replication to peer %v", s.peer)
+	defer r.logger.Printf("[INFO] raft: aborting pipeline replication to peer %v", s.peer)
+
+	// Create a shutdown and finish channel
+	stopCh := make(chan struct{})
+	finishCh := make(chan struct{})
+
+	// Start a dedicated decoder
+	r.goFunc(func() { r.pipelineDecode(s, pipeline, stopCh, finishCh) })
+
+	// Start pipeline sends at the last good nextIndex
+	nextIndex := s.nextIndex
+
+	shouldStop := false
+SEND:
+	for !shouldStop {
+		select {
+		case <-finishCh:
+			break SEND
+		case maxIndex := <-s.stopCh:
+			if maxIndex > 0 {
+				r.pipelineSend(s, pipeline, &nextIndex, maxIndex)
+			}
+			break SEND
+		case <-s.triggerCh:
+			shouldStop = r.pipelineSend(s, pipeline, &nextIndex, r.getLastLogIndex())
+		case <-randomTimeout(r.conf.CommitTimeout):
+			shouldStop = r.pipelineSend(s, pipeline, &nextIndex, r.getLastLogIndex())
+		}
+	}
+
+	// Stop our decoder, and wait for it to finish
+	close(stopCh)
+	select {
+	case <-finishCh:
+	case <-r.shutdownCh:
+	}
+	return nil
+}
+
+// pipelineSend is used to send data over a pipeline.
+func (r *Raft) pipelineSend(s *followerReplication, p AppendPipeline, nextIdx *uint64, lastIndex uint64) (shouldStop bool) {
+	// Create a new append request
+	req := new(AppendEntriesRequest)
+	if err := r.setupAppendEntries(s, req, *nextIdx, lastIndex); err != nil {
+		return true
+	}
+
+	// Pipeline the append entries
+	if _, err := p.AppendEntries(req, new(AppendEntriesResponse)); err != nil {
+		r.logger.Printf("[ERR] raft: Failed to pipeline AppendEntries to %v: %v", s.peer, err)
+		return true
+	}
+
+	// Increase the next send log to avoid re-sending old logs
+	if n := len(req.Entries); n > 0 {
+		last := req.Entries[n-1]
+		*nextIdx = last.Index + 1
+	}
+	return false
+}
+
+// pipelineDecode is used to decode the responses of pipelined requests.
+func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, finishCh chan struct{}) {
+	defer close(finishCh)
+	respCh := p.Consumer()
+	for {
+		select {
+		case ready := <-respCh:
+			req, resp := ready.Request(), ready.Response()
+			appendStats(s.peer, ready.Start(), float32(len(req.Entries)))
+
+			// Check for a newer term, stop running
+			if resp.Term > req.Term {
+				r.handleStaleTerm(s)
+				return
+			}
+
+			// Update the last contact
+			s.setLastContact()
+
+			// Abort pipeline if not successful
+			if !resp.Success {
+				return
+			}
+
+			// Update our replication state
+			updateLastAppended(s, req)
+		case <-stopCh:
+			return
+		}
+	}
+}
+
+// setupAppendEntries is used to setup an append entries request.
+func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
+	req.Term = s.currentTerm
+	req.Leader = r.trans.EncodePeer(r.localAddr)
+	req.LeaderCommitIndex = r.getCommitIndex()
+	if err := r.setPreviousLog(req, nextIndex); err != nil {
+		return err
+	}
+	if err := r.setNewLogs(req, nextIndex, lastIndex); err != nil {
+		return err
+	}
+	return nil
+}
+
+// setPreviousLog is used to setup the PrevLogEntry and PrevLogTerm for an
+// AppendEntriesRequest given the next index to replicate.
+func (r *Raft) setPreviousLog(req *AppendEntriesRequest, nextIndex uint64) error {
+	// Guard for the first index, since there is no 0 log entry
+	// Guard against the previous index being a snapshot as well
+	if nextIndex == 1 {
+		req.PrevLogEntry = 0
+		req.PrevLogTerm = 0
+
+	} else if (nextIndex - 1) == r.getLastSnapshotIndex() {
+		req.PrevLogEntry = r.getLastSnapshotIndex()
+		req.PrevLogTerm = r.getLastSnapshotTerm()
+
+	} else {
+		var l Log
+		if err := r.logs.GetLog(nextIndex-1, &l); err != nil {
+			r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v",
+				nextIndex-1, err)
+			return err
+		}
+
+		// Set the previous index and term from the fetched log entry
+		req.PrevLogEntry = l.Index
+		req.PrevLogTerm = l.Term
+	}
+	return nil
+}
+
+// setNewLogs is used to setup the logs which should be appended for a request.
+func (r *Raft) setNewLogs(req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
+	// Append up to MaxAppendEntries or up to the lastIndex
+	req.Entries = make([]*Log, 0, r.conf.MaxAppendEntries)
+	maxIndex := min(nextIndex+uint64(r.conf.MaxAppendEntries)-1, lastIndex)
+	for i := nextIndex; i <= maxIndex; i++ {
+		oldLog := new(Log)
+		if err := r.logs.GetLog(i, oldLog); err != nil {
+			r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v", i, err)
+			return err
+		}
+		req.Entries = append(req.Entries, oldLog)
+	}
+	return nil
+}
+
+// appendStats is used to emit stats about an AppendEntries invocation.
+func appendStats(peer string, start time.Time, logs float32) {
+	metrics.MeasureSince([]string{"raft", "replication", "appendEntries", "rpc", peer}, start)
+	metrics.IncrCounter([]string{"raft", "replication", "appendEntries", "logs", peer}, logs)
+}
+
+// handleStaleTerm is used when a follower indicates that we have a stale term.
+func (r *Raft) handleStaleTerm(s *followerReplication) {
+	r.logger.Printf("[ERR] raft: peer %v has newer term, stopping replication", s.peer)
+	s.notifyAll(false) // No longer leader
+	asyncNotifyCh(s.stepDown)
+}
+
+// updateLastAppended is used to update follower replication state after a successful
+// AppendEntries RPC.
+func updateLastAppended(s *followerReplication, req *AppendEntriesRequest) {
+	// Mark any inflight logs as committed
+	if logs := req.Entries; len(logs) > 0 {
+		first := logs[0]
+		last := logs[len(logs)-1]
+		s.inflight.CommitRange(first.Index, last.Index)
+
+		// Update the indexes
+		s.matchIndex = last.Index
+		s.nextIndex = last.Index + 1
+	}
+
+	// Notify still leader
+	s.notifyAll(true)
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/snapshot.go b/Godeps/_workspace/src/github.com/hashicorp/raft/snapshot.go
new file mode 100644
index 00000000000..7151f43ce26
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/snapshot.go
@@ -0,0 +1,40 @@
+package raft
+
+import (
+	"io"
+)
+
+// SnapshotMeta is for metadata of a snapshot.
+type SnapshotMeta struct {
+	ID    string // ID is opaque to the store, and is used for opening
+	Index uint64
+	Term  uint64
+	Peers []byte
+	Size  int64
+}
+
+// SnapshotStore interface is used to allow for flexible implementations
+// of snapshot storage and retrieval. For example, a client could implement
+// a shared state store such as S3, allowing new nodes to restore snapshots
+// without streaming from the leader.
+type SnapshotStore interface {
+	// Create is used to begin a snapshot at a given index and term,
+	// with the current peer set already encoded.
+	Create(index, term uint64, peers []byte) (SnapshotSink, error)
+
+	// List is used to list the available snapshots in the store.
+	// It should return them in descending order, with the highest index first.
+	List() ([]*SnapshotMeta, error)
+
+	// Open takes a snapshot ID and provides a ReadCloser. Once close is
+	// called it is assumed the snapshot is no longer needed.
+	Open(id string) (*SnapshotMeta, io.ReadCloser, error)
+}
+
+// SnapshotSink is returned by SnapshotStore.Create. The FSM will Write state
+// to the sink and call Close on completion. On error, Cancel will be invoked.
+type SnapshotSink interface {
+	io.WriteCloser
+	ID() string
+	Cancel() error
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/stable.go b/Godeps/_workspace/src/github.com/hashicorp/raft/stable.go
new file mode 100644
index 00000000000..ff59a8c570a
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/stable.go
@@ -0,0 +1,15 @@
+package raft
+
+// StableStore is used to provide stable storage
+// of key configurations to ensure safety.
+type StableStore interface {
+	Set(key []byte, val []byte) error
+
+	// Get returns the value for key, or an empty byte slice if key was not found.
+	Get(key []byte) ([]byte, error)
+
+	SetUint64(key []byte, val uint64) error
+
+	// GetUint64 returns the uint64 value for key, or 0 if key was not found.
+	GetUint64(key []byte) (uint64, error)
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/state.go b/Godeps/_workspace/src/github.com/hashicorp/raft/state.go
new file mode 100644
index 00000000000..41e80a1b510
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/state.go
@@ -0,0 +1,169 @@
+package raft
+
+import (
+	"sync/atomic"
+)
+
+// RaftState captures the state of a Raft node: Follower, Candidate, Leader,
+// or Shutdown.
+type RaftState uint32
+
+const (
+	// Follower is the initial state of a Raft node.
+	Follower RaftState = iota
+
+	// Candidate is one of the valid states of a Raft node.
+	Candidate
+
+	// Leader is one of the valid states of a Raft node.
+	Leader
+
+	// Shutdown is the terminal state of a Raft node.
+	Shutdown
+)
+
+func (s RaftState) String() string {
+	switch s {
+	case Follower:
+		return "Follower"
+	case Candidate:
+		return "Candidate"
+	case Leader:
+		return "Leader"
+	case Shutdown:
+		return "Shutdown"
+	default:
+		return "Unknown"
+	}
+}
+
+// raftState is used to maintain various state variables
+// and provides an interface to set/get the variables in a
+// thread safe manner.
+type raftState struct {
+	// The current term, cache of StableStore
+	currentTerm uint64
+
+	// Cache the latest log from LogStore
+	LastLogIndex uint64
+	LastLogTerm  uint64
+
+	// Highest committed log entry
+	commitIndex uint64
+
+	// Last applied log to the FSM
+	lastApplied uint64
+
+	// Cache the latest snapshot index/term
+	lastSnapshotIndex uint64
+	lastSnapshotTerm  uint64
+
+	// Tracks the number of live routines
+	runningRoutines int32
+
+	// The current state
+	state RaftState
+}
+
+func (r *raftState) getState() RaftState {
+	stateAddr := (*uint32)(&r.state)
+	return RaftState(atomic.LoadUint32(stateAddr))
+}
+
+func (r *raftState) setState(s RaftState) {
+	stateAddr := (*uint32)(&r.state)
+	atomic.StoreUint32(stateAddr, uint32(s))
+}
+
+func (r *raftState) getCurrentTerm() uint64 {
+	return atomic.LoadUint64(&r.currentTerm)
+}
+
+func (r *raftState) setCurrentTerm(term uint64) {
+	atomic.StoreUint64(&r.currentTerm, term)
+}
+
+func (r *raftState) getLastLogIndex() uint64 {
+	return atomic.LoadUint64(&r.LastLogIndex)
+}
+
+func (r *raftState) setLastLogIndex(term uint64) {
+	atomic.StoreUint64(&r.LastLogIndex, term)
+}
+
+func (r *raftState) getLastLogTerm() uint64 {
+	return atomic.LoadUint64(&r.LastLogTerm)
+}
+
+func (r *raftState) setLastLogTerm(term uint64) {
+	atomic.StoreUint64(&r.LastLogTerm, term)
+}
+
+func (r *raftState) getCommitIndex() uint64 {
+	return atomic.LoadUint64(&r.commitIndex)
+}
+
+func (r *raftState) setCommitIndex(term uint64) {
+	atomic.StoreUint64(&r.commitIndex, term)
+}
+
+func (r *raftState) getLastApplied() uint64 {
+	return atomic.LoadUint64(&r.lastApplied)
+}
+
+func (r *raftState) setLastApplied(term uint64) {
+	atomic.StoreUint64(&r.lastApplied, term)
+}
+
+func (r *raftState) getLastSnapshotIndex() uint64 {
+	return atomic.LoadUint64(&r.lastSnapshotIndex)
+}
+
+func (r *raftState) setLastSnapshotIndex(term uint64) {
+	atomic.StoreUint64(&r.lastSnapshotIndex, term)
+}
+
+func (r *raftState) getLastSnapshotTerm() uint64 {
+	return atomic.LoadUint64(&r.lastSnapshotTerm)
+}
+
+func (r *raftState) setLastSnapshotTerm(term uint64) {
+	atomic.StoreUint64(&r.lastSnapshotTerm, term)
+}
+
+func (r *raftState) incrRoutines() {
+	atomic.AddInt32(&r.runningRoutines, 1)
+}
+
+func (r *raftState) decrRoutines() {
+	atomic.AddInt32(&r.runningRoutines, -1)
+}
+
+func (r *raftState) getRoutines() int32 {
+	return atomic.LoadInt32(&r.runningRoutines)
+}
+
+// Start a goroutine and properly handle the race between a routine
+// starting and incrementing, and exiting and decrementing.
+func (r *raftState) goFunc(f func()) {
+	r.incrRoutines()
+	go func() {
+		defer r.decrRoutines()
+		f()
+	}()
+}
+
+// getLastIndex returns the last index in stable storage.
+// Either from the last log or from the last snapshot.
+func (r *raftState) getLastIndex() uint64 {
+	return max(r.getLastLogIndex(), r.getLastSnapshotIndex())
+}
+
+// getLastEntry returns the last index and term in stable storage.
+// Either from the last log or from the last snapshot.
+func (r *raftState) getLastEntry() (uint64, uint64) {
+	if r.getLastLogIndex() >= r.getLastSnapshotIndex() {
+		return r.getLastLogIndex(), r.getLastLogTerm()
+	}
+	return r.getLastSnapshotIndex(), r.getLastSnapshotTerm()
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/tcp_transport.go b/Godeps/_workspace/src/github.com/hashicorp/raft/tcp_transport.go
new file mode 100644
index 00000000000..50c6d15df18
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/tcp_transport.go
@@ -0,0 +1,105 @@
+package raft
+
+import (
+	"errors"
+	"io"
+	"log"
+	"net"
+	"time"
+)
+
+var (
+	errNotAdvertisable = errors.New("local bind address is not advertisable")
+	errNotTCP          = errors.New("local address is not a TCP address")
+)
+
+// TCPStreamLayer implements StreamLayer interface for plain TCP.
+type TCPStreamLayer struct {
+	advertise net.Addr
+	listener  *net.TCPListener
+}
+
+// NewTCPTransport returns a NetworkTransport that is built on top of
+// a TCP streaming transport layer.
+func NewTCPTransport(
+	bindAddr string,
+	advertise net.Addr,
+	maxPool int,
+	timeout time.Duration,
+	logOutput io.Writer,
+) (*NetworkTransport, error) {
+	return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
+		return NewNetworkTransport(stream, maxPool, timeout, logOutput)
+	})
+}
+
+// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of
+// a TCP streaming transport layer, with log output going to the supplied Logger
+func NewTCPTransportWithLogger(
+	bindAddr string,
+	advertise net.Addr,
+	maxPool int,
+	timeout time.Duration,
+	logger *log.Logger,
+) (*NetworkTransport, error) {
+	return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
+		return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger)
+	})
+}
+
+func newTCPTransport(bindAddr string,
+	advertise net.Addr,
+	maxPool int,
+	timeout time.Duration,
+	transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) {
+	// Try to bind
+	list, err := net.Listen("tcp", bindAddr)
+	if err != nil {
+		return nil, err
+	}
+
+	// Create stream
+	stream := &TCPStreamLayer{
+		advertise: advertise,
+		listener:  list.(*net.TCPListener),
+	}
+
+	// Verify that we have a usable advertise address
+	addr, ok := stream.Addr().(*net.TCPAddr)
+	if !ok {
+		list.Close()
+		return nil, errNotTCP
+	}
+	if addr.IP.IsUnspecified() {
+		list.Close()
+		return nil, errNotAdvertisable
+	}
+
+	// Create the network transport
+	trans := transportCreator(stream)
+	return trans, nil
+}
+
+// Dial implements the StreamLayer interface.
+func (t *TCPStreamLayer) Dial(address string, timeout time.Duration) (net.Conn, error) {
+	return net.DialTimeout("tcp", address, timeout)
+}
+
+// Accept implements the net.Listener interface.
+func (t *TCPStreamLayer) Accept() (c net.Conn, err error) {
+	return t.listener.Accept()
+}
+
+// Close implements the net.Listener interface.
+func (t *TCPStreamLayer) Close() (err error) {
+	return t.listener.Close()
+}
+
+// Addr implements the net.Listener interface.
+func (t *TCPStreamLayer) Addr() net.Addr {
+	// Use an advertise addr if provided
+	if t.advertise != nil {
+		return t.advertise
+	}
+	return t.listener.Addr()
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/transport.go b/Godeps/_workspace/src/github.com/hashicorp/raft/transport.go
new file mode 100644
index 00000000000..8928de0c2fc
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/transport.go
@@ -0,0 +1,85 @@
+package raft
+
+import (
+	"io"
+	"time"
+)
+
+// RPCResponse captures both a response and a potential error.
+type RPCResponse struct {
+	Response interface{}
+	Error    error
+}
+
+// RPC has a command, and provides a response mechanism.
+type RPC struct {
+	Command  interface{}
+	Reader   io.Reader // Set only for InstallSnapshot
+	RespChan chan<- RPCResponse
+}
+
+// Respond is used to respond with a response, error or both
+func (r *RPC) Respond(resp interface{}, err error) {
+	r.RespChan <- RPCResponse{resp, err}
+}
+
+// Transport provides an interface for network transports
+// to allow Raft to communicate with other nodes.
+type Transport interface {
+	// Consumer returns a channel that can be used to
+	// consume and respond to RPC requests.
+	Consumer() <-chan RPC
+
+	// LocalAddr is used to return our local address to distinguish from our peers.
+	LocalAddr() string
+
+	// AppendEntriesPipeline returns an interface that can be used to pipeline
+	// AppendEntries requests.
+	AppendEntriesPipeline(target string) (AppendPipeline, error)
+
+	// AppendEntries sends the appropriate RPC to the target node.
+	AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error
+
+	// RequestVote sends the appropriate RPC to the target node.
+	RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error
+
+	// InstallSnapshot is used to push a snapshot down to a follower. The data is read from
+	// the ReadCloser and streamed to the client.
+	InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error
+
+	// EncodePeer is used to serialize a peer name.
+	EncodePeer(string) []byte
+
+	// DecodePeer is used to deserialize a peer name.
+	DecodePeer([]byte) string
+
+	// SetHeartbeatHandler is used to setup a heartbeat handler
+	// as a fast-pass. This is to avoid head-of-line blocking from
+	// disk IO. If a Transport does not support this, it can simply
+	// ignore the call, and push the heartbeat onto the Consumer channel.
+	SetHeartbeatHandler(cb func(rpc RPC))
+}
+
+// AppendPipeline is used for pipelining AppendEntries requests. It is used
+// to increase the replication throughput by masking latency and better
+// utilizing bandwidth.
+type AppendPipeline interface {
+	// AppendEntries is used to add another request to the pipeline.
+	// The send may block which is an effective form of back-pressure.
+	AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error)
+
+	// Consumer returns a channel that can be used to consume
+	// response futures when they are ready.
+	Consumer() <-chan AppendFuture
+
+	// Closes pipeline and cancels all inflight RPCs
+	Close() error
+}
+
+// AppendFuture is used to return information about a pipelined AppendEntries request.
+type AppendFuture interface {
+	Future
+	Start() time.Time
+	Request() *AppendEntriesRequest
+	Response() *AppendEntriesResponse
+}
diff --git a/Godeps/_workspace/src/github.com/hashicorp/raft/util.go b/Godeps/_workspace/src/github.com/hashicorp/raft/util.go
new file mode 100644
index 00000000000..a6642c4c9e6
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/hashicorp/raft/util.go
@@ -0,0 +1,200 @@
+package raft
+
+import (
+	"bytes"
+	crand "crypto/rand"
+	"encoding/binary"
+	"fmt"
+	"math"
+	"math/big"
+	"math/rand"
+	"time"
+
+	"github.com/hashicorp/go-msgpack/codec"
+)
+
+func init() {
+	// Ensure we use a high-entropy seed for the pseudo-random generator
+	rand.Seed(newSeed())
+}
+
+// newSeed returns an int64 read from a crypto random source.
+// It can be used to seed a math/rand source.
+func newSeed() int64 {
+	r, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
+	if err != nil {
+		panic(fmt.Errorf("failed to read random bytes: %v", err))
+	}
+	return r.Int64()
+}
+
+// randomTimeout returns a value that is between the minVal and 2x minVal.
+func randomTimeout(minVal time.Duration) <-chan time.Time {
+	if minVal == 0 {
+		return nil
+	}
+	extra := (time.Duration(rand.Int63()) % minVal)
+	return time.After(minVal + extra)
+}
+
+// min returns the minimum.
+func min(a, b uint64) uint64 {
+	if a <= b {
+		return a
+	}
+	return b
+}
+
+// max returns the maximum.
+func max(a, b uint64) uint64 {
+	if a >= b {
+		return a
+	}
+	return b
+}
+
+// generateUUID is used to generate a random UUID.
+func generateUUID() string {
+	buf := make([]byte, 16)
+	if _, err := crand.Read(buf); err != nil {
+		panic(fmt.Errorf("failed to read random bytes: %v", err))
+	}
+
+	return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x",
+		buf[0:4],
+		buf[4:6],
+		buf[6:8],
+		buf[8:10],
+		buf[10:16])
+}
+
+// asyncNotify is used to do an async channel send to
+// a list of channels. This will not block.
+func asyncNotify(chans []chan struct{}) {
+	for _, ch := range chans {
+		asyncNotifyCh(ch)
+	}
+}
+
+// asyncNotifyCh is used to do an async channel send
+// to a single channel without blocking.
+func asyncNotifyCh(ch chan struct{}) {
+	select {
+	case ch <- struct{}{}:
+	default:
+	}
+}
+
+// asyncNotifyBool is used to do an async notification
+// on a bool channel.
+func asyncNotifyBool(ch chan bool, v bool) {
+	select {
+	case ch <- v:
+	default:
+	}
+}
+
+// ExcludePeer is used to exclude a single peer from a list of peers.
+func ExcludePeer(peers []string, peer string) []string {
+	otherPeers := make([]string, 0, len(peers))
+	for _, p := range peers {
+		if p != peer {
+			otherPeers = append(otherPeers, p)
+		}
+	}
+	return otherPeers
+}
+
+// PeerContained checks if a given peer is contained in a list.
+func PeerContained(peers []string, peer string) bool {
+	for _, p := range peers {
+		if p == peer {
+			return true
+		}
+	}
+	return false
+}
+
+// AddUniquePeer is used to add a peer to a list of existing
+// peers only if it is not already contained.
+func AddUniquePeer(peers []string, peer string) []string {
+	if PeerContained(peers, peer) {
+		return peers
+	}
+	return append(peers, peer)
+}
+
+// encodePeers is used to serialize a list of peers.
+func encodePeers(peers []string, trans Transport) []byte {
+	// Encode each peer
+	var encPeers [][]byte
+	for _, p := range peers {
+		encPeers = append(encPeers, trans.EncodePeer(p))
+	}
+
+	// Encode the entire array
+	buf, err := encodeMsgPack(encPeers)
+	if err != nil {
+		panic(fmt.Errorf("failed to encode peers: %v", err))
+	}
+
+	return buf.Bytes()
+}
+
+// decodePeers is used to deserialize a list of peers.
+func decodePeers(buf []byte, trans Transport) []string {
+	// Decode the buffer first
+	var encPeers [][]byte
+	if err := decodeMsgPack(buf, &encPeers); err != nil {
+		panic(fmt.Errorf("failed to decode peers: %v", err))
+	}
+
+	// Deserialize each peer
+	var peers []string
+	for _, enc := range encPeers {
+		peers = append(peers, trans.DecodePeer(enc))
+	}
+
+	return peers
+}
+
+// decodeMsgPack reverses the encode operation on a byte slice input.
+func decodeMsgPack(buf []byte, out interface{}) error {
+	r := bytes.NewBuffer(buf)
+	hd := codec.MsgpackHandle{}
+	dec := codec.NewDecoder(r, &hd)
+	return dec.Decode(out)
+}
+
+// encodeMsgPack writes an encoded object to a new bytes buffer.
+func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
+	buf := bytes.NewBuffer(nil)
+	hd := codec.MsgpackHandle{}
+	enc := codec.NewEncoder(buf, &hd)
+	err := enc.Encode(in)
+	return buf, err
+}
+
+// bytesToUint64 converts big-endian bytes to a uint64.
+func bytesToUint64(b []byte) uint64 {
+	return binary.BigEndian.Uint64(b)
+}
+
+// uint64ToBytes converts a uint64 to an 8-byte big-endian slice.
+func uint64ToBytes(u uint64) []byte {
+	buf := make([]byte, 8)
+	binary.BigEndian.PutUint64(buf, u)
+	return buf
+}
+
+// backoff is used to compute an exponential backoff
+// duration. Base time is scaled by the current round,
+// up to some maximum scale factor.
+func backoff(base time.Duration, round, limit uint64) time.Duration {
+	power := min(round, limit)
+	for power > 2 {
+		base *= 2
+		power--
+	}
+	return base
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/LICENSE b/Godeps/_workspace/src/github.com/influxdb/influxdb/LICENSE
index 03f21e89fec..d50222706cf 100644
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/LICENSE
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2013-2014 Errplane Inc.
+Copyright (c) 2013-2015 Errplane Inc.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/_vendor/raft/LICENSE b/Godeps/_workspace/src/github.com/influxdb/influxdb/_vendor/raft/LICENSE
deleted file mode 100644
index ee7f222286b..00000000000
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/_vendor/raft/LICENSE
+++ /dev/null
@@ -1,20 +0,0 @@
-Copyright 2013 go-raft contributors
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/README.md b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/README.md
index 2d849dfb7e0..012109bc00c 100644
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/README.md
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/README.md
@@ -1,2 +1,206 @@
-influxdb-go
-===========
+# InfluxDB Client
+
+[![GoDoc](https://godoc.org/github.com/influxdb/influxdb?status.svg)](http://godoc.org/github.com/influxdb/influxdb/client)
+
+## Description
+
+A Go client library written and maintained by the **InfluxDB** team.
+This package provides convenience functions to read and write time series data.
+It uses the HTTP protocol to communicate with your **InfluxDB** cluster.
+
+
+## Getting Started
+
+### Connecting To Your Database
+
+Connecting to an **InfluxDB** database is straightforward. You will need a host
+name, a port and the cluster user credentials if applicable. The default port is 8086.
+You can customize these settings to your specific installation via the
+**InfluxDB** configuration file.
+
+Though not necessary for experimentation, you may want to create a new user
+and authenticate the connection to your database.
+
+For more information please check out the
+[Cluster Admin Docs](http://influxdb.com/docs/v0.9/query_language/database_administration.html).
+
+For the impatient, you can create a new admin user _bubba_ by firing off the
+[InfluxDB CLI](https://github.com/influxdb/influxdb/blob/master/cmd/influx/main.go).
+
+```shell
+influx
+> create user bubba with password 'bumblebeetuna'
+> grant all privileges to bubba
+```
+
+And now, for good measure, set the credentials in your shell environment.
+In the example below we will use $INFLUX_USER and $INFLUX_PWD.
+
+Now with the administrivia out of the way, let's connect to our database.
+
+NOTE: If you've opted out of creating a user, you can omit Username and Password in
+the configuration below.
+
+```go
+package main
+
+import "github.com/influxdb/influxdb/client"
+
+const (
+	MyHost        = "localhost"
+	MyPort        = 8086
+	MyDB          = "square_holes"
+	MyMeasurement = "shapes"
+)
+
+func main() {
+	u, err := url.Parse(fmt.Sprintf("http://%s:%d", MyHost, MyPort))
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	conf := client.Config{
+		URL:      *u,
+		Username: os.Getenv("INFLUX_USER"),
+		Password: os.Getenv("INFLUX_PWD"),
+	}
+
+	con, err := client.NewClient(conf)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	dur, ver, err := con.Ping()
+	if err != nil {
+		log.Fatal(err)
+	}
+	log.Printf("Happy as a Hippo! %v, %s", dur, ver)
+}
+
+```
+
+### Inserting Data
+
+Time series data aka *points* are written to the database using batch inserts.
+The mechanism is to create one or more points and then create a batch aka *batch points*
+and write these to a given database and series. A series is a combination of a
+measurement (time/values) and a set of tags.
+
+In this sample we will create a batch of 1,000 points. Each point has a time and
+a single value as well as 2 tags indicating a shape and color. We write these points
+to a database called _square_holes_ using a measurement named _shapes_.
+
+NOTE: You can specify a RetentionPolicy as part of the batch points. If not
+provided InfluxDB will use the database _default_ retention policy. By default, the _default_
+retention policy never deletes any data it contains.
+
+```go
+func writePoints(con *client.Client) {
+	var (
+		shapes     = []string{"circle", "rectangle", "square", "triangle"}
+		colors     = []string{"red", "blue", "green"}
+		sampleSize = 1000
+		pts        = make([]client.Point, sampleSize)
+	)
+
+	rand.Seed(42)
+	for i := 0; i < sampleSize; i++ {
+		pts[i] = client.Point{
+			Measurement: "shapes",
+			Tags: map[string]string{
+				"color": strconv.Itoa(rand.Intn(len(colors))),
+				"shape": strconv.Itoa(rand.Intn(len(shapes))),
+			},
+			Fields: map[string]interface{}{
+				"value": rand.Intn(sampleSize),
+			},
+			Time: time.Now(),
+			Precision: "s",
+		}
+	}
+
+	bps := client.BatchPoints{
+		Points:          pts,
+		Database:        MyDB,
+		RetentionPolicy: "default",
+	}
+	_, err := con.Write(bps)
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+```
+
+
+### Querying Data
+
+One nice advantage of using **InfluxDB** is the ability to query your data using familiar
+SQL constructs. In this example we can create a convenience function to query the database
+as follows:
+
+```go
+// queryDB convenience function to query the database
+func queryDB(con *client.Client, cmd string) (res []client.Result, err error) {
+	q := client.Query{
+		Command:  cmd,
+		Database: MyDB,
+	}
+	if response, err := con.Query(q); err == nil {
+		if response.Error() != nil {
+			return res, response.Error()
+		}
+		res = response.Results
+	}
+	return
+}
+```
+
+#### Creating a Database
+```go
+_, err := queryDB(con, fmt.Sprintf("create database %s", MyDB))
+if err != nil {
+	log.Fatal(err)
+}
+```
+
+#### Count Records
+```go
+q := fmt.Sprintf("select count(%s) from %s", "value", MyMeasurement)
+res, err := queryDB(con, q)
+if err != nil {
+	log.Fatal(err)
+}
+count := res[0].Series[0].Values[0][1]
+log.Printf("Found a total of %v records", count)
+
+```
+
+#### Fetch up to 20 _shapes_ records
+
+```go
+q := fmt.Sprintf("select * from %s limit %d", MyMeasurement, 20)
+res, err = queryDB(con, q)
+if err != nil {
+	log.Fatal(err)
+}
+
+for i, row := range res[0].Series[0].Values {
+	t, err := time.Parse(time.RFC3339, row[0].(string))
+	if err != nil {
+		log.Fatal(err)
+	}
+	val, err := row[1].(json.Number).Int64()
+	log.Printf("[%2d] %s: %03d\n", i, t.Format(time.Stamp), val)
+}
+```
+
+## Go Docs
+
+Please refer to
+[http://godoc.org/github.com/influxdb/influxdb/client](http://godoc.org/github.com/influxdb/influxdb/client)
+for documentation.
+
+## See Also
+
+You can also examine how the client library is used by the
+[InfluxDB CLI](https://github.com/influxdb/influxdb/blob/master/cmd/influx/main.go).
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/examples/example.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/examples/example.go
deleted file mode 100644
index 6cc866e88c0..00000000000
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/examples/example.go
+++ /dev/null
@@ -1,200 +0,0 @@
-package examples
-
-import (
-	"fmt"
-
-	"github.com/influxdb/influxdb/client"
-)
-
-func main() {
-	TestClient()
-}
-
-func TestClient() {
-	internalTest(true)
-}
-
-func TestClientWithoutCompression() {
-	internalTest(false)
-}
-
-func internalTest(compression bool) {
-	c, err := client.NewClient(&client.ClientConfig{})
-	if err != nil {
-		panic(err)
-	}
-
-	admins, err := c.GetClusterAdminList()
-	if err != nil {
-		panic(err)
-	}
-
-	if len(admins) == 1 {
-		if err := c.CreateClusterAdmin("admin", "password"); err != nil {
-			panic(err)
-		}
-	}
-
-	admins, err = c.GetClusterAdminList()
-	if err != nil {
-		panic(err)
-	}
-
-	if len(admins) != 2 {
-		panic("more than two admins returned")
-	}
-
-	dbs, err := c.GetDatabaseList()
-	if err != nil {
-		panic(err)
-	}
-
-	if len(dbs) == 0 {
-		if err := c.CreateDatabase("foobar"); err != nil {
-			panic(err)
-		}
-	}
-
-	dbs, err = c.GetDatabaseList()
-	if err != nil {
-		panic(err)
-	}
-
-	if len(dbs) != 1 && dbs[0]["foobar"] == nil {
-		panic("List of databases don't match")
-	}
-
-	users, err := c.GetDatabaseUserList("foobar")
-	if err != nil {
-		panic(err)
-	}
-
-	if len(users) == 0 {
-		if err := c.CreateDatabaseUser("foobar", "dbuser", "pass"); err != nil {
-			panic(err)
-		}
-
-		if err := c.AlterDatabasePrivilege("foobar", "dbuser", true); err != nil {
-			panic(err)
-		}
-	}
-
-	users, err = c.GetDatabaseUserList("foobar")
-	if err != nil {
-		panic(err)
-	}
-
-	if len(users) != 1 {
-		panic("more than one user returned")
-	}
-
-	c, err = client.NewClient(&client.ClientConfig{
-		Username: "dbuser",
-		Password: "pass",
-		Database: "foobar",
-	})
-
-	if !compression {
-		c.DisableCompression()
-	}
-
-	if err != nil {
-		panic(err)
-	}
-
-	name := "ts9"
-	if !compression {
-		name = "ts9_uncompressed"
-	}
-
-	series := &client.Series{
-		Name:    name,
-		Columns: []string{"value"},
-		Points: [][]interface{}{
-			{1.0},
-		},
-	}
-	if err := c.WriteSeries([]*client.Series{series}); err != nil {
-		panic(err)
-	}
-
-	result, err := c.Query("select * from " + name)
-	if err != nil {
-		panic(err)
-	}
-
-	if len(result) != 1 {
-		panic(fmt.Errorf("expected one time series returned: %d", len(result)))
-	}
-
-	if len(result[0].Points) != 1 {
-		panic(fmt.Errorf("Expected one point: %d", len(result[0].Points)))
-	}
-
-	if result[0].Points[0][2].(float64) != 1 {
-		panic("Value not equal to 1")
-	}
-
-	c, err = client.NewClient(&client.ClientConfig{
-		Username: "root",
-		Password: "root",
-	})
-
-	if err != nil {
-		panic(err)
-	}
-
-	spaces, err := c.GetShardSpaces()
-	if err != nil || len(spaces) == 0 {
-		panic(fmt.Errorf("Got empty spaces back: %s", err))
-	}
-	if spaces[0].Name != "default" {
-		panic("Space name isn't default")
-	}
-	space := &client.ShardSpace{Name: "foo", Regex: "/^paul_is_rad/"}
-	err = c.CreateShardSpace("foobar", space)
-	if err != nil {
-		panic(err)
-	}
-	spaces, _ = c.GetShardSpaces()
-	if spaces[1].Name != "foo" {
-		panic("Space name isn't foo")
-	}
-	shards, err := c.GetShards()
-	if err != nil {
-		panic(fmt.Errorf("Couldn't get shards back: %s", err))
-	}
-
-	c, err = client.NewClient(&client.ClientConfig{
-		Username: "root",
-		Password: "root",
-		Database: "",
-	})
-	series = &client.Series{
-		Name:    "paul_is_rad",
-		Columns: []string{"value"},
-		Points: [][]interface{}{
-			{1.0},
-		},
-	}
-	if err := c.WriteSeries([]*client.Series{series}); err != nil {
-		panic(err)
-	}
-
-	spaces, _ = c.GetShardSpaces()
-	count := 0
-	for _, s := range shards.All {
-		if s.SpaceName == "foo" {
-			count++
-		}
-	}
-
-	if err := c.DropShardSpace("foobar", "foo"); err != nil {
-		panic(fmt.Errorf("Error: %s", err))
-	}
-
-	spaces, err = c.GetShardSpaces()
-	if err != nil || len(spaces) != 1 || spaces[0].Name != "default" {
-		panic(fmt.Errorf("Error: %s, %d, %s", err, len(spaces), spaces[0].Name))
-	}
-}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/influxdb.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/influxdb.go
index 22a50e5bcd2..2ec08a96eec 100644
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/influxdb.go
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/influxdb.go
@@ -2,609 +2,582 @@ package client
 
 import (
 	"bytes"
-	"compress/gzip"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
-	"net"
 	"net/http"
 	"net/url"
-	"strings"
+	"time"
+
+	"github.com/influxdb/influxdb/influxql"
+	"github.com/influxdb/influxdb/tsdb"
 )
 
-const (
-	UDPMaxMessageSize = 2048
-)
+// Query is used to send a command to the server. Both Command and Database are required.
+type Query struct {
+	Command  string
+	Database string
+}
 
+// Config is used to specify what server to connect to.
+// URL: The URL of the server to connect to.
+// Username/Password are optional.  They will be passed via basic auth if provided.
+// UserAgent: If not provided, will default to "InfluxDBClient".
+// Timeout: If not provided, will default to 0 (no timeout)
+type Config struct {
+	URL       url.URL
+	Username  string
+	Password  string
+	UserAgent string
+	Timeout   time.Duration
+}
+
+// Client is used to make calls to the server.
 type Client struct {
-	host        string
-	username    string
-	password    string
-	database    string
-	httpClient  *http.Client
-	udpConn     *net.UDPConn
-	schema      string
-	compression bool
+	url        url.URL
+	username   string
+	password   string
+	httpClient *http.Client
+	userAgent  string
 }
 
-type ClientConfig struct {
-	Host       string
-	Username   string
-	Password   string
-	Database   string
-	HttpClient *http.Client
-	IsSecure   bool
-	IsUDP      bool
-}
-
-var defaults *ClientConfig
-
-func init() {
-	defaults = &ClientConfig{
-		Host:       "localhost:8086",
-		Username:   "root",
-		Password:   "root",
-		Database:   "",
-		HttpClient: http.DefaultClient,
-	}
-}
-
-func getDefault(value, defaultValue string) string {
-	if value == "" {
-		return defaultValue
-	}
-	return value
-}
-
-func New(config *ClientConfig) (*Client, error) {
-	return NewClient(config)
-}
-
-func NewClient(config *ClientConfig) (*Client, error) {
-	host := getDefault(config.Host, defaults.Host)
-	username := getDefault(config.Username, defaults.Username)
-	password := getDefault(config.Password, defaults.Password)
-	database := getDefault(config.Database, defaults.Database)
-	if config.HttpClient == nil {
-		config.HttpClient = defaults.HttpClient
-	}
-	var udpConn *net.UDPConn
-	if config.IsUDP {
-		serverAddr, err := net.ResolveUDPAddr("udp", host)
-		if err != nil {
-			return nil, err
-		}
-		udpConn, err = net.DialUDP("udp", nil, serverAddr)
-		if err != nil {
-			return nil, err
-		}
-	}
-
-	schema := "http"
-	if config.IsSecure {
-		schema = "https"
-	}
-	return &Client{host, username, password, database, config.HttpClient, udpConn, schema, false}, nil
-}
-
-func (self *Client) DisableCompression() {
-	self.compression = false
-}
-
-func (self *Client) getUrl(path string) string {
-	return self.getUrlWithUserAndPass(path, self.username, self.password)
-}
-
-func (self *Client) getUrlWithUserAndPass(path, username, password string) string {
-	return fmt.Sprintf("%s://%s%s?u=%s&p=%s", self.schema, self.host, path, username, password)
-}
-
-func responseToError(response *http.Response, err error, closeResponse bool) error {
-	if err != nil {
-		return err
-	}
-	if closeResponse {
-		defer response.Body.Close()
-	}
-	if response.StatusCode >= 200 && response.StatusCode < 300 {
-		return nil
-	}
-	defer response.Body.Close()
-	body, err := ioutil.ReadAll(response.Body)
-	if err != nil {
-		return err
-	}
-	return fmt.Errorf("Server returned (%d): %s", response.StatusCode, string(body))
-}
-
-func (self *Client) CreateDatabase(name string) error {
-	url := self.getUrl("/db")
-	payload := map[string]string{"name": name}
-	data, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-	resp, err := self.httpClient.Post(url, "application/json", bytes.NewBuffer(data))
-	return responseToError(resp, err, true)
-}
-
-func (self *Client) del(url string) (*http.Response, error) {
-	return self.delWithBody(url, nil)
-}
-
-func (self *Client) delWithBody(url string, body io.Reader) (*http.Response, error) {
-	req, err := http.NewRequest("DELETE", url, body)
-	if err != nil {
-		return nil, err
-	}
-	return self.httpClient.Do(req)
-}
-
-func (self *Client) DeleteDatabase(name string) error {
-	url := self.getUrl("/db/" + name)
-	resp, err := self.del(url)
-	return responseToError(resp, err, true)
-}
-
-func (self *Client) get(url string) ([]byte, error) {
-	resp, err := self.httpClient.Get(url)
-	err = responseToError(resp, err, false)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-	body, err := ioutil.ReadAll(resp.Body)
-	return body, err
-}
-
-func (self *Client) getWithVersion(url string) ([]byte, string, error) {
-	resp, err := self.httpClient.Get(url)
-	err = responseToError(resp, err, false)
-	if err != nil {
-		return nil, "", err
-	}
-	defer resp.Body.Close()
-	body, err := ioutil.ReadAll(resp.Body)
-	version := resp.Header.Get("X-Influxdb-Version")
-	fields := strings.Fields(version)
-	if len(fields) > 2 {
-		return body, fields[1], err
-	}
-	return body, "", err
-}
-
-func (self *Client) listSomething(url string) ([]map[string]interface{}, error) {
-	body, err := self.get(url)
-	if err != nil {
-		return nil, err
-	}
-	somethings := []map[string]interface{}{}
-	err = json.Unmarshal(body, &somethings)
-	if err != nil {
-		return nil, err
-	}
-	return somethings, nil
-}
-
-func (self *Client) GetDatabaseList() ([]map[string]interface{}, error) {
-	url := self.getUrl("/db")
-	return self.listSomething(url)
-}
-
-func (self *Client) CreateClusterAdmin(name, password string) error {
-	url := self.getUrl("/cluster_admins")
-	payload := map[string]string{"name": name, "password": password}
-	data, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-	resp, err := self.httpClient.Post(url, "application/json", bytes.NewBuffer(data))
-	return responseToError(resp, err, true)
-}
-
-func (self *Client) UpdateClusterAdmin(name, password string) error {
-	url := self.getUrl("/cluster_admins/" + name)
-	payload := map[string]string{"password": password}
-	data, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-	resp, err := self.httpClient.Post(url, "application/json", bytes.NewBuffer(data))
-	return responseToError(resp, err, true)
-}
-
-func (self *Client) DeleteClusterAdmin(name string) error {
-	url := self.getUrl("/cluster_admins/" + name)
-	resp, err := self.del(url)
-	return responseToError(resp, err, true)
-}
-
-func (self *Client) GetClusterAdminList() ([]map[string]interface{}, error) {
-	url := self.getUrl("/cluster_admins")
-	return self.listSomething(url)
-}
-
-func (self *Client) Servers() ([]map[string]interface{}, error) {
-	url := self.getUrl("/cluster/servers")
-	return self.listSomething(url)
-}
-
-func (self *Client) RemoveServer(id int) error {
-	resp, err := self.del(self.getUrl(fmt.Sprintf("/cluster/servers/%d", id)))
-	return responseToError(resp, err, true)
-}
-
-// Creates a new database user for the given database. permissions can
-// be omitted in which case the user will be able to read and write to
-// all time series. If provided, there should be two strings, the
-// first for read and the second for write. The strings are regexes
-// that are used to match the time series name to determine whether
-// the user has the ability to read/write to the given time series.
-//
-//     client.CreateDatabaseUser("db", "user", "pass")
-//     // the following user cannot read from any series and can write
-//     // to the limited time series only
-//     client.CreateDatabaseUser("db", "limited", "pass", "^$", "limited")
-func (self *Client) CreateDatabaseUser(database, name, password string, permissions ...string) error {
-	readMatcher, writeMatcher := ".*", ".*"
-	switch len(permissions) {
-	case 0:
-	case 2:
-		readMatcher, writeMatcher = permissions[0], permissions[1]
-	default:
-		return fmt.Errorf("You have to provide two ")
-	}
-
-	url := self.getUrl("/db/" + database + "/users")
-	payload := map[string]string{"name": name, "password": password, "readFrom": readMatcher, "writeTo": writeMatcher}
-	data, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-	resp, err := self.httpClient.Post(url, "application/json", bytes.NewBuffer(data))
-	return responseToError(resp, err, true)
-}
-
-// Change the cluster admin password
-func (self *Client) ChangeClusterAdminPassword(name, newPassword string) error {
-	url := self.getUrl("/cluster_admins/" + name)
-	payload := map[string]interface{}{"password": newPassword}
-	data, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-	resp, err := self.httpClient.Post(url, "application/json", bytes.NewBuffer(data))
-	return responseToError(resp, err, true)
-}
-
-// Change the user password, adming flag and optionally permissions
-func (self *Client) ChangeDatabaseUser(database, name, newPassword string, isAdmin bool, newPermissions ...string) error {
-	switch len(newPermissions) {
-	case 0, 2:
-	default:
-		return fmt.Errorf("You have to provide two ")
-	}
-
-	url := self.getUrl("/db/" + database + "/users/" + name)
-	payload := map[string]interface{}{"password": newPassword, "admin": isAdmin}
-	if len(newPermissions) == 2 {
-		payload["readFrom"] = newPermissions[0]
-		payload["writeTo"] = newPermissions[1]
-	}
-	data, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-	resp, err := self.httpClient.Post(url, "application/json", bytes.NewBuffer(data))
-	return responseToError(resp, err, true)
-}
-
-// See Client.CreateDatabaseUser for more info on the permissions
-// argument
-func (self *Client) updateDatabaseUserCommon(database, name string, password *string, isAdmin *bool, permissions ...string) error {
-	url := self.getUrl("/db/" + database + "/users/" + name)
-	payload := map[string]interface{}{}
-	if password != nil {
-		payload["password"] = *password
-	}
-	if isAdmin != nil {
-		payload["admin"] = *isAdmin
-	}
-	switch len(permissions) {
-	case 0:
-	case 2:
-		payload["readFrom"] = permissions[0]
-		payload["writeTo"] = permissions[1]
-	default:
-	}
-
-	data, err := json.Marshal(payload)
-	if err != nil {
-		return err
-	}
-	resp, err := self.httpClient.Post(url, "application/json", bytes.NewBuffer(data))
-	return responseToError(resp, err, true)
-}
-
-func (self *Client) UpdateDatabaseUser(database, name, password string) error {
-	return self.updateDatabaseUserCommon(database, name, &password, nil)
-}
-
-func (self *Client) UpdateDatabaseUserPermissions(database, name, readPermission, writePermissions string) error {
-	return self.updateDatabaseUserCommon(database, name, nil, nil, readPermission, writePermissions)
-}
-
-func (self *Client) DeleteDatabaseUser(database, name string) error {
-	url := self.getUrl("/db/" + database + "/users/" + name)
-	resp, err := self.del(url)
-	return responseToError(resp, err, true)
-}
-
-func (self *Client) GetDatabaseUserList(database string) ([]map[string]interface{}, error) {
-	url := self.getUrl("/db/" + database + "/users")
-	return self.listSomething(url)
-}
-
-func (self *Client) AlterDatabasePrivilege(database, name string, isAdmin bool, permissions ...string) error {
-	return self.updateDatabaseUserCommon(database, name, nil, &isAdmin, permissions...)
-}
-
-type TimePrecision string
-
 const (
-	Second      TimePrecision = "s"
-	Millisecond TimePrecision = "ms"
-	Microsecond TimePrecision = "u"
+	ConsistencyOne    = "one"
+	ConsistencyAll    = "all"
+	ConsistencyQuorum = "quorum"
+	ConsistencyAny    = "any"
 )
 
-func (self *Client) WriteSeries(series []*Series) error {
-	return self.writeSeriesCommon(series, nil)
+// NewClient will instantiate and return a client for issuing commands to the server; it does not contact the server itself.
+func NewClient(c Config) (*Client, error) {
+	client := Client{
+		url:        c.URL,
+		username:   c.Username,
+		password:   c.Password,
+		httpClient: &http.Client{Timeout: c.Timeout},
+		userAgent:  c.UserAgent,
+	}
+	if client.userAgent == "" {
+		client.userAgent = "InfluxDBClient"
+	}
+	return &client, nil
 }
 
-func (self *Client) WriteSeriesOverUDP(series []*Series) error {
-	if self.udpConn == nil {
-		return fmt.Errorf("UDP isn't enabled. Make sure to set config.IsUDP to true")
+// SetAuth will update the username and password.
+func (c *Client) SetAuth(u, p string) {
+	c.username = u
+	c.password = p
+}
+
+// Query sends a command to the server and returns the Response
+func (c *Client) Query(q Query) (*Response, error) {
+	u := c.url
+
+	u.Path = "query"
+	values := u.Query()
+	values.Set("q", q.Command)
+	values.Set("db", q.Database)
+	u.RawQuery = values.Encode()
+
+	req, err := http.NewRequest("GET", u.String(), nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("User-Agent", c.userAgent)
+	if c.username != "" {
+		req.SetBasicAuth(c.username, c.password)
 	}
 
-	data, err := json.Marshal(series)
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	var response Response
+	dec := json.NewDecoder(resp.Body)
+	dec.UseNumber()
+	decErr := dec.Decode(&response)
+
+	// ignore this error if we got an invalid status code
+	if decErr != nil && decErr.Error() == "EOF" && resp.StatusCode != http.StatusOK {
+		decErr = nil
+	}
+	// If we got a valid decode error, send that back
+	if decErr != nil {
+		return nil, decErr
+	}
+	// If we don't have an error in our json response, and didn't get  statusOK, then send back an error
+	if resp.StatusCode != http.StatusOK && response.Error() == nil {
+		return &response, fmt.Errorf("received status code %d from server", resp.StatusCode)
+	}
+	return &response, nil
+}
+
+// Write sends the points in bp to the server's write endpoint, merging bp.Tags into every non-Raw point.
+// If successful, error is nil and Response is nil.
+// If the server answers with a status other than 200 or 204, the error and Response.Err both carry the reply body.
+func (c *Client) Write(bp BatchPoints) (*Response, error) {
+	u := c.url // work on a copy so the client's stored URL is not mutated
+	u.Path = "write"
+	var b bytes.Buffer
+	for _, p := range bp.Points {
+		if p.Raw != "" {
+			if _, err := b.WriteString(p.Raw); err != nil {
+				return nil, err
+			}
+		} else {
+			for k, v := range bp.Tags {
+				if p.Tags == nil {
+					p.Tags = make(map[string]string, len(bp.Tags))
+				}
+				p.Tags[k] = v
+			}
+
+			if _, err := b.WriteString(p.MarshalString()); err != nil {
+				return nil, err
+			}
+		}
+
+		if err := b.WriteByte('\n'); err != nil {
+			return nil, err
+		}
+	}
+
+	req, err := http.NewRequest("POST", u.String(), &b)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "")
+	req.Header.Set("User-Agent", c.userAgent)
+	if c.username != "" {
+		req.SetBasicAuth(c.username, c.password)
+	}
+	params := req.URL.Query()
+	params.Add("db", bp.Database)
+	params.Add("rp", bp.RetentionPolicy)
+	params.Add("precision", bp.Precision)
+	params.Add("consistency", bp.WriteConsistency)
+	req.URL.RawQuery = params.Encode()
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	var response Response
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil && err != io.EOF {
+		return nil, err
+	}
+
+	if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK {
+		// errors.New, not fmt.Errorf: the body is data, not a format string.
+		response.Err = errors.New(string(body))
+		return &response, response.Err
+	}
+
+	return nil, nil
+}
+
+// Ping will check to see if the server is up
+// Ping returns how long the request took, the version of the server it connected to, and an error if one occurred.
+func (c *Client) Ping() (time.Duration, string, error) {
+	now := time.Now()
+	u := c.url
+	u.Path = "ping"
+
+	req, err := http.NewRequest("GET", u.String(), nil)
+	if err != nil {
+		return 0, "", err
+	}
+	req.Header.Set("User-Agent", c.userAgent)
+	if c.username != "" {
+		req.SetBasicAuth(c.username, c.password)
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return 0, "", err
+	}
+	defer resp.Body.Close()
+
+	version := resp.Header.Get("X-Influxdb-Version")
+	return time.Since(now), version, nil
+}
+
+// Dump connects to server and retrieves all data stored for specified database.
+// If successful, Dump returns the entire response body, which is an io.ReadCloser
+func (c *Client) Dump(db string) (io.ReadCloser, error) {
+	u := c.url
+	u.Path = "dump"
+	values := u.Query()
+	values.Set("db", db)
+	u.RawQuery = values.Encode()
+
+	req, err := http.NewRequest("GET", u.String(), nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("User-Agent", c.userAgent)
+	if c.username != "" {
+		req.SetBasicAuth(c.username, c.password)
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode != http.StatusOK {
+		return resp.Body, fmt.Errorf("HTTP Protocol error %d", resp.StatusCode)
+	}
+	return resp.Body, nil
+}
+
+// Structs
+
+// Result represents a resultset returned from a single statement.
+type Result struct {
+	Series []influxql.Row
+	Err    error
+}
+
+// MarshalJSON encodes the result into JSON.
+func (r *Result) MarshalJSON() ([]byte, error) {
+	// Define a struct that outputs "error" as a string.
+	var o struct {
+		Series []influxql.Row `json:"series,omitempty"`
+		Err    string         `json:"error,omitempty"`
+	}
+
+	// Copy fields to output struct.
+	o.Series = r.Series
+	if r.Err != nil {
+		o.Err = r.Err.Error()
+	}
+
+	return json.Marshal(&o)
+}
+
+// UnmarshalJSON decodes the data into the Result struct
+func (r *Result) UnmarshalJSON(b []byte) error {
+	var o struct {
+		Series []influxql.Row `json:"series,omitempty"`
+		Err    string         `json:"error,omitempty"`
+	}
+
+	dec := json.NewDecoder(bytes.NewBuffer(b))
+	dec.UseNumber()
+	err := dec.Decode(&o)
 	if err != nil {
 		return err
 	}
-	// because max of msg over upd is 2048 bytes
-	// https://github.com/influxdb/influxdb/blob/master/src/api/udp/api.go#L65
-	if len(data) >= UDPMaxMessageSize {
-		err = fmt.Errorf("data size over limit %v limit is %v", len(data), UDPMaxMessageSize)
-		fmt.Println(err)
-		return err
-	}
-	_, err = self.udpConn.Write(data)
-	if err != nil {
-		return err
+	r.Series = o.Series
+	if o.Err != "" {
+		r.Err = errors.New(o.Err)
 	}
 	return nil
 }
 
-func (self *Client) WriteSeriesWithTimePrecision(series []*Series, timePrecision TimePrecision) error {
-	return self.writeSeriesCommon(series, map[string]string{"time_precision": string(timePrecision)})
+// Response represents a list of statement results.
+type Response struct {
+	Results []Result
+	Err     error
 }
 
-func (self *Client) writeSeriesCommon(series []*Series, options map[string]string) error {
-	data, err := json.Marshal(series)
+// MarshalJSON encodes the response into JSON.
+func (r *Response) MarshalJSON() ([]byte, error) {
+	// Define a struct that outputs "error" as a string.
+	var o struct {
+		Results []Result `json:"results,omitempty"`
+		Err     string   `json:"error,omitempty"`
+	}
+
+	// Copy fields to output struct.
+	o.Results = r.Results
+	if r.Err != nil {
+		o.Err = r.Err.Error()
+	}
+
+	return json.Marshal(&o)
+}
+
+// UnmarshalJSON decodes the data into the Response struct
+func (r *Response) UnmarshalJSON(b []byte) error {
+	var o struct {
+		Results []Result `json:"results,omitempty"`
+		Err     string   `json:"error,omitempty"`
+	}
+
+	dec := json.NewDecoder(bytes.NewBuffer(b))
+	dec.UseNumber()
+	err := dec.Decode(&o)
 	if err != nil {
 		return err
 	}
-	url := self.getUrl("/db/" + self.database + "/series")
-	for name, value := range options {
-		url += fmt.Sprintf("&%s=%s", name, value)
+	r.Results = o.Results
+	if o.Err != "" {
+		r.Err = errors.New(o.Err)
 	}
-	var b *bytes.Buffer
-	if self.compression {
-		b = bytes.NewBuffer(nil)
-		w := gzip.NewWriter(b)
-		if _, err := w.Write(data); err != nil {
+	return nil
+}
+
+// Error returns the first error from any statement.
+// Returns nil if no errors occurred on any statements.
+func (r Response) Error() error {
+	if r.Err != nil {
+		return r.Err
+	}
+	for _, result := range r.Results {
+		if result.Err != nil {
+			return result.Err
+		}
+	}
+	return nil
+}
+
+// Point defines the fields that will be written to the database
+// Measurement, Time, and Fields are required
+// Precision can be specified if the time is in epoch format (integer).
+// Valid values for Precision are n, u, ms, s, m, and h
+type Point struct {
+	Measurement string
+	Tags        map[string]string
+	Time        time.Time
+	Fields      map[string]interface{}
+	Precision   string
+	Raw         string
+}
+
+// MarshalJSON encodes the point as JSON with Time converted to UTC and formatted as RFC3339Nano.
+// Precision is not applied to the timestamp (it is only used for writing, not reading), although the field itself is still emitted.
+// In other words, the time is always sent back in nanosecond precision; a zero Time is omitted and Raw is never output.
+func (p *Point) MarshalJSON() ([]byte, error) {
+	point := struct {
+		Measurement string                 `json:"measurement,omitempty"`
+		Tags        map[string]string      `json:"tags,omitempty"`
+		Time        string                 `json:"time,omitempty"`
+		Fields      map[string]interface{} `json:"fields,omitempty"`
+		Precision   string                 `json:"precision,omitempty"`
+	}{
+		Measurement: p.Measurement,
+		Tags:        p.Tags,
+		Fields:      p.Fields,
+		Precision:   p.Precision,
+	}
+	// Leave Time as "" for a zero timestamp so that omitempty drops it from the output.
+	if !p.Time.IsZero() {
+		point.Time = p.Time.UTC().Format(time.RFC3339Nano)
+	}
+	return json.Marshal(&point)
+}
+
+func (p *Point) MarshalString() string {
+	return tsdb.NewPoint(p.Measurement, p.Tags, p.Fields, p.Time).String()
+}
+
+// UnmarshalJSON decodes the data into the Point struct
+func (p *Point) UnmarshalJSON(b []byte) error {
+	var normal struct {
+		Measurement string                 `json:"measurement"`
+		Tags        map[string]string      `json:"tags"`
+		Time        time.Time              `json:"time"`
+		Precision   string                 `json:"precision"`
+		Fields      map[string]interface{} `json:"fields"`
+	}
+	var epoch struct {
+		Measurement string                 `json:"measurement"`
+		Tags        map[string]string      `json:"tags"`
+		Time        *int64                 `json:"time"`
+		Precision   string                 `json:"precision"`
+		Fields      map[string]interface{} `json:"fields"`
+	}
+
+	if err := func() error {
+		var err error
+		dec := json.NewDecoder(bytes.NewBuffer(b))
+		dec.UseNumber()
+		if err = dec.Decode(&epoch); err != nil {
 			return err
 		}
-		w.Flush()
-		w.Close()
-	} else {
-		b = bytes.NewBuffer(data)
+		// Convert from epoch to time.Time, but only if Time
+		// was actually set.
+		var ts time.Time
+		if epoch.Time != nil {
+			ts, err = EpochToTime(*epoch.Time, epoch.Precision)
+			if err != nil {
+				return err
+			}
+		}
+		p.Measurement = epoch.Measurement
+		p.Tags = epoch.Tags
+		p.Time = ts
+		p.Precision = epoch.Precision
+		p.Fields = normalizeFields(epoch.Fields)
+		return nil
+	}(); err == nil {
+		return nil
 	}
-	req, err := http.NewRequest("POST", url, b)
-	if err != nil {
+
+	dec := json.NewDecoder(bytes.NewBuffer(b))
+	dec.UseNumber()
+	if err := dec.Decode(&normal); err != nil {
 		return err
 	}
-	if self.compression {
-		req.Header.Set("Content-Encoding", "gzip")
-	}
-	resp, err := self.httpClient.Do(req)
-	return responseToError(resp, err, true)
+	normal.Time = SetPrecision(normal.Time, normal.Precision)
+	p.Measurement = normal.Measurement
+	p.Tags = normal.Tags
+	p.Time = normal.Time
+	p.Precision = normal.Precision
+	p.Fields = normalizeFields(normal.Fields)
+
+	return nil
 }
 
-func (self *Client) Query(query string, precision ...TimePrecision) ([]*Series, error) {
-	return self.queryCommon(query, false, precision...)
+// normalizeFields returns a new map in which every top-level json.Number value is replaced by its float64 form; other values are copied as-is. It panics if a number cannot be converted.
+func normalizeFields(fields map[string]interface{}) map[string]interface{} {
+	newFields := map[string]interface{}{} // always non-nil, even when fields is nil or empty
+
+	for k, v := range fields {
+		switch v := v.(type) {
+		case json.Number:
+			jv, e := v.Float64()
+			if e != nil {
+				panic(fmt.Sprintf("unable to convert json.Number to float64: %s", e))
+			}
+			newFields[k] = jv
+		default:
+			newFields[k] = v
+		}
+	}
+	return newFields
 }
 
-func (self *Client) QueryWithNumbers(query string, precision ...TimePrecision) ([]*Series, error) {
-	return self.queryCommon(query, true, precision...)
+// BatchPoints is used to send batched data in a single write.
+// Database and Points are required
+// If no retention policy is specified, it will use the database's default retention policy.
+// If tags are specified, they will be "merged" with all points.  If a point already has that tag, it is ignored.
+// If time is specified, it will be applied to any point with an empty time.
+// Precision can be specified if the time is in epoch format (integer).
+// Valid values for Precision are n, u, ms, s, m, and h
+type BatchPoints struct {
+	Points           []Point           `json:"points,omitempty"`
+	Database         string            `json:"database,omitempty"`
+	RetentionPolicy  string            `json:"retentionPolicy,omitempty"`
+	Tags             map[string]string `json:"tags,omitempty"`
+	Time             time.Time         `json:"time,omitempty"`
+	Precision        string            `json:"precision,omitempty"`
+	WriteConsistency string            `json:"-"`
 }
 
-func (self *Client) queryCommon(query string, useNumber bool, precision ...TimePrecision) ([]*Series, error) {
-	escapedQuery := url.QueryEscape(query)
-	url := self.getUrl("/db/" + self.database + "/series")
-	if len(precision) > 0 {
-		url += "&time_precision=" + string(precision[0])
+// UnmarshalJSON decodes the data into the BatchPoints struct
+func (bp *BatchPoints) UnmarshalJSON(b []byte) error {
+	var normal struct {
+		Points          []Point           `json:"points"`
+		Database        string            `json:"database"`
+		RetentionPolicy string            `json:"retentionPolicy"`
+		Tags            map[string]string `json:"tags"`
+		Time            time.Time         `json:"time"`
+		Precision       string            `json:"precision"`
 	}
-	url += "&q=" + escapedQuery
-	req, err := http.NewRequest("GET", url, nil)
-	if err != nil {
-		return nil, err
-	}
-	if !self.compression {
-		req.Header.Set("Accept-Encoding", "identity")
-	}
-	resp, err := self.httpClient.Do(req)
-	err = responseToError(resp, err, false)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	series := []*Series{}
-	decoder := json.NewDecoder(resp.Body)
-	if useNumber {
-		decoder.UseNumber()
-	}
-	err = decoder.Decode(&series)
-	if err != nil {
-		return nil, err
-	}
-	return series, nil
-}
-
-func (self *Client) Ping() error {
-	url := self.getUrl("/ping")
-	resp, err := self.httpClient.Get(url)
-	return responseToError(resp, err, true)
-}
-
-func (self *Client) AuthenticateDatabaseUser(database, username, password string) error {
-	url := self.getUrlWithUserAndPass(fmt.Sprintf("/db/%s/authenticate", database), username, password)
-	resp, err := self.httpClient.Get(url)
-	return responseToError(resp, err, true)
-}
-
-func (self *Client) AuthenticateClusterAdmin(username, password string) error {
-	url := self.getUrlWithUserAndPass("/cluster_admins/authenticate", username, password)
-	resp, err := self.httpClient.Get(url)
-	return responseToError(resp, err, true)
-}
-
-type LongTermShortTermShards struct {
-	// Long term shards, (doesn't get populated for version >= 0.8.0)
-	LongTerm []*Shard `json:"longTerm"`
-	// Short term shards, (doesn't get populated for version >= 0.8.0)
-	ShortTerm []*Shard `json:"shortTerm"`
-	// All shards in the system (Long + Short term shards for version < 0.8.0)
-	All []*Shard `json:"-"`
-}
-
-type Shard struct {
-	Id        uint32   `json:"id"`
-	EndTime   int64    `json:"endTime"`
-	StartTime int64    `json:"startTime"`
-	ServerIds []uint32 `json:"serverIds"`
-	SpaceName string   `json:"spaceName"`
-	Database  string   `json:"database"`
-}
-
-type ShardSpaceCollection struct {
-	ShardSpaces []ShardSpace
-}
-
-func (self *Client) GetShards() (*LongTermShortTermShards, error) {
-	url := self.getUrlWithUserAndPass("/cluster/shards", self.username, self.password)
-	body, version, err := self.getWithVersion(url)
-	if err != nil {
-		return nil, err
-	}
-	return parseShards(body, version)
-}
-
-func isOrNewerThan(version, reference string) bool {
-	if version == "vdev" {
-		return true
-	}
-	majorMinor := strings.Split(version[1:], ".")[:2]
-	refMajorMinor := strings.Split(reference[1:], ".")[:2]
-	if majorMinor[0] > refMajorMinor[0] {
-		return true
-	}
-	if majorMinor[1] > refMajorMinor[1] {
-		return true
-	}
-	return majorMinor[1] == refMajorMinor[1]
-}
-
-func parseShards(body []byte, version string) (*LongTermShortTermShards, error) {
-	// strip the initial v in `v0.8.0` and split on the dots
-	if version != "" && isOrNewerThan(version, "v0.8") {
-		return parseNewShards(body)
-	}
-	shards := &LongTermShortTermShards{}
-	err := json.Unmarshal(body, &shards)
-	if err != nil {
-		return nil, err
+	var epoch struct {
+		Points          []Point           `json:"points"`
+		Database        string            `json:"database"`
+		RetentionPolicy string            `json:"retentionPolicy"`
+		Tags            map[string]string `json:"tags"`
+		Time            *int64            `json:"time"`
+		Precision       string            `json:"precision"`
 	}
 
-	shards.All = make([]*Shard, len(shards.LongTerm)+len(shards.ShortTerm))
-	copy(shards.All, shards.LongTerm)
-	copy(shards.All[len(shards.LongTerm):], shards.ShortTerm)
-	return shards, nil
-}
-
-func parseNewShards(body []byte) (*LongTermShortTermShards, error) {
-	shards := []*Shard{}
-	err := json.Unmarshal(body, &shards)
-	if err != nil {
-		return nil, err
+	if err := func() error {
+		var err error
+		if err = json.Unmarshal(b, &epoch); err != nil {
+			return err
+		}
+		// Convert from epoch to time.Time
+		var ts time.Time
+		if epoch.Time != nil {
+			ts, err = EpochToTime(*epoch.Time, epoch.Precision)
+			if err != nil {
+				return err
+			}
+		}
+		bp.Points = epoch.Points
+		bp.Database = epoch.Database
+		bp.RetentionPolicy = epoch.RetentionPolicy
+		bp.Tags = epoch.Tags
+		bp.Time = ts
+		bp.Precision = epoch.Precision
+		return nil
+	}(); err == nil {
+		return nil
 	}
 
-	return &LongTermShortTermShards{All: shards}, nil
-}
-
-// Added to InfluxDB in 0.8.0
-func (self *Client) GetShardSpaces() ([]*ShardSpace, error) {
-	url := self.getUrlWithUserAndPass("/cluster/shard_spaces", self.username, self.password)
-	body, err := self.get(url)
-	if err != nil {
-		return nil, err
-	}
-	spaces := []*ShardSpace{}
-	err = json.Unmarshal(body, &spaces)
-	if err != nil {
-		return nil, err
-	}
-
-	return spaces, nil
-}
-
-// Added to InfluxDB in 0.8.0
-func (self *Client) DropShardSpace(database, name string) error {
-	url := self.getUrlWithUserAndPass(fmt.Sprintf("/cluster/shard_spaces/%s/%s", database, name), self.username, self.password)
-	_, err := self.del(url)
-	return err
-}
-
-// Added to InfluxDB in 0.8.0
-func (self *Client) CreateShardSpace(database string, space *ShardSpace) error {
-	url := self.getUrl(fmt.Sprintf("/cluster/shard_spaces/%s", database))
-	data, err := json.Marshal(space)
-	if err != nil {
+	if err := json.Unmarshal(b, &normal); err != nil {
 		return err
 	}
-	resp, err := self.httpClient.Post(url, "application/json", bytes.NewBuffer(data))
-	return responseToError(resp, err, true)
+	normal.Time = SetPrecision(normal.Time, normal.Precision)
+	bp.Points = normal.Points
+	bp.Database = normal.Database
+	bp.RetentionPolicy = normal.RetentionPolicy
+	bp.Tags = normal.Tags
+	bp.Time = normal.Time
+	bp.Precision = normal.Precision
+
+	return nil
 }
 
-func (self *Client) DropShard(id uint32, serverIds []uint32) error {
-	url := self.getUrlWithUserAndPass(fmt.Sprintf("/cluster/shards/%d", id), self.username, self.password)
-	ids := map[string][]uint32{"serverIds": serverIds}
-	body, err := json.Marshal(ids)
-	if err != nil {
-		return err
-	}
-	_, err = self.delWithBody(url, bytes.NewBuffer(body))
-	return err
+// utility functions
+
+// Addr provides the current url as a string of the server the client is connected to.
+func (c *Client) Addr() string {
+	return c.url.String()
 }
 
-// Added to InfluxDB in 0.8.2
-func (self *Client) UpdateShardSpace(database, name string, space *ShardSpace) error {
-	url := self.getUrl(fmt.Sprintf("/cluster/shard_spaces/%s/%s", database, name))
-	data, err := json.Marshal(space)
-	if err != nil {
-		return err
+// helper functions
+
+// EpochToTime converts epoch, a count of precision units ("h", "m", "s", "ms", "u" or "n") since the Unix epoch, into a time.Time; any other precision returns an error.
+func EpochToTime(epoch int64, precision string) (time.Time, error) {
+	if precision == "" {
+		precision = "s" // an empty precision defaults to seconds
 	}
-	resp, err := self.httpClient.Post(url, "application/json", bytes.NewBuffer(data))
-	return responseToError(resp, err, true)
+	var t time.Time
+	switch precision { // NOTE(review): the multiplications below are unchecked and can wrap for very large epoch values
+	case "h":
+		t = time.Unix(0, epoch*int64(time.Hour))
+	case "m":
+		t = time.Unix(0, epoch*int64(time.Minute))
+	case "s":
+		t = time.Unix(0, epoch*int64(time.Second))
+	case "ms":
+		t = time.Unix(0, epoch*int64(time.Millisecond))
+	case "u":
+		t = time.Unix(0, epoch*int64(time.Microsecond))
+	case "n":
+		t = time.Unix(0, epoch)
+	default:
+		return time.Time{}, fmt.Errorf("Unknown precision %q", precision)
+	}
+	return t, nil
+}
+
+// SetPrecision rounds t to the given precision ("u", "ms", "s", "m" or "h"); "n" and unrecognized values return t unchanged.
+func SetPrecision(t time.Time, precision string) time.Time {
+	switch precision {
+	case "n": // Go cases do not fall through: nothing to round, so t is returned as-is below
+	case "u":
+		return t.Round(time.Microsecond)
+	case "ms":
+		return t.Round(time.Millisecond)
+	case "s":
+		return t.Round(time.Second)
+	case "m":
+		return t.Round(time.Minute)
+	case "h":
+		return t.Round(time.Hour)
+	}
+	return t
 }
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/series.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/series.go
deleted file mode 100644
index f18b8bbb59e..00000000000
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/series.go
+++ /dev/null
@@ -1,19 +0,0 @@
-package client
-
-type Series struct {
-	Name    string          `json:"name"`
-	Columns []string        `json:"columns"`
-	Points  [][]interface{} `json:"points"`
-}
-
-func (self *Series) GetName() string {
-	return self.Name
-}
-
-func (self *Series) GetColumns() []string {
-	return self.Columns
-}
-
-func (self *Series) GetPoints() [][]interface{} {
-	return self.Points
-}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/shard_space.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/shard_space.go
deleted file mode 100644
index 87dea1173bc..00000000000
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/shard_space.go
+++ /dev/null
@@ -1,15 +0,0 @@
-package client
-
-type ShardSpace struct {
-	// required, must be unique within the database
-	Name string `json:"name"`
-	// required, a database has many shard spaces and a shard space belongs to a database
-	Database string `json:"database"`
-	// this is optional, if they don't set it, we'll set to /.*/
-	Regex string `json:"regex"`
-	// this is optional, if they don't set it, it will default to the storage.dir in the config
-	RetentionPolicy   string `json:"retentionPolicy"`
-	ShardDuration     string `json:"shardDuration"`
-	ReplicationFactor uint32 `json:"replicationFactor"`
-	Split             uint32 `json:"split"`
-}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/INFLUXQL.md b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/INFLUXQL.md
new file mode 100644
index 00000000000..087fc3b9ff6
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/INFLUXQL.md
@@ -0,0 +1,650 @@
+# The Influx Query Language Specification
+
+## Introduction
+
+This is a reference for the Influx Query Language ("InfluxQL").
+
+InfluxQL is a SQL-like query language for interacting with InfluxDB.  It has been lovingly crafted to feel familiar to those coming from other SQL or SQL-like environments while providing features specific to storing and analyzing time series data.
+
+## Notation
+
+The syntax is specified using Extended Backus-Naur Form ("EBNF").  EBNF is the same notation used in the [Go](http://golang.org) programming language specification, which can be found [here](https://golang.org/ref/spec).  Not so coincidentally, InfluxDB is written in Go.
+
+```
+Production  = production_name "=" [ Expression ] "." .
+Expression  = Alternative { "|" Alternative } .
+Alternative = Term { Term } .
+Term        = production_name | token [ "…" token ] | Group | Option | Repetition .
+Group       = "(" Expression ")" .
+Option      = "[" Expression "]" .
+Repetition  = "{" Expression "}" .
+```
+
+Notation operators in order of increasing precedence:
+
+```
+|   alternation
+()  grouping
+[]  option (0 or 1 times)
+{}  repetition (0 to n times)
+```
+
+## Query representation
+
+### Characters
+
+InfluxQL is Unicode text encoded in [UTF-8](http://en.wikipedia.org/wiki/UTF-8).
+
+```
+newline             = /* the Unicode code point U+000A */ .
+unicode_char        = /* an arbitrary Unicode code point except newline */ .
+```
+
+## Letters and digits
+
+Letters are the ASCII letters; the underscore character _ (U+005F) is also considered a letter.
+
+Only decimal digits are supported.
+
+```
+letter              = ascii_letter | "_" .
+ascii_letter        = "A" … "Z" | "a" … "z" .
+digit               = "0" … "9" .
+```
+
+## Identifiers
+
+Identifiers are tokens which refer to database names, retention policy names, user names, measurement names, tag keys, and field names.
+
+The rules:
+
+- double quoted identifiers can contain any unicode character other than a new line
+- double quoted identifiers can contain escaped `"` characters (i.e., `\"`)
+- unquoted identifiers must start with an upper or lowercase ASCII letter or "_"
+- unquoted identifiers may contain only ASCII letters, decimal digits, and "_"
+
+```
+identifier          = unquoted_identifier | quoted_identifier .
+unquoted_identifier = ( letter ) { letter | digit } .
+quoted_identifier   = `"` unicode_char { unicode_char } `"` .
+```
+
+#### Examples:
+
+```
+cpu
+_cpu_stats
+"1h"
+"anything really"
+"1_Crazy-1337.identifier>NAME👍"
+```
+
+## Keywords
+
+```
+ALL          ALTER        AS           ASC          BEGIN        BY
+CREATE       CONTINUOUS   DATABASE     DATABASES    DEFAULT      DELETE
+DESC         DROP         DURATION     END          EXISTS       EXPLAIN
+FIELD        FROM         GRANT        GROUP        IF           IN
+INNER        INSERT       INTO         KEY          KEYS         LIMIT
+SHOW         MEASUREMENT  MEASUREMENTS OFFSET       ON           ORDER
+PASSWORD     POLICY       POLICIES     PRIVILEGES   QUERIES      QUERY
+READ         REPLICATION  RETENTION    REVOKE       SELECT       SERIES
+SLIMIT       SOFFSET      TAG          TO           USER         USERS
+VALUES       WHERE        WITH         WRITE
+```
+
+## Literals
+
+### Integers
+
+InfluxQL supports decimal integer literals.  Hexadecimal and octal literals are not currently supported.
+
+```
+int_lit             = ( "1" … "9" ) { digit } .
+```
+
+### Floats
+
+InfluxQL supports floating-point literals.  Exponents are not currently supported.
+
+```
+float_lit           = int_lit "." int_lit .
+```
+
+### Strings
+
+String literals must be surrounded by single quotes. Strings may contain `'` characters as long as they are escaped (i.e., `\'`).
+
+```
+string_lit          = `'` { unicode_char } `'` .
+```
+
+### Durations
+
+Duration literals specify a length of time.  An integer literal followed immediately (with no spaces) by a duration unit listed below is interpreted as a duration literal.
+
+```
+Duration unit definitions
+-------------------------
+| Units  | Meaning                                 |
+|--------|-----------------------------------------|
+| u or µ | microseconds (1 millionth of a second)  |
+| ms     | milliseconds (1 thousandth of a second) |
+| s      | second                                  |
+| m      | minute                                  |
+| h      | hour                                    |
+| d      | day                                     |
+| w      | week                                    |
+```
+
+```
+duration_lit        = int_lit duration_unit .
+duration_unit       = "u" | "µ" | "s" | "m" | "h" | "d" | "w" | "ms" .
+```
+
+### Dates & Times
+
+The date and time literal format is not specified in EBNF like the rest of this document.  It is specified using Go's date / time parsing format, which is a reference date written in the format required by InfluxQL.  The reference date time is:
+
+InfluxQL reference date time: January 2nd, 2006 at 3:04:05 PM
+
+```
+time_lit            = "2006-01-02 15:04:05.999999" | "2006-01-02"
+```
+
+### Booleans
+
+```
+bool_lit            = TRUE | FALSE .
+```
+
+### Regular Expressions
+
+```
+regex_lit           = "/" { unicode_char } "/" .
+```
+
+## Queries
+
+A query is composed of one or more statements separated by a semicolon.
+
+```
+query               = statement { ; statement } .
+
+statement           = alter_retention_policy_stmt |
+                      create_continuous_query_stmt |
+                      create_database_stmt |
+                      create_retention_policy_stmt |
+                      create_user_stmt |
+                      delete_stmt |
+                      drop_continuous_query_stmt |
+                      drop_database_stmt |
+                      drop_measurement_stmt |
+                      drop_retention_policy_stmt |
+                      drop_series_stmt |
+                      drop_user_stmt |
+                      grant_stmt |
+                      show_continuous_queries_stmt |
+                      show_databases_stmt |
+                      show_field_keys_stmt |
+                      show_measurements_stmt |
+                      show_retention_policies |
+                      show_series_stmt |
+                      show_tag_keys_stmt |
+                      show_tag_values_stmt |
+                      show_users_stmt |
+                      revoke_stmt |
+                      select_stmt .
+```
+
+## Statements
+
+### ALTER RETENTION POLICY
+
+```
+alter_retention_policy_stmt  = "ALTER RETENTION POLICY" policy_name "ON"
+                               db_name retention_policy_option
+                               [ retention_policy_option ]
+                               [ retention_policy_option ] .
+
+db_name                      = identifier .
+
+policy_name                  = identifier .
+
+retention_policy_option      = retention_policy_duration |
+                               retention_policy_replication |
+                               "DEFAULT" .
+
+retention_policy_duration    = "DURATION" duration_lit .
+retention_policy_replication = "REPLICATION" int_lit
+```
+
+#### Examples:
+
+```sql
+-- Set default retention policy for mydb to 1h.cpu.
+ALTER RETENTION POLICY "1h.cpu" ON mydb DEFAULT;
+
+-- Change duration and replication factor.
+ALTER RETENTION POLICY policy1 ON somedb DURATION 1h REPLICATION 4
+```
+
+### CREATE CONTINUOUS QUERY
+
+```
+create_continuous_query_stmt = "CREATE CONTINUOUS QUERY" query_name "ON" db_name
+                               "BEGIN" select_stmt "END" .
+
+query_name                   = identifier .
+```
+
+#### Examples:
+
+```sql
+-- selects from default retention policy and writes into 6_months retention policy
+CREATE CONTINUOUS QUERY "10m_event_count"
+ON db_name
+BEGIN
+  SELECT count(value)
+  INTO "6_months".events
+  FROM events
+  GROUP BY time(10m)
+END;
+
+-- this selects from the output of one continuous query in one retention policy and outputs to another series in another retention policy
+CREATE CONTINUOUS QUERY "1h_event_count"
+ON db_name
+BEGIN
+  SELECT sum(count) as count
+  INTO "2_years".events
+  FROM "6_months".events
+  GROUP BY time(1h)
+END;
+```
+
+### CREATE DATABASE
+
+```
+create_database_stmt = "CREATE DATABASE" db_name
+```
+
+#### Example:
+
+```sql
+CREATE DATABASE foo
+```
+
+### CREATE RETENTION POLICY
+
+```
+create_retention_policy_stmt = "CREATE RETENTION POLICY" policy_name "ON"
+                               db_name retention_policy_duration
+                               retention_policy_replication
+                               [ "DEFAULT" ] .
+```
+
+#### Examples
+
+```sql
+-- Create a retention policy.
+CREATE RETENTION POLICY "10m.events" ON somedb DURATION 10m REPLICATION 2;
+
+-- Create a retention policy and set it as the default.
+CREATE RETENTION POLICY "10m.events" ON somedb DURATION 10m REPLICATION 2 DEFAULT;
+```
+
+### CREATE USER
+
+```
+create_user_stmt = "CREATE USER" user_name "WITH PASSWORD" password
+                   [ "WITH ALL PRIVILEGES" ] .
+```
+
+#### Examples:
+
+```sql
+-- Create a normal database user.
+CREATE USER jdoe WITH PASSWORD '1337password';
+
+-- Create a cluster admin.
+-- Note: Unlike the GRANT statement, the "PRIVILEGES" keyword is required here.
+CREATE USER jdoe WITH PASSWORD '1337password' WITH ALL PRIVILEGES;
+```
+
+### DELETE
+
+```
+delete_stmt  = "DELETE" from_clause where_clause .
+```
+
+#### Example:
+
+```sql
+-- delete data points from the cpu measurement where the region tag
+-- equals 'uswest'
+DELETE FROM cpu WHERE region = 'uswest';
+```
+
+### DROP CONTINUOUS QUERY
+
+drop_continuous_query_stmt = "DROP CONTINUOUS QUERY" query_name .
+
+#### Example:
+
+```sql
+DROP CONTINUOUS QUERY myquery;
+```
+
+### DROP DATABASE
+
+drop_database_stmt = "DROP DATABASE" db_name .
+
+#### Example:
+
+```sql
+DROP DATABASE mydb;
+```
+
+### DROP MEASUREMENT
+
+```
+drop_measurement_stmt = "DROP MEASUREMENT" measurement .
+```
+
+#### Examples:
+
+```sql
+-- drop the cpu measurement
+DROP MEASUREMENT cpu;
+```
+
+### DROP RETENTION POLICY
+
+```
+drop_retention_policy_stmt = "DROP RETENTION POLICY" policy_name "ON" db_name .
+```
+
+#### Example:
+
+```sql
+-- drop the retention policy named 1h.cpu from mydb
+DROP RETENTION POLICY "1h.cpu" ON mydb;
+```
+
+### DROP SERIES
+
+```
+drop_series_stmt = "DROP SERIES" [ from_clause ] [ where_clause ]
+```
+
+#### Example:
+
+```sql
+
+```
+
+### DROP USER
+
+```
+drop_user_stmt = "DROP USER" user_name .
+```
+
+#### Example:
+
+```sql
+DROP USER jdoe;
+
+```
+
+### GRANT
+
+NOTE: Users can be granted privileges on databases that do not exist.
+
+```
+grant_stmt = "GRANT" privilege [ on_clause ] to_clause
+```
+
+#### Examples:
+
+```sql
+-- grant cluster admin privileges
+GRANT ALL TO jdoe;
+
+-- grant read access to a database
+GRANT READ ON mydb TO jdoe;
+```
+
+### SHOW CONTINUOUS QUERIES
+
+show_continuous_queries_stmt = "SHOW CONTINUOUS QUERIES"
+
+#### Example:
+
+```sql
+-- show all continuous queries
+SHOW CONTINUOUS QUERIES;
+```
+
+### SHOW DATABASES
+
+```
+show_databases_stmt = "SHOW DATABASES" .
+```
+
+#### Example:
+
+```sql
+-- show all databases
+SHOW DATABASES;
+```
+
+### SHOW FIELD KEYS
+
+show_field_keys_stmt = "SHOW FIELD KEYS" [ from_clause ] .
+
+#### Examples:
+
+```sql
+-- show field keys from all measurements
+SHOW FIELD KEYS;
+
+-- show field keys from specified measurement
+SHOW FIELD KEYS FROM cpu;
+```
+
+### SHOW MEASUREMENTS
+
+show_measurements_stmt = "SHOW MEASUREMENTS" [ where_clause ] [ group_by_clause ] [ limit_clause ]
+                         [ offset_clause ] .
+
+```sql
+-- show all measurements
+SHOW MEASUREMENTS;
+
+-- show measurements where region tag = 'uswest' AND host tag = 'serverA'
+SHOW MEASUREMENTS WHERE region = 'uswest' AND host = 'serverA';
+```
+
+### SHOW RETENTION POLICIES
+
+```
+show_retention_policies = "SHOW RETENTION POLICIES" db_name .
+```
+
+#### Example:
+
+```sql
+-- show all retention policies on a database
+SHOW RETENTION POLICIES mydb;
+```
+
+### SHOW SERIES
+
+```
+show_series_stmt = "SHOW SERIES" [ from_clause ] [ where_clause ] [ group_by_clause ]
+                   [ limit_clause ] [ offset_clause ] .
+```
+
+#### Example:
+
+```sql
+
+```
+
+### SHOW TAG KEYS
+
+```
+show_tag_keys_stmt = "SHOW TAG KEYS" [ from_clause ] [ where_clause ] [ group_by_clause ]
+                     [ limit_clause ] [ offset_clause ] .
+```
+
+#### Examples:
+
+```sql
+-- show all tag keys
+SHOW TAG KEYS;
+
+-- show all tag keys from the cpu measurement
+SHOW TAG KEYS FROM cpu;
+
+-- show all tag keys from the cpu measurement where the region key = 'uswest'
+SHOW TAG KEYS FROM cpu WHERE region = 'uswest';
+
+-- show all tag keys where the host key = 'serverA'
+SHOW TAG KEYS WHERE host = 'serverA';
+```
+
+### SHOW TAG VALUES
+
+```
+show_tag_values_stmt = "SHOW TAG VALUES" [ from_clause ] with_tag_clause [ where_clause ]
+                       [ group_by_clause ] [ limit_clause ] [ offset_clause ] .
+```
+
+#### Examples:
+
+```sql
+-- show all tag values across all measurements for the region tag
+SHOW TAG VALUES WITH TAG = 'region';
+
+-- show tag values from the cpu measurement for the region tag
+SHOW TAG VALUES FROM cpu WITH TAG = 'region';
+
+-- show tag values from the cpu measurement for region & host tag keys where service = 'redis'
+SHOW TAG VALUES FROM cpu WITH TAG IN (region, host) WHERE service = 'redis';
+```
+
+### SHOW USERS
+
+```
+show_users_stmt = "SHOW USERS" .
+```
+
+#### Example:
+
+```sql
+-- show all users
+SHOW USERS;
+```
+
+### REVOKE
+
+```
+revoke_stmt = "REVOKE" privilege [ "ON" db_name ] "FROM" user_name .
+```
+
+#### Examples:
+
+```sql
+-- revoke cluster admin from jdoe
+REVOKE ALL PRIVILEGES FROM jdoe;
+
+-- revoke read privileges from jdoe on mydb
+REVOKE READ ON mydb FROM jdoe;
+```
+
+### SELECT
+
+```
+select_stmt = fields from_clause [ into_clause ] [ where_clause ]
+              [ group_by_clause ] [ order_by_clause ] [ limit_clause ]
+              [ offset_clause ] [ slimit_clause ] [ soffset_clause ].
+```
+
+#### Examples:
+
+```sql
+-- select mean value from the cpu measurement where region = 'uswest' grouped by 10 minute intervals
+SELECT mean(value) FROM cpu WHERE region = 'uswest' GROUP BY time(10m) fill(0);
+```
+
+## Clauses
+
+```
+from_clause     = "FROM" measurements .
+
+group_by_clause = "GROUP BY" dimensions fill(<option>).
+
+limit_clause    = "LIMIT" int_lit .
+
+offset_clause   = "OFFSET" int_lit .
+
+slimit_clause    = "SLIMIT" int_lit .
+
+soffset_clause   = "SOFFSET" int_lit .
+
+on_clause       = "ON" db_name .
+
+order_by_clause = "ORDER BY" sort_fields .
+
+to_clause       = "TO" user_name .
+
+where_clause    = "WHERE" expr .
+```
+
+## Expressions
+
+```
+binary_op        = "+" | "-" | "*" | "/" | "AND" | "OR" | "=" | "!=" | "<" |
+                   "<=" | ">" | ">=" .
+
+expr             = unary_expr { binary_op unary_expr } .
+
+unary_expr       = "(" expr ")" | var_ref | time_lit | string_lit | int_lit |
+                   float_lit | bool_lit | duration_lit | regex_lit .
+```
+
+## Other
+
+```
+dimension         = expr .
+
+dimensions        = dimension { "," dimension } .
+
+field            = expr [ alias ] .
+
+fields           = field { "," field } .
+
+measurement      = measurement_name |
+                   ( policy_name "." measurement_name ) |
+                   ( db_name "." [ policy_name ] "." measurement_name ) .
+
+measurements     = measurement { "," measurement } .
+
+measurement_name = identifier .
+
+password         = identifier .
+
+policy_name      = identifier .
+
+privilege        = "ALL" [ "PRIVILEGES" ] | "READ" | "WRITE" .
+
+series_id        = int_lit .
+
+sort_field       = field_name [ ASC | DESC ] .
+
+sort_fields      = sort_field { "," sort_field } .
+
+user_name        = identifier .
+```
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/NOTES b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/NOTES
new file mode 100644
index 00000000000..35de158bfec
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/NOTES
@@ -0,0 +1,682 @@
+SELECT mean(value) FROM cpu
+WHERE service = 'redis'
+GROUP BY region, time(10m)
+
+
+based on group by, get unique tag sets for region
+cpu region=uswest -> get series ids from cpu where <tagset> and <where cond>
+cpu region=useast -> get series ids from cpu where <tagset> and <where cond>
+
+for each shard group in time range {
+  for each group by tagset {
+    shardItrs := map[shard]itr
+    for id := range seriesIds {
+      shard := group.shardForId(id)
+      shardItrs[shard].addId(id)
+    }
+    for _, itr := range shardItrs {
+      itr.tags = tagset
+      itr.name = cpu
+    }
+  }
+}
+
+(host = 'serverA' AND value > 100) OR (region = 'uswest' AND value < 10)
+
+value > 100 OR value < 10  (host=serverA, region=uswest)
+value < 10                 (host!=serverA, region=uswest)
+value > 100
+
+
+filters := make(map[whereCond]seriesIds)
+filters := make(map[uint32]whereCond)
+seriesIds
+
+
+select mean(value) from foo WHERE someField = 'important' group by time(5m)
+
+===================
+
+
+
+select derivative(mean(value))
+from cpu
+group by time(5m)
+
+select mean(value) from cpu group by time(5m)
+select top(10, value) from cpu group by host where time > now() - 1h
+
+this query uses this type of cycle
+-------REMOTE HOST ------------- -----HOST THAT GOT QUERY ---
+map -> reduce -> combine -> map  -> reduce -> combine -> user
+
+select mean(value) cpu group by time(5m), host where time > now() -4h
+map -> reduce -> combine -> user
+map -> reduce -> map -> reduce -> combine -> user
+map -> reduce -> combine -> map -> reduce -> combine -> user
+
+
+select value from
+(
+	select mean(value) AS value FROM cpu GROUP BY time(5m)
+)
+
+[
+{
+	name: cpu,
+	tags: {
+		host: servera,
+	},
+	columns: [time, mean],
+	values : [
+		[23423423, 88.8]
+	]
+},
+{
+	name: cpu,
+	tags: {
+		host: serverb,
+	}
+}
+]
+
+
+================================================================================
+
+// list series ->
+/*
+[
+	{
+		"name": "cpu",
+		"columns": ["id", "region", "host"],
+		"values": [
+			1, "uswest", "servera",
+			2, "uswest", "serverb"
+		]
+	},
+	{
+		""
+	}
+]
+
+list series where region = 'uswest'
+
+list tags where name = 'cpu'
+
+list tagKeys where name = 'cpu'
+
+list series where name = 'cpu' and region = 'uswest'
+
+select distinct(region) from cpu
+
+list names
+list tagKeys
+
+list tagValues where tagKey = 'region' and time > now() -1h
+
+select a.value, b.value from a join b where a.user_id == 100
+  select a.value from a where a.user_id == 100
+  select b.value from b
+
+          3                1              2
+select sum(a.value) + (sum(b.value) / min(b.value)) from a join b group by region
+
+	select suM(a.value) from a group by time(5m)
+	select sum(b.value) from b group by time(5m)
+
+execute sum MR on series [23, 65, 88, 99, 101, 232]
+
+map -> 1 tick per 5m
+reduce -> combines ticks per 5m interval -> outputs
+
+planner -> take reduce output per 5m interval from the two reducers
+           and combine with the join function, which is +
+
+[1,/,2,+,3]
+
+
+
+for v := s[0].Next(); v != nil; v = s[0].Next() {
+	var result interface{}
+	for i := 1; i < len(s); i += 2 {
+		// it's an operator
+		if i % 2 == 1 {
+
+		}
+	}
+}
+
+select count(distinct(host)) from cpu where time > now() - 5m
+
+type mapper interface {
+	Map(iterator)
+}
+
+type floatCountMapper struct {}
+func(m *floatCountMapper) Map(i Iterator) {
+	itr := i.(*floatIterator)
+}
+
+type Iterator interface {
+	itr()
+}
+
+type iterator struct {
+	cursor *bolt.Cursor
+	timeBucket time.Time
+	name string
+	seriesID uint32
+	tags map[string]string
+	fieldID uint8
+	where *WhereClause
+}
+
+func (i *intIterator) itr() {}
+func (i *intIterator) Next() (k int64, v float64) {
+	// loop through bolt cursor applying where clause and yield next point
+	// if cursor is at end or time is out of range, yield nil
+}
+
+*/
+
+
+
+
+
+field: ipaddress
+
+select top(10, count, ipaddress) from hits group by time(5m), host
+
+map -> 10 records, <key(time,host)>, <value(count,ipaddresses)>
+
+reducer -> take in all map outputs for each 5m bucket
+  combine them, sort, take out the top 10
+  output -> 10 records, count, ipaddresses, time
+
+
+==========
+
+select top(10, count, host) from hits group by time(5m)
+
+select host, value from cpu where time > now() - 1h
+
+select last(value) from cpu group by time(auto), host fill(previous) where time > now() - 1h
+
+select sum(value) from cpu group by host where time > now() - 1h
+
+
+
+
+
+
+select sum(value) from cpu where time > now() - 1h
+
+select * from a;
+
+[
+{
+	"name": "cpu",
+	"tags": {
+		"host": "servera"
+	},
+	"fields": [
+		"time",
+		"count",
+		"ipaddress"
+	]
+	"values": [
+		[t, v, "123.23.22.2"],
+		[t, v, "192.232.2.2"],
+		
+	]
+},
+{
+	"name": "cpu",
+	"tags": {
+		"host": "serverb"
+	},
+	"values": [
+		[t, v],
+		[t + 1, v],
+		
+	]
+},
+]
+
+[t, v, "servera"]
+[t, v, "serverb"]
+[t+1, v, "servera"]
+[t+1, v, "serverb"]
+
+======
+
+a INNER JOIN b
+
+- planner always has "group by"
+
+select count(errors.value) / count(requests.value) as error_rate
+from errors join requests as "mysuperseries"
+group by time(5m)
+fill(previous)
+where time > now() - 4h
+
+select mean(value) as cpu_mean from cpu group by time(5m) where host = 'servera'
+
+select count(value) from errors group by time(5m) fill(previous) where..
+select count(value) from requests group by time(5m) fill(previ...
+
+{
+	"name": "errors.requests",
+	"tags": {},
+	"fields": ["time", "errors.count", "requests.count"],
+	"values": [
+		[t, n, m]
+	]
+}
+
+
+a MERGE b
+
+a - t
+b - t
+a - t + 1
+b - t + 1
+b - t + 2
+a - t + 3
+
+<cpu, host>
+
+select value from cpu
+select mean(value) from cpu group by time(5m)
+
+select first(value) from cpu
+
+
+=====
+
+1. Group by time
+2. Group by
+3. Raw
+
+======
+
+SELECT sum(value) FROM myseries
+
+host=servera
+host=serverb
+
+{"host":"servera", "value":100}
+{"host":"serverb", "value":"hello!"}
+
+
+series = <name, tags>
+series = seriesID
+
+seriesID -> name
+
+name has_many seriesIDs
+name has_many fields
+
+field -> (type, id)
+
+<seriesName,fieldID> -> (type, id)
+
+
+<seriesID, time> -> fieldValues
+
+
+field
+
+type topMapper struct {
+	count int
+}
+
+func newTopMapper(count int) {
+	
+}
+
+func (t *topCountMapper) Map(i Iterator) {
+	topValues := make(map[string]int)
+	for p := i.Next(); p != nil; p = i.Next() {
+		topValues[p.String()] += 1
+	}
+	for k, v := range topValues {
+	 t.job.Emit(k, v)
+	}
+}
+
+type topCountReducer struct {
+	count int	
+}
+
+func (r *topCountReducer) Reduce(i Iterator) {
+	realzTop10 := make(map[string]int)
+	for v := i.Next(); v != nil; v = i.Next() {
+		top10 := v.(map[string]int)
+		for k, n := range top10 {
+			realzTop10[k] += n
+		}
+	}
+	realyrealTop10 := make(map[string]int)
+	// do sorty magic on realzTop10 and set realyrealTop10
+	r.job.Emit(realyrealTop10)
+}
+
+type Transformer interface {
+	Transform(interface{}) Series
+}
+
+type ReduceOutput struct {
+	values [][]interface{}
+	fieldIDs [] 
+}
+
+// for topCountReducer ReduceOutput would look like
+// values = [t, c, "some string"]
+// fieldIDs = [0, 0, 3]
+
+SELECT val1, val2 FROM abc
+
+
+select mean(value) from cpu where region='uswest' group by time(5m), host
+
+2000 series
+
+200 series to each machine
+
+
+
+================================================================================
+
+
+
+type Mapper interface {
+	Map(Iterator)
+}
+
+
+type countMapper struct {}
+
+// Iterator is the entire series if not an aggregate query
+// or iterator is the entire time bucket if an aggregate query
+func (m *sumMapper) Map(i Iterator) {
+	var sum int
+	for p := i.Next(); p != nil; p = i.Next() {
+		sum += p.Float()
+	}
+	m.Emitter.Emit(k, sum)
+}
+
+type Point interface {
+	String(name)
+	Int(name)
+}
+
+type cursorIterator struct {
+	Cursor *bolt.Cursor
+	FieldID uint8
+	Value []byte
+}
+
+func (i cursorIterator) Next() Point {
+	_, i.Value = i.Cursor.Next()
+	return byteSlicePoint(i.Value)
+}
+
+type byteSlicePoint []byte
+
+func (p byteSlicePoint) String() string {
+	// unmarshal from byte slice.
+}
+
+/*
+{
+	"name": "foo",
+	"fields": {
+		"value": 23.2,
+		"user_id": 23
+	},
+	"tags": {
+
+	}
+}
+*/
+
+
+CNT  ID0  VALUEVALUEVALUEVALUEVALUEVALUEVALUEVALU
+0001 0000 0000 0000 0000 0000 0000 0000 0000 0000
+
+CNT  ID0  ID1  ID2  FLOATFLOA STRINGSTR STRINGSTR
+0002 0001 0002 0003 0000 0000 0000 0000 0000 0000
+
+
+
+// SELECT count() FROM cpu GROUP BY host
+
+// SELECT mean(value) from cpu where region = 'uswest'
+
+// SELECT derivative(value) from redis_key_count GROUP BY time(5m)
+
+
+// SELECT host, mean(value)
+// FROM cpu
+// GROUP BY host
+// HAVING top(20, mean)
+// WHERE time > now() - 1h
+// AND region = 'uswest'
+
+// SELECT ipaddress, count(ipaddress)
+// FROM hits
+// GROUP BY ipaddress
+// HAVING top(10, count)
+// WHERE time > now() - 1h
+
+
+series := meta.DistinctTagValues("cpu", "host")
+
+type Series struct {
+	name string
+	fields map[uint8]string
+}
+
+type SeriesData struct {
+	ID
+	tags map[string]string
+}
+
+<id, time, value>
+
+mrJobs := make([]*MRJob, 0, len(series))
+for _, s := range series {
+	j := NewMRJob(s)
+	mrJobs = append(mrJobs, j)
+	j.Execute()
+}
+
+for _, j := range mrJobs {
+	// pull in results
+	// construct series object with same tags as series
+}
+
+
+================================================================================
+
+
+
+type Mapper interface {
+	Map(Iterator)
+}
+
+
+type countMapper struct {}
+
+// Iterator is the entire series if not an aggregate query
+// or iterator is the entire time bucket if an aggregate query
+func (m *sumMapper) Map(i Iterator) {
+	var sum int
+	for p := i.Next(); p != nil; p = i.Next() {
+		sum += p.Float()
+	}
+	m.Emitter.Emit(k, sum)
+}
+
+type Point interface {
+	String(name)
+	Int(name)
+}
+
+type cursorIterator struct {
+	Cursor *bolt.Cursor
+	FieldID uint8
+	Value []byte
+}
+
+func (i cursorIterator) Next() Point {
+	_, i.Value = i.Cursor.Next()
+	return byteSlicePoint(i.Value)
+}
+
+type byteSlicePoint []byte
+
+func (p byteSlicePoint) String() string {
+	// unmarshal from byte slice.
+}
+
+/*
+{
+	"name": "foo",
+	"fields": {
+		"value": 23.2,
+		"user_id": 23
+	},
+	"tags": {
+
+	}
+}
+*/
+
+
+CNT  ID0  VALUEVALUEVALUEVALUEVALUEVALUEVALUEVALU
+0001 0000 0000 0000 0000 0000 0000 0000 0000 0000
+
+CNT  ID0  ID1  ID2  FLOATFLOA STRINGSTR STRINGSTR
+0002 0001 0002 0003 0000 0000 0000 0000 0000 0000
+
+
+
+// SELECT count() FROM cpu GROUP BY host
+
+// SELECT mean(value) from cpu where region = 'uswest'
+
+// SELECT derivative(value) from redis_key_count GROUP BY time(5m)
+
+
+// SELECT host, mean(value)
+// FROM cpu
+// GROUP BY host
+// HAVING top(20, mean)
+// WHERE time > now() - 1h
+// AND region = 'uswest'
+
+// SELECT ipaddress, count(ipaddress)
+// FROM hits
+// GROUP BY ipaddress
+// HAVING top(10, count)
+// WHERE time > now() - 1h
+
+
+series := meta.DistinctTagValues("cpu", "host")
+
+mrJobs := make([]*MRJob, 0, len(series))
+for _, s := range series {
+	j := NewMRJob(s)
+	mrJobs = append(mrJobs, j)
+	j.Execute()
+}
+
+for _, j := range mrJobs {
+	// pull in results
+	// construct series object with same tags as series
+}
+
+
+================================================================================
+
+
+type Iterator interface {
+	Next() (interface{}, bool)
+}
+
+type iteratorCounter struct {
+	iterator Iterator
+}
+
+func (iteratorCounter) Next() {
+	
+}
+
+
+SELECT max(a.value), min(a.value), max(b.value)
+FROM a, b
+WHERE a.host = 'influxdb.org'
+
+
+grouper {
+	[]Iterator
+}
+
+
+SELECT max(a.value) FROM a WHERE a.host = 'influxdb.org'   --> 1 value
+SELECT min(a.value) FROM a WHERE a.host = 'influxdb.org'   --> 1 value
+SELECT max(b.value) FROM b                                 --> 1 value
+
+
+SELECT max(a.value) FROM a GROUP BY time WHERE a.host = 'influxdb.org'   --> key,value
+
+
+timeGrouper {
+	[]Iterator
+}
+
+
+type maxMapper struct {
+}
+
+IntervalIterator {
+}
+
+
+
+maxMapper.Map(Iterator) 
+
+
+
+- GROUP BY time
+- GROUP BY time, <tag>
+- GROUP BY <tag>
+
+
+
+
+
+COUNT(field)
+MIN(field)
+MAX(field)
+MEAN(field)
+MODE(field)
+MEDIAN(field)
+COUNT(DISTINCT field)
+PERCENTILE(field, N)
+HISTOGRAM(field [, bucketSize])
+DERIVATIVE(field)
+SUM(field)
+STDDEV(field)
+FIRST(field)
+LAST(field)
+DIFFERENCE(field)
+TOP(field, N)
+BOTTOM(field, N)  <----- multivalue
+
+
+
+================================================================================
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast.go
new file mode 100644
index 00000000000..0942b779295
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast.go
@@ -0,0 +1,3016 @@
+package influxql
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// DataType identifies one of the primitive value types known to InfluxQL.
+type DataType int
+
+const (
+	// Unknown is the zero value; it is the only constant here explicitly typed as DataType.
+	Unknown DataType = 0
+	// Float is the data type of floating point values.
+	Float = 1
+	// Integer is the data type of integer values.
+	Integer = 2
+	// Boolean is the data type of boolean values.
+	Boolean = 3
+	// String is the data type of text values.
+	String = 4
+	// Time is the data type of points in time.
+	Time = 5
+	// Duration is the data type of spans of time.
+	Duration = 6
+)
+
+// InspectDataType maps the dynamic Go type of v to a DataType, or Unknown if unrecognized.
+func InspectDataType(v interface{}) DataType {
+	kind := Unknown
+	switch v.(type) {
+	case float64:
+		kind = Float
+	case int64, int32, int:
+		kind = Integer
+	case bool:
+		kind = Boolean
+	case string:
+		kind = String
+	case time.Time:
+		kind = Time
+	case time.Duration:
+		kind = Duration
+	}
+	return kind
+}
+
+// String returns the lower-case name of the data type, or "unknown" for
+// Unknown and for any value outside the declared constants.
+func (d DataType) String() string {
+	names := [...]string{
+		Float:    "float",
+		Integer:  "integer",
+		Boolean:  "boolean",
+		String:   "string",
+		Time:     "time",
+		Duration: "duration",
+	}
+	// Index 0 (Unknown) is deliberately left unnamed and excluded below.
+	if d >= Float && int(d) < len(names) {
+		return names[d]
+	}
+	return "unknown"
+}
+
+// Node represents a node in the InfluxQL abstract syntax tree.
+type Node interface {
+	node()          // unexported marker method; see the node declarations below
+	String() string // textual rendering of the node
+}
+
+func (*Query) node()     {}
+func (Statements) node() {}
+
+func (*AlterRetentionPolicyStatement) node()  {}
+func (*CreateContinuousQueryStatement) node() {}
+func (*CreateDatabaseStatement) node()        {}
+func (*CreateRetentionPolicyStatement) node() {}
+func (*CreateUserStatement) node()            {}
+func (*Distinct) node()                       {}
+func (*DeleteStatement) node()                {}
+func (*DropContinuousQueryStatement) node()   {}
+func (*DropDatabaseStatement) node()          {}
+func (*DropMeasurementStatement) node()       {}
+func (*DropRetentionPolicyStatement) node()   {}
+func (*DropSeriesStatement) node()            {}
+func (*DropUserStatement) node()              {}
+func (*GrantStatement) node()                 {}
+func (*GrantAdminStatement) node()            {}
+func (*RevokeStatement) node()                {}
+func (*RevokeAdminStatement) node()           {}
+func (*SelectStatement) node()                {}
+func (*SetPasswordUserStatement) node()       {}
+func (*ShowContinuousQueriesStatement) node() {}
+func (*ShowGrantsForUserStatement) node()     {}
+func (*ShowServersStatement) node()           {}
+func (*ShowDatabasesStatement) node()         {}
+func (*ShowFieldKeysStatement) node()         {}
+func (*ShowRetentionPoliciesStatement) node() {}
+func (*ShowMeasurementsStatement) node()      {}
+func (*ShowSeriesStatement) node()            {}
+func (*ShowStatsStatement) node()             {}
+func (*ShowDiagnosticsStatement) node()       {}
+func (*ShowTagKeysStatement) node()           {}
+func (*ShowTagValuesStatement) node()         {}
+func (*ShowUsersStatement) node()             {}
+
+func (*BinaryExpr) node()      {}
+func (*BooleanLiteral) node()  {}
+func (*Call) node()            {}
+func (*Dimension) node()       {}
+func (Dimensions) node()       {}
+func (*DurationLiteral) node() {}
+func (*Field) node()           {}
+func (Fields) node()           {}
+func (*Measurement) node()     {}
+func (Measurements) node()     {}
+func (*nilLiteral) node()      {}
+func (*NumberLiteral) node()   {}
+func (*ParenExpr) node()       {}
+func (*RegexLiteral) node()    {}
+func (*SortField) node()       {}
+func (SortFields) node()       {}
+func (Sources) node()          {}
+func (*StringLiteral) node()   {}
+func (*Target) node()          {}
+func (*TimeLiteral) node()     {}
+func (*VarRef) node()          {}
+func (*Wildcard) node()        {}
+
+// Query holds the ordered statements that make up one InfluxQL query.
+type Query struct {
+	Statements Statements
+}
+
+// String renders the query by rendering its statement list.
+func (q *Query) String() string { return q.Statements.String() }
+
+// Statements is an ordered list of Statement values.
+type Statements []Statement
+
+// String renders every statement and joins the results with ";\n".
+func (a Statements) String() string {
+	rendered := make([]string, len(a))
+	for i := range a {
+		rendered[i] = a[i].String()
+	}
+	return strings.Join(rendered, ";\n")
+}
+
+// Statement is a Node for a single InfluxQL command that also reports the privileges it requires.
+type Statement interface {
+	Node
+	stmt()
+	RequiredPrivileges() ExecutionPrivileges
+}
+
+// HasDefaultDatabase is the interface of statement nodes that can report a default database name.
+type HasDefaultDatabase interface {
+	Node
+	stmt()
+	DefaultDatabase() string
+}
+
+// ExecutionPrivilege describes one privilege a user needs in order to
+// execute a statement on a database or resource.
+type ExecutionPrivilege struct {
+	// Admin reports whether the admin privilege is required.
+	Admin bool
+
+	// Name of the database the privilege applies to.
+	Name string
+
+	// Privilege is the database privilege that is required.
+	Privilege Privilege
+}
+
+// ExecutionPrivileges is the list of privileges returned by Statement.RequiredPrivileges.
+type ExecutionPrivileges []ExecutionPrivilege
+
+func (*AlterRetentionPolicyStatement) stmt()  {}
+func (*CreateContinuousQueryStatement) stmt() {}
+func (*CreateDatabaseStatement) stmt()        {}
+func (*CreateRetentionPolicyStatement) stmt() {}
+func (*CreateUserStatement) stmt()            {}
+func (*DeleteStatement) stmt()                {}
+func (*DropContinuousQueryStatement) stmt()   {}
+func (*DropDatabaseStatement) stmt()          {}
+func (*DropMeasurementStatement) stmt()       {}
+func (*DropRetentionPolicyStatement) stmt()   {}
+func (*DropSeriesStatement) stmt()            {}
+func (*DropUserStatement) stmt()              {}
+func (*GrantStatement) stmt()                 {}
+func (*GrantAdminStatement) stmt()            {}
+func (*ShowContinuousQueriesStatement) stmt() {}
+func (*ShowGrantsForUserStatement) stmt()     {}
+func (*ShowServersStatement) stmt()           {}
+func (*ShowDatabasesStatement) stmt()         {}
+func (*ShowFieldKeysStatement) stmt()         {}
+func (*ShowMeasurementsStatement) stmt()      {}
+func (*ShowRetentionPoliciesStatement) stmt() {}
+func (*ShowSeriesStatement) stmt()            {}
+func (*ShowStatsStatement) stmt()             {}
+func (*ShowDiagnosticsStatement) stmt()       {}
+func (*ShowTagKeysStatement) stmt()           {}
+func (*ShowTagValuesStatement) stmt()         {}
+func (*ShowUsersStatement) stmt()             {}
+func (*RevokeStatement) stmt()                {}
+func (*RevokeAdminStatement) stmt()           {}
+func (*SelectStatement) stmt()                {}
+func (*SetPasswordUserStatement) stmt()       {}
+
+// Expr is a Node for an expression that can be evaluated to a value; expr is its unexported marker.
+type Expr interface {
+	Node
+	expr()
+}
+
+func (*BinaryExpr) expr()      {}
+func (*BooleanLiteral) expr()  {}
+func (*Call) expr()            {}
+func (*Distinct) expr()        {}
+func (*DurationLiteral) expr() {}
+func (*nilLiteral) expr()      {}
+func (*NumberLiteral) expr()   {}
+func (*ParenExpr) expr()       {}
+func (*RegexLiteral) expr()    {}
+func (*StringLiteral) expr()   {}
+func (*TimeLiteral) expr()     {}
+func (*VarRef) expr()          {}
+func (*Wildcard) expr()        {}
+
+// Source is a Node that supplies data to a statement; source is its unexported marker.
+type Source interface {
+	Node
+	source()
+}
+
+func (*Measurement) source() {}
+
+// Sources is an ordered list of Source values.
+type Sources []Source
+
+// String renders each source and joins the results with ", ".
+func (a Sources) String() string {
+	// Collect the individual renderings first so that strings.Join can
+	// place the separator between elements only; this mirrors the
+	// "no separator after the last element" rule of a manual loop.
+	rendered := make([]string, 0, len(a))
+	for _, src := range a {
+		rendered = append(rendered, src.String())
+	}
+
+	// A nil or empty list renders as the empty string.
+	joined := strings.Join(rendered, ", ")
+	return joined
+}
+
+// SortField represents a single sort term: a field name and a direction.
+type SortField struct {
+	// Name of the field to sort by. Omitted from String output when empty.
+	Name string
+
+	// Ascending selects ASC ordering when true and DESC when false.
+	Ascending bool
+}
+
+// String renders the sort field as "<name> ASC" or "<name> DESC". When Name
+// is empty only the direction keyword is emitted, with no leading space.
+func (field *SortField) String() string {
+	direction := "DESC"
+	if field.Ascending {
+		direction = "ASC"
+	}
+	// Emit the name only when present; the original test was inverted
+	// (Name == ""), dropping real names and prefixing a stray space.
+	if field.Name == "" {
+		return direction
+	}
+	return field.Name + " " + direction
+}
+
+// SortFields is the ordered list of sort terms of an ORDER BY clause.
+type SortFields []*SortField
+
+// String renders every sort field and joins the results with ", ".
+func (a SortFields) String() string {
+	terms := make([]string, len(a))
+	for i, term := range a {
+		terms[i] = term.String()
+	}
+	return strings.Join(terms, ", ")
+}
+
+// CreateDatabaseStatement is the AST node for a CREATE DATABASE command.
+type CreateDatabaseStatement struct {
+	// Name of the database to create.
+	Name string
+}
+
+// String renders the statement as "CREATE DATABASE <name>". The name is
+// written verbatim, without quoting or escaping.
+func (s *CreateDatabaseStatement) String() string {
+	text := "CREATE DATABASE "
+	text += s.Name
+	return text
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *CreateDatabaseStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// DropDatabaseStatement is the AST node for a DROP DATABASE command.
+type DropDatabaseStatement struct {
+	// Name of the database to drop.
+	Name string
+}
+
+// String renders the statement as "DROP DATABASE <name>". The name is
+// written verbatim, without quoting or escaping.
+func (s *DropDatabaseStatement) String() string {
+	const keyword = "DROP DATABASE "
+	rendered := keyword + s.Name
+	return rendered
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *DropDatabaseStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// DropRetentionPolicyStatement is the AST node for a DROP RETENTION POLICY command.
+type DropRetentionPolicyStatement struct {
+	// Name of the policy to drop.
+	Name string
+
+	// Database is the name of the database the policy is dropped from.
+	Database string
+}
+
+// String renders the statement as
+// "DROP RETENTION POLICY <name> ON <database>".
+func (s *DropRetentionPolicyStatement) String() string {
+	// Both names are written verbatim, without quoting or escaping.
+	return "DROP RETENTION POLICY " +
+		s.Name +
+		" ON " +
+		s.Database
+}
+
+// RequiredPrivileges returns a non-admin entry: WritePrivilege on s.Database.
+func (s *DropRetentionPolicyStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Name: s.Database, Privilege: WritePrivilege}}
+}
+
+// CreateUserStatement is the AST node for a CREATE USER command.
+type CreateUserStatement struct {
+	// Name of the user to create.
+	Name string
+
+	// Password of the new user.
+	Password string
+
+	// Admin reports whether the user is created with all privileges.
+	Admin bool
+}
+
+// String renders the statement as "CREATE USER <name> WITH PASSWORD
+// [REDACTED]", followed by " WITH ALL PRIVILEGES" when Admin is set.
+func (s *CreateUserStatement) String() string {
+	// s.Password is not consulted here; a fixed placeholder is emitted in
+	// its place, so the rendered text never contains the real password.
+	text := "CREATE USER " + s.Name + " WITH PASSWORD " + "[REDACTED]"
+	if !s.Admin {
+		return text
+	}
+	// Admin users get the extra privileges suffix.
+	return text + " WITH ALL PRIVILEGES"
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *CreateUserStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// DropUserStatement is the AST node for a DROP USER command.
+type DropUserStatement struct {
+	// Name of the user to drop.
+	Name string
+}
+
+// String renders the statement as "DROP USER <name>". The name is written
+// verbatim, without quoting or escaping.
+func (s *DropUserStatement) String() string {
+	text := "DROP USER "
+	text += s.Name
+	return text
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *DropUserStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// Privilege enumerates the kinds of access a user can be granted.
+type Privilege int
+
+const (
+	// NoPrivileges means no privileges required / granted / revoked.
+	NoPrivileges Privilege = iota
+	// ReadPrivilege means read privilege required / granted / revoked.
+	ReadPrivilege
+	// WritePrivilege means write privilege required / granted / revoked.
+	WritePrivilege
+	// AllPrivileges means all privileges required / granted / revoked.
+	AllPrivileges
+)
+
+// NewPrivilege returns a pointer to a fresh copy of p.
+func NewPrivilege(p Privilege) *Privilege { return &p }
+
+// String returns the InfluxQL keyword for the privilege, or the empty
+// string for a value outside the declared constants.
+func (p Privilege) String() string {
+	keywords := [...]string{
+		NoPrivileges:   "NO PRIVILEGES",
+		ReadPrivilege:  "READ",
+		WritePrivilege: "WRITE",
+		AllPrivileges:  "ALL PRIVILEGES",
+	}
+	if p < NoPrivileges || int(p) >= len(keywords) {
+		return ""
+	}
+	return keywords[p]
+}
+
+// GrantStatement is the AST node for granting a database privilege to a user.
+type GrantStatement struct {
+	// Privilege is the privilege being granted.
+	Privilege Privilege
+
+	// On is the database the privilege is granted on.
+	On string
+
+	// User is the name of the user receiving the privilege.
+	User string
+}
+
+// String renders the statement as
+// "GRANT <privilege> ON <database> TO <user>".
+func (s *GrantStatement) String() string {
+	// Identifiers are written verbatim, without quoting or escaping.
+	return "GRANT " +
+		s.Privilege.String() +
+		" ON " +
+		s.On +
+		" TO " +
+		s.User
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *GrantStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// GrantAdminStatement is the AST node for granting admin privilege to a user.
+type GrantAdminStatement struct {
+	// User is the name of the user receiving the privilege.
+	User string
+}
+
+// String renders the statement as "GRANT ALL PRIVILEGES TO <user>"; the
+// user name is written verbatim.
+func (s *GrantAdminStatement) String() string {
+	const keyword = "GRANT ALL PRIVILEGES TO "
+	rendered := keyword + s.User
+	return rendered
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *GrantAdminStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// SetPasswordUserStatement is the AST node for changing a user's password.
+type SetPasswordUserStatement struct {
+	// Password is the new password in plain text.
+	Password string
+
+	// Name of the user whose password is being set.
+	Name string
+}
+
+// String renders the statement as "SET PASSWORD FOR <name> = [REDACTED]".
+func (s *SetPasswordUserStatement) String() string {
+	// s.Password is not consulted; a fixed placeholder is emitted instead.
+	text := "SET PASSWORD FOR " + s.Name
+	text += " = "
+	text += "[REDACTED]"
+	return text
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *SetPasswordUserStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// RevokeStatement is the AST node for revoking a database privilege from a user.
+type RevokeStatement struct {
+	// Privilege is the privilege being revoked.
+	Privilege Privilege
+
+	// On is the database the privilege is revoked on.
+	On string
+
+	// User is the name of the user losing the privilege.
+	User string
+}
+
+// String renders the statement as
+// "REVOKE <privilege> ON <database> FROM <user>".
+func (s *RevokeStatement) String() string {
+	pieces := []string{
+		"REVOKE ", s.Privilege.String(),
+		" ON ", s.On,
+		" FROM ", s.User,
+	}
+	// Every keyword literal already carries its own spacing.
+	return strings.Join(pieces, "")
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *RevokeStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// RevokeAdminStatement is the AST node for revoking admin privilege from a user.
+type RevokeAdminStatement struct {
+	// User is the name of the user losing admin privilege.
+	User string
+}
+
+// String renders the statement as "REVOKE ALL PRIVILEGES FROM <user>"; the
+// user name is written verbatim.
+func (s *RevokeAdminStatement) String() string {
+	text := "REVOKE ALL PRIVILEGES FROM "
+	text += s.User
+	return text
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *RevokeAdminStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// CreateRetentionPolicyStatement is the AST node for a CREATE RETENTION POLICY command.
+type CreateRetentionPolicyStatement struct {
+	// Name of the policy to create.
+	Name string
+
+	// Database is the name of the database the policy belongs to.
+	Database string
+
+	// Duration is how long data written to this policy is retained.
+	Duration time.Duration
+
+	// Replication is the replication factor for data written to this policy.
+	Replication int
+
+	// Default reports whether the policy becomes the database default.
+	Default bool
+}
+
+// String renders the statement as "CREATE RETENTION POLICY <name> ON
+// <database> DURATION <duration> REPLICATION <n>", with a trailing
+// " DEFAULT" when Default is set.
+func (s *CreateRetentionPolicyStatement) String() string {
+	parts := []string{
+		"CREATE RETENTION POLICY ", s.Name,
+		" ON ", s.Database,
+		" DURATION ", FormatDuration(s.Duration),
+		" REPLICATION ", strconv.Itoa(s.Replication),
+	}
+	if s.Default {
+		parts = append(parts, " DEFAULT")
+	}
+	// Every keyword literal already carries its own spacing.
+	return strings.Join(parts, "")
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *CreateRetentionPolicyStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// AlterRetentionPolicyStatement is the AST node for an ALTER RETENTION POLICY command.
+type AlterRetentionPolicyStatement struct {
+	// Name of the policy to alter.
+	Name string
+
+	// Database is the name of the database the policy belongs to.
+	Database string
+
+	// Duration is how long data written to this policy is retained; nil omits the clause.
+	Duration *time.Duration
+
+	// Replication is the replication factor for this policy; nil omits the clause.
+	Replication *int
+
+	// Default reports whether the policy should be set as default for the database.
+	Default bool
+}
+
+// String renders the statement as "ALTER RETENTION POLICY <name> ON
+// <database>" followed by whichever of the optional DURATION, REPLICATION
+// and DEFAULT clauses are set, in that order.
+func (s *AlterRetentionPolicyStatement) String() string {
+	text := "ALTER RETENTION POLICY " + s.Name
+	text += " ON " + s.Database
+
+	// Duration and Replication are pointers; nil means the clause is absent.
+	if d := s.Duration; d != nil {
+		text += " DURATION " + FormatDuration(*d)
+	}
+
+	if r := s.Replication; r != nil {
+		text += " REPLICATION " + strconv.Itoa(*r)
+	}
+
+	// Default is a plain bool, so an unset flag and an explicit false
+	// are indistinguishable here.
+	if s.Default {
+		text += " DEFAULT"
+	}
+
+	return text
+}
+
+// RequiredPrivileges returns a single admin entry with AllPrivileges and no database name.
+func (s *AlterRetentionPolicyStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{ExecutionPrivilege{Admin: true, Privilege: AllPrivileges}}
+}
+
+// FillOption selects how empty aggregate windows are represented in the
+// result of a SelectStatement. The zero value is NullFill.
+type FillOption int
+
+const (
+	// NullFill means that empty aggregate windows will just have null values.
+	NullFill FillOption = iota
+	// NoFill means that empty aggregate windows will be purged from the result.
+	NoFill
+	// NumberFill means that empty aggregate windows will be filled with the given number.
+	NumberFill
+	// PreviousFill means that empty aggregate windows will be filled with whatever
+	// the previous aggregate window had.
+	PreviousFill
+)
+
+// SelectStatement represents a command for extracting data from the database.
+type SelectStatement struct {
+	// Expressions returned from the selection.
+	Fields Fields
+
+	// Target (destination) for the result of the select; nil when there is none.
+	Target *Target
+
+	// Expressions used for grouping the selection.
+	Dimensions Dimensions
+
+	// Data sources that fields are extracted from.
+	Sources Sources
+
+	// An expression evaluated on each data point (the WHERE clause); may be nil.
+	Condition Expr
+
+	// Fields to sort results by.
+	SortFields SortFields
+
+	// Maximum number of rows to be returned. Unlimited if zero.
+	Limit int
+
+	// Returns rows starting at an offset from the first row.
+	Offset int
+
+	// Maximum number of series to be returned. Unlimited if zero.
+	SLimit int
+
+	// Returns series starting at an offset from the first one.
+	SOffset int
+
+	// Memoized GROUP BY time() interval; populated by GroupByInterval and
+	// treated as "not yet computed" while it is zero.
+	groupByInterval time.Duration
+
+	// True if this is a query for raw data values (i.e. not an aggregate).
+	IsRawQuery bool
+
+	// What fill option the select statement uses, if any.
+	Fill FillOption
+
+	// The value to fill empty aggregate buckets with, if any.
+	FillValue interface{}
+}
+
+// HasDerivative reports whether any function call returned by FunctionCalls
+// has a name ending in "derivative".
+func (s *SelectStatement) HasDerivative() bool {
+	calls := s.FunctionCalls()
+	for i := range calls {
+		if strings.HasSuffix(calls[i].Name, "derivative") {
+			return true
+		}
+	}
+	return false
+}
+
+// IsSimpleDerivative reports whether one of the statement's function calls is
+// a derivative function (name ending in "derivative") whose first argument is
+// a plain variable reference rather than a nested aggregate call.
+//
+// Derivative calls with no arguments are skipped instead of being indexed, so
+// this is safe to call on a statement that has not been validated yet.
+func (s *SelectStatement) IsSimpleDerivative() bool {
+	for _, call := range s.FunctionCalls() {
+		if !strings.HasSuffix(call.Name, "derivative") || len(call.Args) == 0 {
+			continue
+		}
+		// A *VarRef first argument means the derivative is over a raw field;
+		// anything else (e.g. an aggregate call) makes it a nested derivative.
+		if _, ok := call.Args[0].(*VarRef); ok {
+			return true
+		}
+	}
+	return false
+}
+
+// Clone returns a copy of the statement. Sources, Condition, Target, Fields,
+// Dimensions and SortFields are rebuilt as new values; scalar options and
+// FillValue are copied as-is. The memoized group-by interval is not carried
+// over to the copy.
+func (s *SelectStatement) Clone() *SelectStatement {
+	clone := new(SelectStatement)
+
+	clone.Sources = cloneSources(s.Sources)
+	clone.Condition = CloneExpr(s.Condition)
+
+	clone.Limit, clone.Offset = s.Limit, s.Offset
+	clone.SLimit, clone.SOffset = s.SLimit, s.SOffset
+	clone.Fill, clone.FillValue = s.Fill, s.FillValue
+	clone.IsRawQuery = s.IsRawQuery
+
+	if t := s.Target; t != nil {
+		clone.Target = &Target{Measurement: &Measurement{
+			Database:        t.Measurement.Database,
+			RetentionPolicy: t.Measurement.RetentionPolicy,
+			Name:            t.Measurement.Name,
+			Regex:           CloneRegexLiteral(t.Measurement.Regex),
+		}}
+	}
+
+	clone.Fields = make(Fields, len(s.Fields))
+	for i, f := range s.Fields {
+		clone.Fields[i] = &Field{Expr: CloneExpr(f.Expr), Alias: f.Alias}
+	}
+
+	clone.Dimensions = make(Dimensions, len(s.Dimensions))
+	for i, d := range s.Dimensions {
+		clone.Dimensions[i] = &Dimension{Expr: CloneExpr(d.Expr)}
+	}
+
+	clone.SortFields = make(SortFields, len(s.SortFields))
+	for i, sf := range s.SortFields {
+		clone.SortFields[i] = &SortField{Name: sf.Name, Ascending: sf.Ascending}
+	}
+
+	return clone
+}
+
+// cloneSources returns a new Sources slice holding a cloneSource copy of
+// every element of sources. The result is always non-nil.
+func cloneSources(sources Sources) Sources {
+	out := make(Sources, len(sources))
+	for i := range sources {
+		out[i] = cloneSource(sources[i])
+	}
+	return out
+}
+
+func cloneSource(s Source) Source {
+	if s == nil {
+		return nil
+	}
+
+	switch s := s.(type) {
+	case *Measurement:
+		m := &Measurement{Database: s.Database, RetentionPolicy: s.RetentionPolicy, Name: s.Name}
+		if s.Regex != nil {
+			m.Regex = &RegexLiteral{Val: regexp.MustCompile(s.Regex.Val.String())}
+		}
+		return m
+	default:
+		panic("unreachable")
+	}
+}
+
+// RewriteWildcards returns the re-written form of the select statement. Any wildcard query
+// fields are replaced with the supplied fields, and any wildcard GROUP BY fields are replaced
+// with the supplied dimensions.
+func (s *SelectStatement) RewriteWildcards(fields Fields, dimensions Dimensions) *SelectStatement {
+	other := s.Clone()
+	selectWildcard, groupWildcard := false, false
+
+	// Rewrite all wildcard query fields
+	rwFields := make(Fields, 0, len(s.Fields))
+	for _, f := range s.Fields {
+		switch f.Expr.(type) {
+		case *Wildcard:
+			// Sort wildcard fields for consistent output
+			sort.Sort(fields)
+			rwFields = append(rwFields, fields...)
+			selectWildcard = true
+		default:
+			rwFields = append(rwFields, f)
+		}
+	}
+	other.Fields = rwFields
+
+	// Rewrite all wildcard GROUP BY fields
+	rwDimensions := make(Dimensions, 0, len(s.Dimensions))
+	for _, d := range s.Dimensions {
+		switch d.Expr.(type) {
+		case *Wildcard:
+			rwDimensions = append(rwDimensions, dimensions...)
+			groupWildcard = true
+		default:
+			rwDimensions = append(rwDimensions, d)
+		}
+	}
+
+	if selectWildcard && !groupWildcard {
+		rwDimensions = append(rwDimensions, dimensions...)
+	}
+	other.Dimensions = rwDimensions
+
+	return other
+}
+
+// RewriteDistinct replaces every *Distinct field expression with the call
+// produced by its NewCall method so that map/reduce works correctly, and
+// clears IsRawQuery when it does so. It modifies the statement in place and
+// assumes validation has already passed.
+func (s *SelectStatement) RewriteDistinct() {
+	for i := range s.Fields {
+		d, ok := s.Fields[i].Expr.(*Distinct)
+		if !ok {
+			continue
+		}
+		s.Fields[i].Expr = d.NewCall()
+		s.IsRawQuery = false
+	}
+}
+
+// String renders the select statement. Clauses are emitted in the order
+// SELECT, INTO target, FROM, WHERE, GROUP BY, fill(), ORDER BY, LIMIT, OFFSET,
+// SLIMIT, SOFFSET; each optional clause appears only when it is set.
+func (s *SelectStatement) String() string {
+	var buf bytes.Buffer
+	emit := func(parts ...string) {
+		for _, p := range parts {
+			_, _ = buf.WriteString(p)
+		}
+	}
+
+	emit("SELECT ", s.Fields.String())
+
+	if s.Target != nil {
+		emit(" ", s.Target.String())
+	}
+	if len(s.Sources) > 0 {
+		emit(" FROM ", s.Sources.String())
+	}
+	if s.Condition != nil {
+		emit(" WHERE ", s.Condition.String())
+	}
+	if len(s.Dimensions) > 0 {
+		emit(" GROUP BY ", s.Dimensions.String())
+	}
+
+	// NullFill is the default and is not printed.
+	switch s.Fill {
+	case NoFill:
+		emit(" fill(none)")
+	case NumberFill:
+		emit(fmt.Sprintf(" fill(%v)", s.FillValue))
+	case PreviousFill:
+		emit(" fill(previous)")
+	}
+
+	if len(s.SortFields) > 0 {
+		emit(" ORDER BY ", s.SortFields.String())
+	}
+	if s.Limit > 0 {
+		emit(" LIMIT ", strconv.Itoa(s.Limit))
+	}
+	if s.Offset > 0 {
+		emit(" OFFSET ", strconv.Itoa(s.Offset))
+	}
+	if s.SLimit > 0 {
+		emit(" SLIMIT ", strconv.Itoa(s.SLimit))
+	}
+	if s.SOffset > 0 {
+		emit(" SOFFSET ", strconv.Itoa(s.SOffset))
+	}
+	return buf.String()
+}
+
+// RequiredPrivileges returns the privileges needed to run the statement: a
+// non-admin ReadPrivilege entry and, when the statement has a Target, a
+// non-admin WritePrivilege entry on the target measurement's database.
+func (s *SelectStatement) RequiredPrivileges() ExecutionPrivileges {
+	privs := ExecutionPrivileges{{Admin: false, Name: "", Privilege: ReadPrivilege}}
+	if s.Target == nil {
+		return privs
+	}
+
+	return append(privs, ExecutionPrivilege{
+		Admin:     false,
+		Name:      s.Target.Measurement.Database,
+		Privilege: WritePrivilege,
+	})
+}
+
+// OnlyTimeDimensions returns true if the statement has a where clause with only time constraints
+func (s *SelectStatement) OnlyTimeDimensions() bool {
+	return s.walkForTime(s.Condition)
+}
+
+// walkForTime is called by the OnlyTimeDimensions method to walk the where clause to determine if
+// the only things specified are based on time
+func (s *SelectStatement) walkForTime(node Node) bool {
+	switch n := node.(type) {
+	case *BinaryExpr:
+		if n.Op == AND || n.Op == OR {
+			return s.walkForTime(n.LHS) && s.walkForTime(n.RHS)
+		}
+		if ref, ok := n.LHS.(*VarRef); ok && strings.ToLower(ref.Val) == "time" {
+			return true
+		}
+		return false
+	case *ParenExpr:
+		// walk down the tree
+		return s.walkForTime(n.Expr)
+	default:
+		return false
+	}
+}
+
+// HasWildcard reports whether the field list or the GROUP BY dimensions
+// contain at least one wildcard expression.
+func (s *SelectStatement) HasWildcard() bool {
+	for i := range s.Fields {
+		if _, isWildcard := s.Fields[i].Expr.(*Wildcard); isWildcard {
+			return true
+		}
+	}
+	for i := range s.Dimensions {
+		if _, isWildcard := s.Dimensions[i].Expr.(*Wildcard); isWildcard {
+			return true
+		}
+	}
+	return false
+}
+
+// hasTimeDimensions returns whether or not the select statement has at least 1
+// where condition with time as the condition
+func (s *SelectStatement) hasTimeDimensions(node Node) bool {
+	switch n := node.(type) {
+	case *BinaryExpr:
+		if n.Op == AND || n.Op == OR {
+			return s.hasTimeDimensions(n.LHS) || s.hasTimeDimensions(n.RHS)
+		}
+		if ref, ok := n.LHS.(*VarRef); ok && strings.ToLower(ref.Val) == "time" {
+			return true
+		}
+		return false
+	case *ParenExpr:
+		// walk down the tree
+		return s.hasTimeDimensions(n.Expr)
+	default:
+		return false
+	}
+}
+
+// validate runs the statement's validation passes in order (distinct,
+// count(distinct), aggregates, derivative) and returns the first error.
+func (s *SelectStatement) validate(tr targetRequirement) error {
+	checks := []func() error{
+		s.validateDistinct,
+		s.validateCountDistinct,
+		func() error { return s.validateAggregates(tr) },
+		s.validateDerivative,
+	}
+	for _, check := range checks {
+		if err := check(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// validateAggregates checks the aggregate calls in the field list and the
+// GROUP BY time() usage.
+//
+// It returns an error when:
+//   - a top-level call has the wrong number of arguments (1-2 for the
+//     derivative functions, exactly 2 for percentile, exactly 1 otherwise);
+//   - the GROUP BY time() dimension itself is malformed;
+//   - a raw (non-aggregate) query has a GROUP BY time() interval;
+//   - tr is targetNotRequired and an aggregate query groups by time without
+//     any time condition in its WHERE clause.
+func (s *SelectStatement) validateAggregates(tr targetRequirement) error {
+	// First, check argument counts for every top-level call.
+	for _, f := range s.Fields {
+		c, ok := f.Expr.(*Call)
+		if !ok {
+			continue
+		}
+		switch c.Name {
+		case "derivative", "non_negative_derivative":
+			if minArgs, maxArgs, got := 1, 2, len(c.Args); got > maxArgs || got < minArgs {
+				return fmt.Errorf("invalid number of arguments for %s, expected at least %d but no more than %d, got %d", c.Name, minArgs, maxArgs, got)
+			}
+		case "percentile":
+			if exp, got := 2, len(c.Args); got != exp {
+				return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", c.Name, exp, got)
+			}
+		default:
+			if exp, got := 1, len(c.Args); got != exp {
+				return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", c.Name, exp, got)
+			}
+		}
+	}
+
+	// Now check the group by duration and where clause. A malformed time()
+	// dimension is reported here instead of being treated as "no interval".
+	groupByDuration, err := s.GroupByInterval()
+	if err != nil {
+		return err
+	}
+
+	// A group by interval without an aggregate function is invalid.
+	if s.IsRawQuery && groupByDuration > 0 {
+		return fmt.Errorf("GROUP BY requires at least one aggregate function")
+	}
+
+	// An aggregate grouped by time needs a time condition in WHERE. Create
+	// continuous query statements are exempt from this rule.
+	if tr == targetNotRequired {
+		if !s.IsRawQuery && groupByDuration > 0 && !s.hasTimeDimensions(s.Condition) {
+			return fmt.Errorf("aggregate functions with GROUP BY time require a WHERE time clause")
+		}
+	}
+	return nil
+}
+
+func (s *SelectStatement) HasDistinct() bool {
+	// determine if we have a call named distinct
+	for _, f := range s.Fields {
+		switch c := f.Expr.(type) {
+		case *Call:
+			if c.Name == "distinct" {
+				return true
+			}
+		case *Distinct:
+			return true
+		}
+	}
+	return false
+}
+
+func (s *SelectStatement) validateDistinct() error {
+	if !s.HasDistinct() {
+		return nil
+	}
+
+	if len(s.Fields) > 1 {
+		return fmt.Errorf("aggregate function distinct() can not be combined with other functions or fields")
+	}
+
+	switch c := s.Fields[0].Expr.(type) {
+	case *Call:
+		if len(c.Args) == 0 {
+			return fmt.Errorf("distinct function requires at least one argument")
+		}
+
+		if len(c.Args) != 1 {
+			return fmt.Errorf("distinct function can only have one argument")
+		}
+	}
+	return nil
+}
+
+func (s *SelectStatement) HasCountDistinct() bool {
+	for _, f := range s.Fields {
+		if c, ok := f.Expr.(*Call); ok {
+			if c.Name == "count" {
+				for _, a := range c.Args {
+					if _, ok := a.(*Distinct); ok {
+						return true
+					}
+					if c, ok := a.(*Call); ok {
+						if c.Name == "distinct" {
+							return true
+						}
+					}
+				}
+			}
+		}
+	}
+	return false
+}
+
+func (s *SelectStatement) validateCountDistinct() error {
+	if !s.HasCountDistinct() {
+		return nil
+	}
+
+	valid := func(e Expr) bool {
+		c, ok := e.(*Call)
+		if !ok {
+			return true
+		}
+		if c.Name != "count" {
+			return true
+		}
+		for _, a := range c.Args {
+			if _, ok := a.(*Distinct); ok {
+				return len(c.Args) == 1
+			}
+			if d, ok := a.(*Call); ok {
+				if d.Name == "distinct" {
+					return len(d.Args) == 1
+				}
+			}
+		}
+		return true
+	}
+
+	for _, f := range s.Fields {
+		if !valid(f.Expr) {
+			return fmt.Errorf("count(distinct <field>) can only have one argument")
+		}
+	}
+
+	return nil
+}
+
+func (s *SelectStatement) validateDerivative() error {
+	if !s.HasDerivative() {
+		return nil
+	}
+
+	// If a derivative is requested, it must be the only field in the query. We don't support
+	// multiple fields in combination w/ derivaties yet.
+	if len(s.Fields) != 1 {
+		return fmt.Errorf("derivative cannot be used with other fields")
+	}
+
+	aggr := s.FunctionCalls()
+	if len(aggr) != 1 {
+		return fmt.Errorf("derivative cannot be used with other fields")
+	}
+
+	// Derivative requires two arguments
+	derivativeCall := aggr[0]
+	if len(derivativeCall.Args) == 0 {
+		return fmt.Errorf("derivative requires a field argument")
+	}
+
+	// First arg must be a field or aggr over a field e.g. (mean(field))
+	_, callOk := derivativeCall.Args[0].(*Call)
+	_, varOk := derivativeCall.Args[0].(*VarRef)
+
+	if !(callOk || varOk) {
+		return fmt.Errorf("derivative requires a field argument")
+	}
+
+	// If a duration arg is pased, make sure it's a duration
+	if len(derivativeCall.Args) == 2 {
+		// Second must be a duration .e.g (1h)
+		if _, ok := derivativeCall.Args[1].(*DurationLiteral); !ok {
+			return fmt.Errorf("derivative requires a duration argument")
+		}
+	}
+
+	return nil
+}
+
+// GroupByInterval extracts the GROUP BY time() interval, if specified.
+// It returns zero when there is no time() dimension, and an error when the
+// first time() dimension does not have exactly one duration argument.
+// A non-zero result is memoized on the statement and reused by later calls.
+func (s *SelectStatement) GroupByInterval() (time.Duration, error) {
+	// Reuse the memoized value when one has been stored.
+	if cached := s.groupByInterval; cached != 0 {
+		return cached, nil
+	}
+
+	for _, d := range s.Dimensions {
+		call, ok := d.Expr.(*Call)
+		if !ok || call.Name != "time" {
+			continue
+		}
+
+		// There must be exactly one argument.
+		if len(call.Args) != 1 {
+			return 0, errors.New("time dimension expected one argument")
+		}
+
+		// The argument must be a duration.
+		lit, isDuration := call.Args[0].(*DurationLiteral)
+		if !isDuration {
+			return 0, errors.New("time dimension must have one duration argument")
+		}
+
+		s.groupByInterval = lit.Val
+		return lit.Val, nil
+	}
+
+	// No dimensions, or none of them is a time() call.
+	return 0, nil
+}
+
+// SetTimeRange sets the start and end time of the select statement to [start, end). i.e. start inclusive, end exclusive.
+// This is used commonly for continuous queries so the start and end are in buckets.
+func (s *SelectStatement) SetTimeRange(start, end time.Time) error {
+	cond := fmt.Sprintf("time >= '%s' AND time < '%s'", start.UTC().Format(time.RFC3339Nano), end.UTC().Format(time.RFC3339Nano))
+	if s.Condition != nil {
+		cond = fmt.Sprintf("%s AND %s", s.rewriteWithoutTimeDimensions(), cond)
+	}
+
+	expr, err := NewParser(strings.NewReader(cond)).ParseExpr()
+	if err != nil {
+		return err
+	}
+
+	// fold out any previously replaced time dimensios and set the condition
+	s.Condition = Reduce(expr, nil)
+
+	return nil
+}
+
+// rewriteWithoutTimeDimensions will remove any WHERE time... clauses from the select statement
+// This is necessary when setting an explicit time range to override any that previously existed.
+func (s *SelectStatement) rewriteWithoutTimeDimensions() string {
+	n := RewriteFunc(s.Condition, func(n Node) Node {
+		switch n := n.(type) {
+		case *BinaryExpr:
+			if n.LHS.String() == "time" {
+				return &BooleanLiteral{Val: true}
+			}
+			return n
+		case *Call:
+			return &BooleanLiteral{Val: true}
+		default:
+			return n
+		}
+	})
+
+	return n.String()
+}
+
+/*
+
+BinaryExpr
+
+SELECT mean(xxx.value) + avg(yyy.value) FROM xxx JOIN yyy WHERE xxx.host = 123
+
+from xxx where host = 123
+select avg(value) from yyy where host = 123
+
+SELECT xxx.value FROM xxx WHERE xxx.host = 123
+SELECT yyy.value FROM yyy
+
+---
+
+SELECT MEAN(xxx.value) + MEAN(cpu.load.value)
+FROM xxx JOIN yyy
+GROUP BY host
+WHERE (xxx.region == "uswest" OR yyy.region == "uswest") AND xxx.otherfield == "XXX"
+
+select * from (
+	select mean + mean from xxx join yyy
+	group by time(5m), host
+) (xxx.region == "uswest" OR yyy.region == "uswest") AND xxx.otherfield == "XXX"
+
+(seriesIDS for xxx.region = 'uswest' union seriesIDs for yyy.region = 'uswest') | seriesIDS xxx.otherfield = 'XXX'
+
+WHERE xxx.region == "uswest" AND xxx.otherfield == "XXX"
+WHERE yyy.region == "uswest"
+
+
+*/
+
+// Substatement returns a single-series statement selecting only the given
+// variable reference. Dimensions, Limit, Offset and SortFields are carried
+// over from the receiver. With exactly one source, that source and the whole
+// condition are reused; otherwise the source is looked up by the reference's
+// name and the condition is filtered down to that source. An error is
+// returned when no source matches.
+func (s *SelectStatement) Substatement(ref *VarRef) (*SelectStatement, error) {
+	sub := &SelectStatement{
+		Fields:     Fields{{Expr: ref}},
+		Dimensions: s.Dimensions,
+		Limit:      s.Limit,
+		Offset:     s.Offset,
+		SortFields: s.SortFields,
+	}
+
+	// A single source keeps the complete condition.
+	if len(s.Sources) == 1 {
+		sub.Sources, sub.Condition = s.Sources, s.Condition
+		return sub, nil
+	}
+
+	// Otherwise locate the source the reference belongs to.
+	name := MatchSource(s.Sources, ref.Val)
+	if name == "" {
+		return nil, fmt.Errorf("field source not found: %s", ref.Val)
+	}
+	sub.Sources = Sources{&Measurement{Name: name}}
+
+	// Keep only the parts of the condition that relate to that source.
+	if s.Condition != nil {
+		sub.Condition = filterExprBySource(name, s.Condition)
+	}
+
+	return sub, nil
+}
+
+// NamesInWhere returns the field and tag names (idents) referenced in the where clause
+func (s *SelectStatement) NamesInWhere() []string {
+	var a []string
+	if s.Condition != nil {
+		a = walkNames(s.Condition)
+	}
+	return a
+}
+
+// NamesInSelect returns the field and tag names (idents) referenced by the
+// select clause, in field order.
+func (s *SelectStatement) NamesInSelect() []string {
+	var names []string
+	for i := range s.Fields {
+		names = append(names, walkNames(s.Fields[i].Expr)...)
+	}
+	return names
+}
+
+// walkNames walks an expression and returns the names it references. A
+// variable reference yields its own name, a call yields the name of its first
+// argument when that is a variable reference, binary expressions yield the
+// names from the left side followed by the right, and parentheses are looked
+// through. Anything else yields nil.
+func walkNames(exp Expr) []string {
+	switch e := exp.(type) {
+	case *ParenExpr:
+		return walkNames(e.Expr)
+	case *BinaryExpr:
+		names := walkNames(e.LHS)
+		return append(names, walkNames(e.RHS)...)
+	case *Call:
+		if len(e.Args) > 0 {
+			if ref, ok := e.Args[0].(*VarRef); ok {
+				return []string{ref.Val}
+			}
+		}
+		return nil
+	case *VarRef:
+		return []string{e.Val}
+	default:
+		return nil
+	}
+}
+
+// FunctionCalls returns the Call objects found in the statement's fields, in
+// field order.
+func (s *SelectStatement) FunctionCalls() []*Call {
+	var calls []*Call
+	for i := range s.Fields {
+		calls = append(calls, walkFunctionCalls(s.Fields[i].Expr)...)
+	}
+	return calls
+}
+
+// walkFunctionCalls walks a field expression and returns the function calls
+// it contains. A call is returned as-is without descending into its
+// arguments; binary expressions contribute the calls from the left side
+// followed by the right, and parentheses are looked through.
+func walkFunctionCalls(exp Expr) []*Call {
+	switch e := exp.(type) {
+	case *Call:
+		return []*Call{e}
+	case *ParenExpr:
+		return walkFunctionCalls(e.Expr)
+	case *BinaryExpr:
+		calls := walkFunctionCalls(e.LHS)
+		return append(calls, walkFunctionCalls(e.RHS)...)
+	default:
+		return nil
+	}
+}
+
+// filterExprBySource filters an expression down to the parts related to the
+// named source and returns nil when nothing remains.
+//
+// A variable reference is kept only when its name starts with name. For a
+// logical AND/OR, whichever sides survive are returned (one side alone, or
+// both recombined). Any other binary expression needs both sides to survive.
+// Parentheses are preserved around a surviving inner expression, and all
+// other expression kinds are returned unchanged.
+func filterExprBySource(name string, expr Expr) Expr {
+	switch e := expr.(type) {
+	case *VarRef:
+		if strings.HasPrefix(e.Val, name) {
+			return expr
+		}
+		return nil
+
+	case *ParenExpr:
+		inner := filterExprBySource(name, e.Expr)
+		if inner == nil {
+			return nil
+		}
+		return &ParenExpr{Expr: inner}
+
+	case *BinaryExpr:
+		lhs := filterExprBySource(name, e.LHS)
+		rhs := filterExprBySource(name, e.RHS)
+		logical := e.Op == AND || e.Op == OR
+
+		switch {
+		case lhs != nil && rhs != nil:
+			return &BinaryExpr{Op: e.Op, LHS: lhs, RHS: rhs}
+		case !logical:
+			// Arithmetic and comparison operators need both operands.
+			return nil
+		case lhs != nil:
+			return lhs
+		default:
+			// Either only the right side survived, or neither did (nil).
+			return rhs
+		}
+
+	default:
+		return expr
+	}
+}
+
+// MatchSource returns the source name that matches a field name.
+// Returns a blank string if no sources match.
+func MatchSource(sources Sources, name string) string {
+	for _, src := range sources {
+		switch src := src.(type) {
+		case *Measurement:
+			if strings.HasPrefix(name, src.Name) {
+				return src.Name
+			}
+		}
+	}
+	return ""
+}
+
+// Target represents the destination (policy, measurement and DB) that the
+// result of a select is written to.
+type Target struct {
+	// Measurement to write into.
+	Measurement *Measurement
+}
+
+// String renders the target as "INTO <measurement>". A nil target renders as
+// the empty string.
+func (t *Target) String() string {
+	if t == nil {
+		return ""
+	}
+	return "INTO " + t.Measurement.String()
+}
+
+// DeleteStatement represents a command for removing data from the database.
+type DeleteStatement struct {
+	// Data source that values are removed from.
+	Source Source
+
+	// An expression evaluated on data point; nil means no WHERE clause.
+	Condition Expr
+}
+
+// String returns a string representation of the delete statement in the form
+// "DELETE <source>" followed by " WHERE <condition>" when a condition is set.
+func (s *DeleteStatement) String() string {
+	var buf bytes.Buffer
+	_, _ = buf.WriteString("DELETE ")
+	_, _ = buf.WriteString(s.Source.String())
+	if s.Condition != nil {
+		_, _ = buf.WriteString(" WHERE ")
+		_, _ = buf.WriteString(s.Condition.String())
+	}
+	// Return the accumulated text. Calling s.String() here would recurse
+	// without bound.
+	return buf.String()
+}
+
+// RequiredPrivileges returns the privilege required to execute a
+// DeleteStatement: a single non-admin WritePrivilege entry.
+func (s *DeleteStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{{Admin: false, Name: "", Privilege: WritePrivilege}}
+}
+
+// ShowSeriesStatement describes a SHOW SERIES command, which lists series in
+// the database.
+type ShowSeriesStatement struct {
+	// Sources are the measurement(s) the series are listed for.
+	Sources Sources
+
+	// Condition is an expression evaluated on a series name or tag.
+	Condition Expr
+
+	// SortFields are the fields to sort results by.
+	SortFields SortFields
+
+	// Limit is the maximum number of rows to return. Unlimited if zero.
+	Limit int
+
+	// Offset returns rows starting at an offset from the first row.
+	Offset int
+}
+
+// String renders the statement as "SHOW SERIES" followed by the FROM, WHERE,
+// ORDER BY, LIMIT and OFFSET clauses that are set.
+func (s *ShowSeriesStatement) String() string {
+	var buf bytes.Buffer
+	emit := func(parts ...string) {
+		for _, p := range parts {
+			_, _ = buf.WriteString(p)
+		}
+	}
+
+	emit("SHOW SERIES")
+	if s.Sources != nil {
+		emit(" FROM ", s.Sources.String())
+	}
+	if s.Condition != nil {
+		emit(" WHERE ", s.Condition.String())
+	}
+	if len(s.SortFields) > 0 {
+		emit(" ORDER BY ", s.SortFields.String())
+	}
+	if s.Limit > 0 {
+		emit(" LIMIT ", strconv.Itoa(s.Limit))
+	}
+	if s.Offset > 0 {
+		emit(" OFFSET ", strconv.Itoa(s.Offset))
+	}
+	return buf.String()
+}
+
+// RequiredPrivileges returns the privileges needed to run the statement: a
+// single non-admin ReadPrivilege entry.
+func (s *ShowSeriesStatement) RequiredPrivileges() ExecutionPrivileges {
+	read := ExecutionPrivilege{Admin: false, Name: "", Privilege: ReadPrivilege}
+	return ExecutionPrivileges{read}
+}
+
+// DropSeriesStatement describes a DROP SERIES command, which removes series
+// from the database.
+type DropSeriesStatement struct {
+	// Sources are the data sources the series belong to (optional).
+	Sources Sources
+
+	// Condition is an expression evaluated on data point (optional).
+	Condition Expr
+}
+
+// String renders the statement as "DROP SERIES" followed by the FROM and
+// WHERE clauses that are set.
+func (s *DropSeriesStatement) String() string {
+	out := "DROP SERIES"
+	if s.Sources != nil {
+		out += " FROM " + s.Sources.String()
+	}
+	if s.Condition != nil {
+		out += " WHERE " + s.Condition.String()
+	}
+	return out
+}
+
+// RequiredPrivileges returns the privileges needed to run the statement: a
+// single non-admin WritePrivilege entry.
+func (s DropSeriesStatement) RequiredPrivileges() ExecutionPrivileges {
+	write := ExecutionPrivilege{Admin: false, Name: "", Privilege: WritePrivilege}
+	return ExecutionPrivileges{write}
+}
+
+// ShowContinuousQueriesStatement describes a SHOW CONTINUOUS QUERIES command.
+type ShowContinuousQueriesStatement struct{}
+
+// String renders the statement, which is always "SHOW CONTINUOUS QUERIES".
+func (s *ShowContinuousQueriesStatement) String() string {
+	return "SHOW CONTINUOUS QUERIES"
+}
+
+// RequiredPrivileges returns the privileges needed to run the statement: a
+// single non-admin ReadPrivilege entry.
+func (s *ShowContinuousQueriesStatement) RequiredPrivileges() ExecutionPrivileges {
+	read := ExecutionPrivilege{Admin: false, Name: "", Privilege: ReadPrivilege}
+	return ExecutionPrivileges{read}
+}
+
+// ShowGrantsForUserStatement describes a SHOW GRANTS FOR command, which lists
+// a user's privileges.
+type ShowGrantsForUserStatement struct {
+	// Name is the user whose privileges are displayed.
+	Name string
+}
+
+// String renders the statement as "SHOW GRANTS FOR <name>".
+func (s *ShowGrantsForUserStatement) String() string {
+	return "SHOW GRANTS FOR " + s.Name
+}
+
+// RequiredPrivileges returns the privileges needed to run the statement: a
+// single entry with Admin set and AllPrivileges.
+func (s *ShowGrantsForUserStatement) RequiredPrivileges() ExecutionPrivileges {
+	admin := ExecutionPrivilege{Admin: true, Name: "", Privilege: AllPrivileges}
+	return ExecutionPrivileges{admin}
+}
+
+// ShowServersStatement describes a SHOW SERVERS command, which lists all
+// servers.
+type ShowServersStatement struct{}
+
+// String renders the statement, which is always "SHOW SERVERS".
+func (s *ShowServersStatement) String() string {
+	return "SHOW SERVERS"
+}
+
+// RequiredPrivileges returns the privileges needed to run the statement: a
+// single entry with Admin set and AllPrivileges.
+func (s *ShowServersStatement) RequiredPrivileges() ExecutionPrivileges {
+	admin := ExecutionPrivilege{Admin: true, Name: "", Privilege: AllPrivileges}
+	return ExecutionPrivileges{admin}
+}
+
+// ShowDatabasesStatement describes a SHOW DATABASES command, which lists all
+// databases in the cluster.
+type ShowDatabasesStatement struct{}
+
+// String renders the statement, which is always "SHOW DATABASES".
+func (s *ShowDatabasesStatement) String() string {
+	return "SHOW DATABASES"
+}
+
+// RequiredPrivileges returns the privileges needed to run the statement: a
+// single entry with Admin set and AllPrivileges.
+func (s *ShowDatabasesStatement) RequiredPrivileges() ExecutionPrivileges {
+	admin := ExecutionPrivilege{Admin: true, Name: "", Privilege: AllPrivileges}
+	return ExecutionPrivileges{admin}
+}
+
+// CreateContinuousQueryStatement describes a CREATE CONTINUOUS QUERY command.
+type CreateContinuousQueryStatement struct {
+	// Name is the name of the continuous query to create.
+	Name string
+
+	// Database is the database the continuous query is created on.
+	Database string
+
+	// Source is the SELECT statement that supplies the data.
+	Source *SelectStatement
+}
+
+// String renders the statement as
+// "CREATE CONTINUOUS QUERY <name> ON <db> BEGIN <select> END", with the name
+// and database passed through QuoteIdent.
+func (s *CreateContinuousQueryStatement) String() string {
+	name, db := QuoteIdent(s.Name), QuoteIdent(s.Database)
+	return fmt.Sprintf("CREATE CONTINUOUS QUERY %s ON %s BEGIN %s END", name, db, s.Source.String())
+}
+
+// DefaultDatabase returns the statement's Database, which serves as its
+// default database.
+func (s *CreateContinuousQueryStatement) DefaultDatabase() string {
+	db := s.Database
+	return db
+}
+
+// RequiredPrivileges returns the privileges required to execute a
+// CreateContinuousQueryStatement.
+//
+// The result always contains a non-admin ReadPrivilege entry on the
+// statement's Database. When the source select names an explicit target
+// database, a non-admin WritePrivilege entry on that database is appended.
+// A statement whose Source, Target or target Measurement is nil yields only
+// the read entry instead of panicking.
+func (s *CreateContinuousQueryStatement) RequiredPrivileges() ExecutionPrivileges {
+	ep := ExecutionPrivileges{{Admin: false, Name: s.Database, Privilege: ReadPrivilege}}
+
+	// Without a fully populated target there is no destination to authorize.
+	if s.Source == nil || s.Source.Target == nil || s.Source.Target.Measurement == nil {
+		return ep
+	}
+
+	// Selecting into an explicitly named database requires write access to it.
+	if db := s.Source.Target.Measurement.Database; db != "" {
+		ep = append(ep, ExecutionPrivilege{
+			Admin:     false,
+			Name:      db,
+			Privilege: WritePrivilege,
+		})
+	}
+
+	return ep
+}
+
+// DropContinuousQueryStatement represents a command for removing a continuous
+// query.
+type DropContinuousQueryStatement struct {
+	// Name of the continuous query to remove.
+	Name string
+
+	// Name of the database the continuous query belongs to.
+	Database string
+}
+
+// String returns a string representation of the statement in the form
+// "DROP CONTINUOUS QUERY <name> ON <db>". The database is included so the
+// rendered text carries both fields of the statement, and both identifiers
+// are passed through QuoteIdent, matching CreateContinuousQueryStatement.
+func (s *DropContinuousQueryStatement) String() string {
+	return fmt.Sprintf("DROP CONTINUOUS QUERY %s ON %s", QuoteIdent(s.Name), QuoteIdent(s.Database))
+}
+
+// RequiredPrivileges returns the privilege(s) required to execute a
+// DropContinuousQueryStatement: a single non-admin WritePrivilege entry.
+func (s *DropContinuousQueryStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{{Admin: false, Name: "", Privilege: WritePrivilege}}
+}
+
+// ShowMeasurementsStatement describes a SHOW MEASUREMENTS command.
+type ShowMeasurementsStatement struct {
+	// Condition is an expression evaluated on data point.
+	Condition Expr
+
+	// SortFields are the fields to sort results by.
+	SortFields SortFields
+
+	// Limit is the maximum number of rows to return. Unlimited if zero.
+	Limit int
+
+	// Offset returns rows starting at an offset from the first row.
+	Offset int
+}
+
+// String renders the statement as "SHOW MEASUREMENTS" followed by the WHERE,
+// ORDER BY, LIMIT and OFFSET clauses that are set.
+func (s *ShowMeasurementsStatement) String() string {
+	out := "SHOW MEASUREMENTS"
+	if s.Condition != nil {
+		out += " WHERE " + s.Condition.String()
+	}
+	if len(s.SortFields) > 0 {
+		out += " ORDER BY " + s.SortFields.String()
+	}
+	if s.Limit > 0 {
+		out += " LIMIT " + strconv.Itoa(s.Limit)
+	}
+	if s.Offset > 0 {
+		out += " OFFSET " + strconv.Itoa(s.Offset)
+	}
+	return out
+}
+
+// RequiredPrivileges returns the privileges needed to run the statement: a
+// single non-admin ReadPrivilege entry.
+func (s *ShowMeasurementsStatement) RequiredPrivileges() ExecutionPrivileges {
+	read := ExecutionPrivilege{Admin: false, Name: "", Privilege: ReadPrivilege}
+	return ExecutionPrivileges{read}
+}
+
+// DropMeasurementStatement describes a DROP MEASUREMENT command.
+type DropMeasurementStatement struct {
+	// Name is the measurement to drop.
+	Name string
+}
+
+// String renders the statement as "DROP MEASUREMENT <name>".
+func (s *DropMeasurementStatement) String() string {
+	return "DROP MEASUREMENT " + s.Name
+}
+
+// RequiredPrivileges returns the privileges needed to run the statement: a
+// single entry with Admin set and AllPrivileges.
+func (s *DropMeasurementStatement) RequiredPrivileges() ExecutionPrivileges {
+	admin := ExecutionPrivilege{Admin: true, Name: "", Privilege: AllPrivileges}
+	return ExecutionPrivileges{admin}
+}
+
+// ShowRetentionPoliciesStatement represents a SHOW RETENTION POLICIES command for one database.
+type ShowRetentionPoliciesStatement struct {
+	// Name of the database to list policies for.
+	Database string
+}
+
+// String returns "SHOW RETENTION POLICIES " followed by Database, written as-is (no quoting is applied here).
+func (s *ShowRetentionPoliciesStatement) String() string {
+	var buf bytes.Buffer
+	_, _ = buf.WriteString("SHOW RETENTION POLICIES ")
+	_, _ = buf.WriteString(s.Database)
+	return buf.String()
+}
+
+// RequiredPrivileges returns a single non-admin ReadPrivilege entry with an empty Name.
+func (s *ShowRetentionPoliciesStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{{Admin: false, Name: "", Privilege: ReadPrivilege}}
+}
+
+// ShowStatsStatement represents a command for displaying stats for a given server.
+type ShowStatsStatement struct {
+	// Hostname or IP of the server for stats.
+	Host string
+}
+
+// String returns "SHOW STATS " (trailing space always included) followed by Host when Host is non-empty.
+func (s *ShowStatsStatement) String() string {
+	var buf bytes.Buffer
+	_, _ = buf.WriteString("SHOW STATS ")
+	if s.Host != "" {
+		_, _ = buf.WriteString(s.Host)
+	}
+	return buf.String()
+}
+
+// RequiredPrivileges returns a single admin AllPrivileges entry with an empty Name.
+func (s *ShowStatsStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{{Admin: true, Name: "", Privilege: AllPrivileges}}
+}
+
+// ShowDiagnosticsStatement represents a command for showing node diagnostics.
+type ShowDiagnosticsStatement struct{}
+
+// String returns the fixed text "SHOW DIAGNOSTICS".
+func (s *ShowDiagnosticsStatement) String() string { return "SHOW DIAGNOSTICS" }
+
+// RequiredPrivileges returns a single admin AllPrivileges entry with an empty Name.
+func (s *ShowDiagnosticsStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{{Admin: true, Name: "", Privilege: AllPrivileges}}
+}
+
+// ShowTagKeysStatement represents a SHOW TAG KEYS command for listing tag keys.
+type ShowTagKeysStatement struct {
+	// Data sources to list from; String renders them after FROM when non-nil.
+	Sources Sources
+
+	// Optional filter expression; String renders it after WHERE when non-nil.
+	Condition Expr
+
+	// Fields to sort results by; String renders them after ORDER BY when non-empty.
+	SortFields SortFields
+
+	// Maximum number of rows to be returned.
+	// Unlimited if zero.
+	Limit int
+
+	// Returns rows starting at an offset from the first row.
+	Offset int
+}
+
+// String renders the statement as text, appending only the clauses that are set.
+func (s *ShowTagKeysStatement) String() string {
+	var buf bytes.Buffer
+	_, _ = buf.WriteString("SHOW TAG KEYS")
+
+	if s.Sources != nil {
+		_, _ = buf.WriteString(" FROM ")
+		_, _ = buf.WriteString(s.Sources.String())
+	}
+	if s.Condition != nil {
+		_, _ = buf.WriteString(" WHERE ")
+		_, _ = buf.WriteString(s.Condition.String())
+	}
+	if len(s.SortFields) > 0 {
+		_, _ = buf.WriteString(" ORDER BY ")
+		_, _ = buf.WriteString(s.SortFields.String())
+	}
+	if s.Limit > 0 { // zero or negative limits are not rendered
+		_, _ = buf.WriteString(" LIMIT ")
+		_, _ = buf.WriteString(strconv.Itoa(s.Limit))
+	}
+	if s.Offset > 0 { // zero or negative offsets are not rendered
+		_, _ = buf.WriteString(" OFFSET ")
+		_, _ = buf.WriteString(strconv.Itoa(s.Offset))
+	}
+	return buf.String()
+}
+
+// RequiredPrivileges returns a single non-admin ReadPrivilege entry with an empty Name.
+func (s *ShowTagKeysStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{{Admin: false, Name: "", Privilege: ReadPrivilege}}
+}
+
+// ShowTagValuesStatement represents a SHOW TAG VALUES command for listing tag values.
+type ShowTagValuesStatement struct {
+	// Data sources to list from; String renders them after FROM when non-nil.
+	Sources Sources
+
+	// Tag key(s) to pull values from. Note that String does not render this field.
+	TagKeys []string
+
+	// Optional filter expression; String renders it after WHERE when non-nil.
+	Condition Expr
+
+	// Fields to sort results by; String renders them after ORDER BY when non-empty.
+	SortFields SortFields
+
+	// Maximum number of rows to be returned.
+	// Unlimited if zero.
+	Limit int
+
+	// Returns rows starting at an offset from the first row.
+	Offset int
+}
+
+// String renders the statement as text, appending only the clauses that are set; TagKeys is not part of the output.
+func (s *ShowTagValuesStatement) String() string {
+	var buf bytes.Buffer
+	_, _ = buf.WriteString("SHOW TAG VALUES")
+
+	if s.Sources != nil {
+		_, _ = buf.WriteString(" FROM ")
+		_, _ = buf.WriteString(s.Sources.String())
+	}
+	if s.Condition != nil {
+		_, _ = buf.WriteString(" WHERE ")
+		_, _ = buf.WriteString(s.Condition.String())
+	}
+	if len(s.SortFields) > 0 {
+		_, _ = buf.WriteString(" ORDER BY ")
+		_, _ = buf.WriteString(s.SortFields.String())
+	}
+	if s.Limit > 0 { // zero or negative limits are not rendered
+		_, _ = buf.WriteString(" LIMIT ")
+		_, _ = buf.WriteString(strconv.Itoa(s.Limit))
+	}
+	if s.Offset > 0 { // zero or negative offsets are not rendered
+		_, _ = buf.WriteString(" OFFSET ")
+		_, _ = buf.WriteString(strconv.Itoa(s.Offset))
+	}
+	return buf.String()
+}
+
+// RequiredPrivileges returns a single non-admin ReadPrivilege entry with an empty Name.
+func (s *ShowTagValuesStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{{Admin: false, Name: "", Privilege: ReadPrivilege}}
+}
+
+// ShowUsersStatement represents a SHOW USERS command for listing users.
+type ShowUsersStatement struct{}
+
+// String returns the fixed text "SHOW USERS".
+func (s *ShowUsersStatement) String() string {
+	return "SHOW USERS"
+}
+
+// RequiredPrivileges returns a single admin AllPrivileges entry with an empty Name.
+func (s *ShowUsersStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{{Admin: true, Name: "", Privilege: AllPrivileges}}
+}
+
+// ShowFieldKeysStatement represents a SHOW FIELD KEYS command for listing field keys.
+type ShowFieldKeysStatement struct {
+	// Data sources to list from; String renders them after FROM when non-nil.
+	Sources Sources
+
+	// Fields to sort results by; String renders them after ORDER BY when non-empty.
+	SortFields SortFields
+
+	// Maximum number of rows to be returned.
+	// Unlimited if zero.
+	Limit int
+
+	// Returns rows starting at an offset from the first row.
+	Offset int
+}
+
+// String renders the statement as text, appending only the clauses that are set.
+func (s *ShowFieldKeysStatement) String() string {
+	var buf bytes.Buffer
+	_, _ = buf.WriteString("SHOW FIELD KEYS")
+
+	if s.Sources != nil {
+		_, _ = buf.WriteString(" FROM ")
+		_, _ = buf.WriteString(s.Sources.String())
+	}
+	if len(s.SortFields) > 0 {
+		_, _ = buf.WriteString(" ORDER BY ")
+		_, _ = buf.WriteString(s.SortFields.String())
+	}
+	if s.Limit > 0 { // zero or negative limits are not rendered
+		_, _ = buf.WriteString(" LIMIT ")
+		_, _ = buf.WriteString(strconv.Itoa(s.Limit))
+	}
+	if s.Offset > 0 { // zero or negative offsets are not rendered
+		_, _ = buf.WriteString(" OFFSET ")
+		_, _ = buf.WriteString(strconv.Itoa(s.Offset))
+	}
+	return buf.String()
+}
+
+// RequiredPrivileges returns a single non-admin ReadPrivilege entry with an empty Name.
+func (s *ShowFieldKeysStatement) RequiredPrivileges() ExecutionPrivileges {
+	return ExecutionPrivileges{{Admin: false, Name: "", Privilege: ReadPrivilege}}
+}
+
+// Fields represents a list of field pointers.
+type Fields []*Field
+
+// String returns the fields' string forms joined by ", " (an empty string for an empty list).
+func (a Fields) String() string {
+	var str []string
+	for _, f := range a {
+		str = append(str, f.String())
+	}
+	return strings.Join(str, ", ")
+}
+
+// Field pairs an expression from a select statement with an optional alias.
+type Field struct {
+	Expr  Expr
+	Alias string
+}
+
+// Name returns the name of the field. Returns alias, if set.
+// Otherwise uses the name of a *Call or the value of a *VarRef expression.
+func (f *Field) Name() string {
+	// A non-empty alias always wins.
+	if f.Alias != "" {
+		return f.Alias
+	}
+
+	// Only *Call and *VarRef expressions carry a usable name.
+	switch expr := f.Expr.(type) {
+	case *Call:
+		return expr.Name
+	case *VarRef:
+		return expr.Val
+	}
+
+	// Any other expression type yields a blank name.
+	return ""
+}
+
+// String returns the expression text, followed by ` AS "<alias>"` when Alias is set.
+func (f *Field) String() string {
+	str := f.Expr.String()
+
+	switch f.Expr.(type) {
+	case *VarRef:
+		quoted := false
+		// Backslash-escape any double-quotes in the name and mark it for quoting
+		if strings.Contains(str, `"`) {
+			str = strings.Replace(str, `"`, `\"`, -1)
+			quoted = true
+		}
+
+		// A single-quote is left unchanged but also marks the name for quoting
+		if strings.Contains(str, `'`) {
+			quoted = true
+		}
+
+		// Double-quote names that contain a space or were marked above
+		if strings.Contains(str, " ") || quoted {
+			str = fmt.Sprintf("\"%s\"", str)
+		}
+	}
+
+	if f.Alias == "" {
+		return str
+	}
+	return fmt.Sprintf("%s AS %s", str, fmt.Sprintf(`"%s"`, f.Alias)) // alias is wrapped in double quotes as-is; it is not escaped
+}
+
+// Len, Less and Swap order Fields by Name(); NOTE(review): presumably for use with package sort - confirm against callers.
+func (f Fields) Len() int           { return len(f) }
+func (f Fields) Less(i, j int) bool { return f[i].Name() < f[j].Name() }
+func (f Fields) Swap(i, j int)      { f[i], f[j] = f[j], f[i] }
+
+// Dimensions represents a list of dimension pointers.
+type Dimensions []*Dimension
+
+// String returns the dimensions' string forms joined by ", " (an empty string for an empty list).
+func (a Dimensions) String() string {
+	var str []string
+	for _, d := range a {
+		str = append(str, d.String())
+	}
+	return strings.Join(str, ", ")
+}
+
+// Normalize returns the time() interval and the tag names (VarRef values, in order) separately.
+// Returns 0 if no time interval is specified.
+// Returns an error for a call other than time(), a malformed time() call, a repeated time(), or any other expression type.
+func (a Dimensions) Normalize() (time.Duration, []string, error) {
+	var dur time.Duration
+	var tags []string
+
+	for _, dim := range a {
+		switch expr := dim.Expr.(type) {
+		case *Call:
+			// Ensure the call is time() and it only has one duration argument.
+			// A further time() call is rejected once a non-zero duration has been recorded.
+			if expr.Name != "time" {
+				return 0, nil, errors.New("only time() calls allowed in dimensions")
+			} else if len(expr.Args) != 1 {
+				return 0, nil, errors.New("time dimension expected one argument")
+			} else if lit, ok := expr.Args[0].(*DurationLiteral); !ok {
+				return 0, nil, errors.New("time dimension must have one duration argument")
+			} else if dur != 0 {
+				return 0, nil, errors.New("multiple time dimensions not allowed")
+			} else {
+				dur = lit.Val
+			}
+
+		case *VarRef:
+			tags = append(tags, expr.Val)
+
+		default:
+			return 0, nil, errors.New("only time and tag dimensions allowed")
+		}
+	}
+
+	return dur, tags, nil
+}
+
+// Dimension wraps an expression that a select statement is grouped by.
+type Dimension struct {
+	Expr Expr
+}
+
+// String returns the string form of the wrapped expression.
+func (d *Dimension) String() string { return d.Expr.String() }
+
+// Measurements represents a list of measurement pointers.
+type Measurements []*Measurement
+
+// String returns the measurements' string forms joined by ", " (an empty string for an empty list).
+func (a Measurements) String() string {
+	var str []string
+	for _, m := range a {
+		str = append(str, m.String())
+	}
+	return strings.Join(str, ", ")
+}
+
+// Measurement represents a single measurement used as a datasource, identified by Name or by Regex.
+type Measurement struct {
+	Database        string
+	RetentionPolicy string
+	Name            string
+	Regex           *RegexLiteral
+}
+
+// String renders the optional "database" and "retention policy" qualifiers, then the name (or the regex when Name is empty).
+func (m *Measurement) String() string {
+	var buf bytes.Buffer
+	if m.Database != "" {
+		_, _ = buf.WriteString(`"`)
+		_, _ = buf.WriteString(m.Database) // written as-is between the quotes
+		_, _ = buf.WriteString(`".`)
+	}
+
+	if m.RetentionPolicy != "" {
+		_, _ = buf.WriteString(`"`)
+		_, _ = buf.WriteString(m.RetentionPolicy) // written as-is between the quotes
+		_, _ = buf.WriteString(`"`)
+	}
+
+	if m.Database != "" || m.RetentionPolicy != "" {
+		_, _ = buf.WriteString(`.`) // with a database but no retention policy this yields two consecutive dots
+	}
+
+	if m.Name != "" {
+		_, _ = buf.WriteString(QuoteIdent(m.Name))
+	} else if m.Regex != nil {
+		_, _ = buf.WriteString(m.Regex.String())
+	}
+
+	return buf.String()
+}
+
+// VarRef represents a reference to a variable by name.
+type VarRef struct {
+	Val string
+}
+
+// String returns Val unchanged.
+func (r *VarRef) String() string { return r.Val }
+
+// Call represents a function call with a name and a list of argument expressions.
+type Call struct {
+	Name string
+	Args []Expr
+}
+
+// String returns the call formatted as name(arg1, arg2, ...).
+func (c *Call) String() string {
+	// Collect the string form of every argument.
+	var str []string
+	for _, arg := range c.Args {
+		str = append(str, arg.String())
+	}
+
+	// Write the function name, then the arguments joined by ", " inside parentheses.
+	return fmt.Sprintf("%s(%s)", c.Name, strings.Join(str, ", "))
+}
+
+// Distinct represents a DISTINCT expression over a single identifier.
+type Distinct struct {
+	// Identifier following DISTINCT
+	Val string
+}
+
+// String returns "DISTINCT " followed by Val.
+func (d *Distinct) String() string {
+	return fmt.Sprintf("DISTINCT %s", d.Val)
+}
+
+// NewCall returns a new "distinct" call expression whose single argument is a VarRef to Val.
+func (d *Distinct) NewCall() *Call {
+	return &Call{
+		Name: "distinct",
+		Args: []Expr{
+			&VarRef{Val: d.Val},
+		},
+	}
+}
+
+// NumberLiteral represents a numeric literal stored as a float64.
+type NumberLiteral struct {
+	Val float64
+}
+
+// String formats Val in 'f' notation with exactly 3 digits after the decimal point. NOTE(review): finer precision is not shown - confirm this is intended.
+func (l *NumberLiteral) String() string { return strconv.FormatFloat(l.Val, 'f', 3, 64) }
+
+// BooleanLiteral represents a boolean literal value.
+type BooleanLiteral struct {
+	Val bool
+}
+
+// String returns "true" or "false" according to Val.
+func (l *BooleanLiteral) String() string {
+	if l.Val {
+		return "true"
+	}
+	return "false"
+}
+
+// isTrueLiteral reports whether expr is a *BooleanLiteral whose value is true.
+func isTrueLiteral(expr Expr) bool {
+	if expr, ok := expr.(*BooleanLiteral); ok {
+		return expr.Val == true
+	}
+	return false
+}
+
+// isFalseLiteral reports whether expr is a *BooleanLiteral whose value is false.
+func isFalseLiteral(expr Expr) bool {
+	if expr, ok := expr.(*BooleanLiteral); ok {
+		return expr.Val == false
+	}
+	return false
+}
+
+// StringLiteral represents a string literal value.
+type StringLiteral struct {
+	Val string
+}
+
+// String returns Val passed through QuoteString.
+func (l *StringLiteral) String() string { return QuoteString(l.Val) }
+
+// TimeLiteral represents a point-in-time literal value.
+type TimeLiteral struct {
+	Val time.Time
+}
+
+// String returns Val converted to UTC and formatted as RFC3339Nano, wrapped in single quotes.
+func (l *TimeLiteral) String() string {
+	return `'` + l.Val.UTC().Format(time.RFC3339Nano) + `'`
+}
+
+// DurationLiteral represents a duration literal value.
+type DurationLiteral struct {
+	Val time.Duration
+}
+
+// String returns Val rendered by FormatDuration.
+func (l *DurationLiteral) String() string { return FormatDuration(l.Val) }
+
+// nilLiteral represents a nil literal.
+// This is not available to the query language itself. It's only used internally.
+type nilLiteral struct{}
+
+// String returns the fixed text "nil".
+func (l *nilLiteral) String() string { return `nil` }
+
+// BinaryExpr represents an operation between a left-hand and a right-hand expression.
+type BinaryExpr struct {
+	Op  Token
+	LHS Expr
+	RHS Expr
+}
+
+// String returns "LHS Op RHS" separated by single spaces; it adds no parentheses of its own.
+func (e *BinaryExpr) String() string {
+	return fmt.Sprintf("%s %s %s", e.LHS.String(), e.Op.String(), e.RHS.String())
+}
+
+// ParenExpr wraps an expression that is parenthesized.
+type ParenExpr struct {
+	Expr Expr
+}
+
+// String returns the inner expression's string form wrapped in parentheses.
+func (e *ParenExpr) String() string { return fmt.Sprintf("(%s)", e.Expr.String()) }
+
+// RegexLiteral represents a regular expression literal holding a compiled regexp.
+type RegexLiteral struct {
+	Val *regexp.Regexp
+}
+
+// String returns the regexp source wrapped in slashes, or an empty string when Val is nil.
+func (r *RegexLiteral) String() string {
+	if r.Val != nil {
+		return fmt.Sprintf("/%s/", r.Val.String())
+	}
+	return ""
+}
+
+// CloneRegexLiteral returns a clone of the RegexLiteral with a freshly compiled regexp; nil yields nil.
+func CloneRegexLiteral(r *RegexLiteral) *RegexLiteral {
+	if r == nil {
+		return nil
+	}
+
+	clone := &RegexLiteral{}
+	if r.Val != nil {
+		clone.Val = regexp.MustCompile(r.Val.String()) // recompiles from the source text of the existing regexp
+	}
+
+	return clone
+}
+
+// Wildcard represents a wildcard ("*") expression.
+type Wildcard struct{}
+
+// String returns the fixed text "*".
+func (e *Wildcard) String() string { return "*" }
+
+// CloneExpr returns a copy of the expression tree; nil yields nil, and a *RegexLiteral copy shares the original's Val pointer.
+func CloneExpr(expr Expr) Expr {
+	if expr == nil {
+		return nil
+	}
+	switch expr := expr.(type) {
+	case *BinaryExpr:
+		return &BinaryExpr{Op: expr.Op, LHS: CloneExpr(expr.LHS), RHS: CloneExpr(expr.RHS)}
+	case *BooleanLiteral:
+		return &BooleanLiteral{Val: expr.Val}
+	case *Call:
+		args := make([]Expr, len(expr.Args))
+		for i, arg := range expr.Args {
+			args[i] = CloneExpr(arg)
+		}
+		return &Call{Name: expr.Name, Args: args}
+	case *Distinct:
+		return &Distinct{Val: expr.Val}
+	case *DurationLiteral:
+		return &DurationLiteral{Val: expr.Val}
+	case *NumberLiteral:
+		return &NumberLiteral{Val: expr.Val}
+	case *ParenExpr:
+		return &ParenExpr{Expr: CloneExpr(expr.Expr)}
+	case *RegexLiteral:
+		return &RegexLiteral{Val: expr.Val} // same *regexp.Regexp pointer; CloneRegexLiteral is not used here
+	case *StringLiteral:
+		return &StringLiteral{Val: expr.Val}
+	case *TimeLiteral:
+		return &TimeLiteral{Val: expr.Val}
+	case *VarRef:
+		return &VarRef{Val: expr.Val}
+	case *Wildcard:
+		return &Wildcard{}
+	}
+	panic("unreachable") // reached for any Expr type without a case above
+}
+
+// TimeRange returns the minimum and maximum times specified by the time comparisons found while walking an expression.
+// Returns zero times if there is no bound.
+func TimeRange(expr Expr) (min, max time.Time) {
+	WalkFunc(expr, func(n Node) {
+		if n, ok := n.(*BinaryExpr); ok {
+			// Extract literal expression & operator on LHS.
+			// Check for "time" on the left-hand side first.
+			// Otherwise check for it on the right-hand side and flip the operator.
+			value, op := timeExprValue(n.LHS, n.RHS), n.Op
+			if value.IsZero() {
+				if value = timeExprValue(n.RHS, n.LHS); value.IsZero() {
+					return
+				} else if op == LT {
+					op = GT
+				} else if op == LTE {
+					op = GTE
+				} else if op == GT {
+					op = LT
+				} else if op == GTE {
+					op = LTE
+				}
+			}
+
+			// Update the min/max depending on the operator.
+			// The GT & LT cases shift the value by +/- 1ns to make the bound exclusive.
+			switch op {
+			case GT:
+				if min.IsZero() || value.After(min) {
+					min = value.Add(time.Nanosecond)
+				}
+			case GTE:
+				if min.IsZero() || value.After(min) {
+					min = value
+				}
+			case LT:
+				if max.IsZero() || value.Before(max) {
+					max = value.Add(-time.Nanosecond)
+				}
+			case LTE:
+				if max.IsZero() || value.Before(max) {
+					max = value
+				}
+			case EQ: // equality may tighten both bounds to the same value
+				if min.IsZero() || value.After(min) {
+					min = value
+				}
+				if max.IsZero() || value.Before(max) {
+					max = value
+				}
+			}
+		}
+	})
+	return
+}
+
+// TimeRangeAsEpochNano returns the minimum and maximum times, as epoch nano, specified by
+// an expression. If there is no lower bound, the start of the epoch is returned
+// for minimum. If there is no higher bound, now is returned for maximum.
+func TimeRangeAsEpochNano(expr Expr) (min, max int64) {
+	tmin, tmax := TimeRange(expr)
+	if tmin.IsZero() {
+		min = time.Unix(0, 0).UnixNano()
+	} else {
+		min = tmin.UnixNano()
+	}
+	if tmax.IsZero() {
+		max = time.Now().UnixNano() // the current time is read on each call
+	} else {
+		max = tmax.UnixNano()
+	}
+	return
+}
+
+// timeExprValue returns the time for lit when ref is a VarRef named "time" (case-insensitive) and lit is a time or duration literal.
+// Returns zero time if the expression is not a time expression.
+func timeExprValue(ref Expr, lit Expr) time.Time {
+	if ref, ok := ref.(*VarRef); ok && strings.ToLower(ref.Val) == "time" {
+		switch lit := lit.(type) {
+		case *TimeLiteral:
+			return lit.Val
+		case *DurationLiteral:
+			return time.Unix(0, int64(lit.Val)).UTC() // the duration is taken as nanoseconds since the Unix epoch
+		}
+	}
+	return time.Time{}
+}
+
+// Visitor can be called by Walk to traverse an AST hierarchy.
+// Walk calls Visit for each node it reaches and walks that node's children with the returned Visitor, skipping them when it is nil.
+type Visitor interface {
+	Visit(Node) Visitor
+}
+
+// Walk traverses a node hierarchy in depth-first order, visiting each node before the children listed for its type below.
+func Walk(v Visitor, node Node) {
+	if node == nil {
+		return
+	}
+
+	if v = v.Visit(node); v == nil { // a nil Visitor returned by Visit skips this node's children
+		return
+	}
+
+	switch n := node.(type) {
+	case *BinaryExpr:
+		Walk(v, n.LHS)
+		Walk(v, n.RHS)
+
+	case *Call:
+		for _, expr := range n.Args {
+			Walk(v, expr)
+		}
+
+	case *CreateContinuousQueryStatement:
+		Walk(v, n.Source)
+
+	case *Dimension:
+		Walk(v, n.Expr)
+
+	case Dimensions:
+		for _, c := range n {
+			Walk(v, c)
+		}
+
+	case *Field:
+		Walk(v, n.Expr)
+
+	case Fields:
+		for _, c := range n {
+			Walk(v, c)
+		}
+
+	case *ParenExpr:
+		Walk(v, n.Expr)
+
+	case *Query:
+		Walk(v, n.Statements)
+
+	case *SelectStatement:
+		Walk(v, n.Fields)
+		Walk(v, n.Target)
+		Walk(v, n.Dimensions)
+		Walk(v, n.Sources)
+		Walk(v, n.Condition)
+		Walk(v, n.SortFields)
+
+	case *ShowSeriesStatement:
+		Walk(v, n.Sources)
+		Walk(v, n.Condition)
+
+	case *ShowTagKeysStatement:
+		Walk(v, n.Sources)
+		Walk(v, n.Condition)
+		Walk(v, n.SortFields)
+
+	case *ShowTagValuesStatement:
+		Walk(v, n.Sources)
+		Walk(v, n.Condition)
+		Walk(v, n.SortFields)
+
+	case *ShowFieldKeysStatement:
+		Walk(v, n.Sources)
+		Walk(v, n.SortFields)
+
+	case SortFields:
+		for _, sf := range n {
+			Walk(v, sf)
+		}
+
+	case Sources:
+		for _, s := range n {
+			Walk(v, s)
+		}
+
+	case Statements:
+		for _, s := range n {
+			Walk(v, s)
+		}
+
+	case *Target:
+		if n != nil { // a nil *Target stored in a Node passes the interface nil check at the top
+			Walk(v, n.Measurement)
+		}
+	}
+}
+
+// WalkFunc traverses a node hierarchy in depth-first order, calling fn for every node that Walk visits.
+func WalkFunc(node Node, fn func(Node)) {
+	Walk(walkFuncVisitor(fn), node)
+}
+
+type walkFuncVisitor func(Node) // adapts a plain function to the Visitor interface
+
+func (fn walkFuncVisitor) Visit(n Node) Visitor { fn(n); return fn } // always returns itself, so no subtree is skipped
+
+// Rewriter can be called by Rewrite to replace nodes in the AST hierarchy.
+// The Rewrite() method receives a node and returns the node to use in its place.
+type Rewriter interface {
+	Rewrite(Node) Node
+}
+
+// Rewrite recursively invokes the rewriter to replace each node, updating child fields and slice elements in place.
+// Children are rewritten first, then r.Rewrite(node) is returned. NOTE(review): the unchecked type assertions panic if r returns nil or another type for a child.
+func Rewrite(r Rewriter, node Node) Node {
+	switch n := node.(type) {
+	case *Query:
+		n.Statements = Rewrite(r, n.Statements).(Statements)
+
+	case Statements:
+		for i, s := range n {
+			n[i] = Rewrite(r, s).(Statement)
+		}
+
+	case *SelectStatement:
+		n.Fields = Rewrite(r, n.Fields).(Fields)
+		n.Dimensions = Rewrite(r, n.Dimensions).(Dimensions)
+		n.Sources = Rewrite(r, n.Sources).(Sources)
+		n.Condition = Rewrite(r, n.Condition).(Expr)
+
+	case Fields:
+		for i, f := range n {
+			n[i] = Rewrite(r, f).(*Field)
+		}
+
+	case *Field:
+		n.Expr = Rewrite(r, n.Expr).(Expr)
+
+	case Dimensions:
+		for i, d := range n {
+			n[i] = Rewrite(r, d).(*Dimension)
+		}
+
+	case *Dimension:
+		n.Expr = Rewrite(r, n.Expr).(Expr)
+
+	case *BinaryExpr:
+		n.LHS = Rewrite(r, n.LHS).(Expr)
+		n.RHS = Rewrite(r, n.RHS).(Expr)
+
+	case *ParenExpr:
+		n.Expr = Rewrite(r, n.Expr).(Expr)
+
+	case *Call:
+		for i, expr := range n.Args {
+			n.Args[i] = Rewrite(r, expr).(Expr)
+		}
+	}
+
+	return r.Rewrite(node)
+}
+
+// RewriteFunc rewrites a node hierarchy using fn as the Rewriter.
+func RewriteFunc(node Node, fn func(Node) Node) Node {
+	return Rewrite(rewriterFunc(fn), node)
+}
+
+type rewriterFunc func(Node) Node // adapts a plain function to the Rewriter interface
+
+func (fn rewriterFunc) Rewrite(n Node) Node { return fn(n) }
+
+// Eval evaluates expr against a map of variable values; a nil or unsupported expression evaluates to nil.
+func Eval(expr Expr, m map[string]interface{}) interface{} {
+	if expr == nil {
+		return nil
+	}
+
+	switch expr := expr.(type) {
+	case *BinaryExpr:
+		return evalBinaryExpr(expr, m)
+	case *BooleanLiteral:
+		return expr.Val
+	case *NumberLiteral:
+		return expr.Val
+	case *ParenExpr:
+		return Eval(expr.Expr, m)
+	case *StringLiteral:
+		return expr.Val
+	case *VarRef:
+		return m[expr.Val] // a missing key yields nil
+	default:
+		return nil
+	}
+}
+
+func evalBinaryExpr(expr *BinaryExpr, m map[string]interface{}) interface{} {
+	lhs := Eval(expr.LHS, m)
+	rhs := Eval(expr.RHS, m)
+
+	// Dispatch on the LHS type; an RHS of another type is replaced by that type's zero value, and unsupported cases return nil.
+	switch lhs := lhs.(type) {
+	case bool:
+		rhs, _ := rhs.(bool)
+		switch expr.Op {
+		case AND:
+			return lhs && rhs
+		case OR:
+			return lhs || rhs
+		case EQ:
+			return lhs == rhs
+		case NEQ:
+			return lhs != rhs
+		}
+	case float64:
+		rhs, _ := rhs.(float64)
+		switch expr.Op {
+		case EQ:
+			return lhs == rhs
+		case NEQ:
+			return lhs != rhs
+		case LT:
+			return lhs < rhs
+		case LTE:
+			return lhs <= rhs
+		case GT:
+			return lhs > rhs
+		case GTE:
+			return lhs >= rhs
+		case ADD:
+			return lhs + rhs
+		case SUB:
+			return lhs - rhs
+		case MUL:
+			return lhs * rhs
+		case DIV:
+			if rhs == 0 { // division by zero evaluates to 0
+				return float64(0)
+			}
+			return lhs / rhs
+		}
+	case int64:
+		// The RHS is asserted as a float64 (per the original note, number literals
+		// are parsed as float64) and then converted to an int64 for the operation.
+		rhsf, _ := rhs.(float64)
+		rhs := int64(rhsf)
+		switch expr.Op {
+		case EQ:
+			return lhs == rhs
+		case NEQ:
+			return lhs != rhs
+		case LT:
+			return lhs < rhs
+		case LTE:
+			return lhs <= rhs
+		case GT:
+			return lhs > rhs
+		case GTE:
+			return lhs >= rhs
+		case ADD:
+			return lhs + rhs
+		case SUB:
+			return lhs - rhs
+		case MUL:
+			return lhs * rhs
+		case DIV:
+			if rhs == 0 { // division by zero evaluates to 0
+				return int64(0)
+			}
+			return lhs / rhs
+		}
+	case string:
+		rhs, _ := rhs.(string)
+		switch expr.Op {
+		case EQ:
+			return lhs == rhs
+		case NEQ:
+			return lhs != rhs
+		}
+	}
+	return nil
+}
+
+// Reduce evaluates expr using the available values in valuer and returns the simplified expression.
+// References that don't exist in valuer are kept as variable references.
+func Reduce(expr Expr, valuer Valuer) Expr {
+	expr = reduce(expr, valuer)
+
+	// Unwrap one level of parentheses at the top level.
+	if expr, ok := expr.(*ParenExpr); ok {
+		return expr.Expr
+	}
+	return expr
+}
+
+func reduce(expr Expr, valuer Valuer) Expr { // dispatches to the reducer for expr's type
+	if expr == nil {
+		return nil
+	}
+
+	switch expr := expr.(type) {
+	case *BinaryExpr:
+		return reduceBinaryExpr(expr, valuer)
+	case *Call:
+		return reduceCall(expr, valuer)
+	case *ParenExpr:
+		return reduceParenExpr(expr, valuer)
+	case *VarRef:
+		return reduceVarRef(expr, valuer)
+	default:
+		return CloneExpr(expr) // every other expression type is copied unchanged
+	}
+}
+
+func reduceBinaryExpr(expr *BinaryExpr, valuer Valuer) Expr {
+	// Reduce both operands before looking at the operator.
+	op := expr.Op
+	lhs := reduce(expr.LHS, valuer)
+	rhs := reduce(expr.RHS, valuer)
+
+	// Do not evaluate if one side is nil; return the expression rebuilt from the reduced sides.
+	if lhs == nil || rhs == nil {
+		return &BinaryExpr{LHS: lhs, RHS: rhs, Op: expr.Op}
+	}
+
+	// For AND/OR, a boolean literal on either side short-circuits the result
+	// or leaves just the other operand.
+	if op == AND {
+		if isFalseLiteral(lhs) || isFalseLiteral(rhs) {
+			return &BooleanLiteral{Val: false}
+		} else if isTrueLiteral(lhs) {
+			return rhs
+		} else if isTrueLiteral(rhs) {
+			return lhs
+		}
+	} else if op == OR {
+		if isTrueLiteral(lhs) || isTrueLiteral(rhs) {
+			return &BooleanLiteral{Val: true}
+		} else if isFalseLiteral(lhs) {
+			return rhs
+		} else if isFalseLiteral(rhs) {
+			return lhs
+		}
+	}
+
+	// Dispatch on the literal type of the LHS; a non-literal LHS leaves the expression unevaluated.
+	switch lhs := lhs.(type) {
+	case *BooleanLiteral:
+		return reduceBinaryExprBooleanLHS(op, lhs, rhs)
+	case *DurationLiteral:
+		return reduceBinaryExprDurationLHS(op, lhs, rhs)
+	case *nilLiteral:
+		return reduceBinaryExprNilLHS(op, lhs, rhs)
+	case *NumberLiteral:
+		return reduceBinaryExprNumberLHS(op, lhs, rhs)
+	case *StringLiteral:
+		return reduceBinaryExprStringLHS(op, lhs, rhs)
+	case *TimeLiteral:
+		return reduceBinaryExprTimeLHS(op, lhs, rhs)
+	default:
+		return &BinaryExpr{Op: op, LHS: lhs, RHS: rhs}
+	}
+}
+
+func reduceBinaryExprBooleanLHS(op Token, lhs *BooleanLiteral, rhs Expr) Expr { // folds boolean-vs-boolean EQ, NEQ, AND and OR
+	switch rhs := rhs.(type) {
+	case *BooleanLiteral:
+		switch op {
+		case EQ:
+			return &BooleanLiteral{Val: lhs.Val == rhs.Val}
+		case NEQ:
+			return &BooleanLiteral{Val: lhs.Val != rhs.Val}
+		case AND:
+			return &BooleanLiteral{Val: lhs.Val && rhs.Val}
+		case OR:
+			return &BooleanLiteral{Val: lhs.Val || rhs.Val}
+		}
+	case *nilLiteral:
+		return &BooleanLiteral{Val: false} // any operator against a nil literal reduces to false
+	}
+	return &BinaryExpr{Op: op, LHS: lhs, RHS: rhs} // unsupported combinations stay unevaluated
+}
+
+func reduceBinaryExprDurationLHS(op Token, lhs *DurationLiteral, rhs Expr) Expr { // folds a duration LHS against duration, number, time and nil literals
+	switch rhs := rhs.(type) {
+	case *DurationLiteral:
+		switch op {
+		case ADD:
+			return &DurationLiteral{Val: lhs.Val + rhs.Val}
+		case SUB:
+			return &DurationLiteral{Val: lhs.Val - rhs.Val}
+		case EQ:
+			return &BooleanLiteral{Val: lhs.Val == rhs.Val}
+		case NEQ:
+			return &BooleanLiteral{Val: lhs.Val != rhs.Val}
+		case GT:
+			return &BooleanLiteral{Val: lhs.Val > rhs.Val}
+		case GTE:
+			return &BooleanLiteral{Val: lhs.Val >= rhs.Val}
+		case LT:
+			return &BooleanLiteral{Val: lhs.Val < rhs.Val}
+		case LTE:
+			return &BooleanLiteral{Val: lhs.Val <= rhs.Val}
+		}
+	case *NumberLiteral:
+		switch op {
+		case MUL:
+			return &DurationLiteral{Val: lhs.Val * time.Duration(rhs.Val)} // the float multiplier is converted to an integer Duration first
+		case DIV:
+			if rhs.Val == 0 { // division by zero reduces to a zero duration
+				return &DurationLiteral{Val: 0}
+			}
+			return &DurationLiteral{Val: lhs.Val / time.Duration(rhs.Val)} // NOTE(review): a divisor of magnitude below 1 converts to Duration 0 here - looks like an integer divide by zero; confirm
+		}
+	case *TimeLiteral:
+		switch op {
+		case ADD:
+			return &TimeLiteral{Val: rhs.Val.Add(lhs.Val)}
+		}
+	case *nilLiteral:
+		return &BooleanLiteral{Val: false} // any operator against a nil literal reduces to false
+	}
+	return &BinaryExpr{Op: op, LHS: lhs, RHS: rhs} // unsupported combinations stay unevaluated
+}
+
+func reduceBinaryExprNilLHS(op Token, lhs *nilLiteral, rhs Expr) Expr {
+	switch op {
+	case EQ, NEQ:
+		return &BooleanLiteral{Val: false} // both EQ and NEQ with a nil literal LHS reduce to false, whatever the RHS
+	}
+	return &BinaryExpr{Op: op, LHS: lhs, RHS: rhs} // other operators stay unevaluated
+}
+
+func reduceBinaryExprNumberLHS(op Token, lhs *NumberLiteral, rhs Expr) Expr { // folds number-vs-number arithmetic and comparisons
+	switch rhs := rhs.(type) {
+	case *NumberLiteral:
+		switch op {
+		case ADD:
+			return &NumberLiteral{Val: lhs.Val + rhs.Val}
+		case SUB:
+			return &NumberLiteral{Val: lhs.Val - rhs.Val}
+		case MUL:
+			return &NumberLiteral{Val: lhs.Val * rhs.Val}
+		case DIV:
+			if rhs.Val == 0 { // division by zero reduces to 0
+				return &NumberLiteral{Val: 0}
+			}
+			return &NumberLiteral{Val: lhs.Val / rhs.Val}
+		case EQ:
+			return &BooleanLiteral{Val: lhs.Val == rhs.Val}
+		case NEQ:
+			return &BooleanLiteral{Val: lhs.Val != rhs.Val}
+		case GT:
+			return &BooleanLiteral{Val: lhs.Val > rhs.Val}
+		case GTE:
+			return &BooleanLiteral{Val: lhs.Val >= rhs.Val}
+		case LT:
+			return &BooleanLiteral{Val: lhs.Val < rhs.Val}
+		case LTE:
+			return &BooleanLiteral{Val: lhs.Val <= rhs.Val}
+		}
+	case *nilLiteral:
+		return &BooleanLiteral{Val: false} // any operator against a nil literal reduces to false
+	}
+	return &BinaryExpr{Op: op, LHS: lhs, RHS: rhs} // unsupported combinations stay unevaluated
+}
+
+func reduceBinaryExprStringLHS(op Token, lhs *StringLiteral, rhs Expr) Expr { // folds string-vs-string EQ, NEQ and ADD
+	switch rhs := rhs.(type) {
+	case *StringLiteral:
+		switch op {
+		case EQ:
+			return &BooleanLiteral{Val: lhs.Val == rhs.Val}
+		case NEQ:
+			return &BooleanLiteral{Val: lhs.Val != rhs.Val}
+		case ADD:
+			return &StringLiteral{Val: lhs.Val + rhs.Val} // ADD concatenates
+		}
+	case *nilLiteral:
+		switch op {
+		case EQ, NEQ:
+			return &BooleanLiteral{Val: false} // only EQ/NEQ against a nil literal reduce to false here
+		}
+	}
+	return &BinaryExpr{Op: op, LHS: lhs, RHS: rhs} // unsupported combinations stay unevaluated
+}
+
+func reduceBinaryExprTimeLHS(op Token, lhs *TimeLiteral, rhs Expr) Expr { // folds a time LHS against duration, time and nil literals
+	switch rhs := rhs.(type) {
+	case *DurationLiteral:
+		switch op {
+		case ADD:
+			return &TimeLiteral{Val: lhs.Val.Add(rhs.Val)}
+		case SUB:
+			return &TimeLiteral{Val: lhs.Val.Add(-rhs.Val)}
+		}
+	case *TimeLiteral:
+		switch op {
+		case SUB:
+			return &DurationLiteral{Val: lhs.Val.Sub(rhs.Val)} // time minus time yields a duration
+		case EQ:
+			return &BooleanLiteral{Val: lhs.Val.Equal(rhs.Val)}
+		case NEQ:
+			return &BooleanLiteral{Val: !lhs.Val.Equal(rhs.Val)}
+		case GT:
+			return &BooleanLiteral{Val: lhs.Val.After(rhs.Val)}
+		case GTE:
+			return &BooleanLiteral{Val: lhs.Val.After(rhs.Val) || lhs.Val.Equal(rhs.Val)}
+		case LT:
+			return &BooleanLiteral{Val: lhs.Val.Before(rhs.Val)}
+		case LTE:
+			return &BooleanLiteral{Val: lhs.Val.Before(rhs.Val) || lhs.Val.Equal(rhs.Val)}
+		}
+	case *nilLiteral:
+		return &BooleanLiteral{Val: false} // any operator against a nil literal reduces to false
+	}
+	return &BinaryExpr{Op: op, LHS: lhs, RHS: rhs} // unsupported combinations stay unevaluated
+}
+
+func reduceCall(expr *Call, valuer Valuer) Expr {
+	// Replace an argument-less now() with a time literal when valuer supplies a value for the key "now()".
+	if expr.Name == "now" && len(expr.Args) == 0 && valuer != nil {
+		if v, ok := valuer.Value("now()"); ok {
+			v, _ := v.(time.Time) // a value of another type becomes the zero time
+			return &TimeLiteral{Val: v}
+		}
+	}
+
+	// Otherwise return a new call with each argument reduced.
+	args := make([]Expr, len(expr.Args))
+	for i, arg := range expr.Args {
+		args[i] = reduce(arg, valuer)
+	}
+	return &Call{Name: expr.Name, Args: args}
+}
+
+func reduceParenExpr(expr *ParenExpr, valuer Valuer) Expr {
+	subexpr := reduce(expr.Expr, valuer)
+	if subexpr, ok := subexpr.(*BinaryExpr); ok { // parentheses are kept only around a binary expression
+		return &ParenExpr{Expr: subexpr}
+	}
+	return subexpr
+}
+
+func reduceVarRef(expr *VarRef, valuer Valuer) Expr {
+	// Without a valuer the reference is returned as a fresh copy.
+	if valuer == nil {
+		return &VarRef{Val: expr.Val}
+	}
+
+	// Retrieve the value of the ref.
+	// A reference the valuer does not know is also returned as a fresh copy.
+	v, ok := valuer.Value(expr.Val)
+	if !ok {
+		return &VarRef{Val: expr.Val}
+	}
+
+	// Convert the value to the matching literal type; any other type becomes a nil literal.
+	switch v := v.(type) {
+	case bool:
+		return &BooleanLiteral{Val: v}
+	case time.Duration:
+		return &DurationLiteral{Val: v}
+	case float64:
+		return &NumberLiteral{Val: v}
+	case string:
+		return &StringLiteral{Val: v}
+	case time.Time:
+		return &TimeLiteral{Val: v}
+	default:
+		return &nilLiteral{}
+	}
+}
+
+// Valuer is the interface that wraps the Value() method.
+//
+// Value returns the value for key and a flag reporting whether the key exists.
+type Valuer interface {
+	Value(key string) (interface{}, bool)
+}
+
+// NowValuer returns only the value for "now()", which is its Now field.
+type NowValuer struct {
+	Now time.Time
+}
+
+func (v *NowValuer) Value(key string) (interface{}, bool) { // answers only the exact key "now()"
+	if key == "now()" {
+		return v.Now, true
+	}
+	return nil, false
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/doc.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/doc.go
new file mode 100644
index 00000000000..f93da4b45fc
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/doc.go
@@ -0,0 +1,64 @@
+/*
+Package influxql implements a parser for the InfluxDB query language.
+
+InfluxQL is a DML and DDL language for the InfluxDB time series database.
+It provides the ability to query for aggregate statistics as well as create
+and configure the InfluxDB server.
+
+Selecting data
+
+The SELECT query is used for retrieving data from one or more series. It allows
+for a list of columns followed by a list of series to select from.
+
+	SELECT value FROM cpu_load
+
+You can also add a conditional expression to limit the results of the query:
+
+	SELECT value FROM cpu_load WHERE host = 'influxdb.com'
+
+Two or more series can be combined into a single query and executed together:
+
+	SELECT cpu0.value + cpu1.value
+	FROM cpu_load AS cpu0 INNER JOIN cpu_load cpu1 ON cpu0.host = cpu1.host
+
+Limits and ordering can be set on selection queries as well:
+
+	SELECT value FROM cpu_load LIMIT 100 ORDER DESC;
+
+
+Removing data
+
+The DELETE query is available to remove time series data points from the
+database. This query will delete "cpu_load" values older than an hour:
+
+	DELETE FROM cpu_load WHERE time < now() - 1h
+
+
+Continuous Queries
+
+Queries can be run indefinitely on the server in order to generate new series.
+This is done by running a "SELECT INTO" query. For example, this query computes
+the hourly mean for cpu_load and stores it into a "cpu_load" series in the
+"daily" shard space.
+
+	SELECT mean(value) AS value FROM cpu_load GROUP BY 1h
+	INTO daily.cpu_load
+
+If there is existing data on the source series then this query will be run for
+all historic data. To only execute the query on new incoming data you can append
+"NO BACKFILL" to the end of the query:
+
+	SELECT mean(value) AS value FROM cpu_load GROUP BY 1h
+	INTO daily.cpu_load NO BACKFILL
+
+Continuous queries will return an id that can be used to remove them in the
+future. To remove a continuous query, use the DROP CONTINUOUS QUERY statement:
+
+	DROP CONTINUOUS QUERY 12
+
+You can also list all continuous queries by running:
+
+	LIST CONTINUOUS QUERIES
+
+*/
+package influxql
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/functions.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/functions.go
new file mode 100644
index 00000000000..f09c321e72e
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/functions.go
@@ -0,0 +1,1115 @@
+package influxql
+
+// All aggregate and query functions are defined in this file along with any intermediate data objects they need to process.
+// Query functions are represented as two discrete functions: Map and Reduce. These roughly follow the MapReduce
+// paradigm popularized by Google and Hadoop.
+//
+// When adding an aggregate function, define a mapper, a reducer, and add them in the switch statement in the MapReduceFuncs function
+
+import (
+	"encoding/json"
+	"fmt"
+	"math"
+	"math/rand"
+	"sort"
+	"strings"
+)
+
+// Iterator represents a forward-only iterator over a set of points.
+// These are used by the MapFunctions in this file, which keep calling Next
+// until it reports a time of -1.
+type Iterator interface {
+	// Next returns the time and value of the next point.
+	Next() (time int64, value interface{})
+}
+
+// MapFunc represents a function used for mapping over a sequential series of data.
+// The iterator represents a single group by interval.
+type MapFunc func(Iterator) interface{}
+
+// ReduceFunc represents a function used for reducing mapper output.
+// The reducers in this file skip nil entries in the slice they are given.
+type ReduceFunc func([]interface{}) interface{}
+
+// UnmarshalFunc represents a function that can take bytes from a mapper on a
+// remote server and unmarshal them into a value the reducer can use.
+type UnmarshalFunc func([]byte) (interface{}, error)
+
+// InitializeMapFunc returns the MapFunc that implements the aggregate call c.
+// A nil call denotes a raw (non-aggregate) query and yields MapRawQuery. An
+// error is returned when the argument count or the kind of the first argument
+// does not suit the function, or when the function name is unknown.
+func InitializeMapFunc(c *Call) (MapFunc, error) {
+	// A nil call means the query selects raw data.
+	if c == nil {
+		return MapRawQuery, nil
+	}
+
+	isDerivative := strings.HasSuffix(c.Name, "derivative")
+
+	// Check the argument count: percentile takes exactly two, derivatives
+	// take a field name plus an optional duration, everything else takes one.
+	switch {
+	case c.Name == "percentile":
+		if len(c.Args) != 2 {
+			return nil, fmt.Errorf("expected two arguments for %s()", c.Name)
+		}
+	case isDerivative:
+		if len(c.Args) == 0 {
+			return nil, fmt.Errorf("expected field name argument for %s()", c.Name)
+		}
+	default:
+		if len(c.Args) != 1 {
+			return nil, fmt.Errorf("expected one argument for %s()", c.Name)
+		}
+	}
+
+	// A derivative may wrap a nested aggregate. Every other function needs a
+	// field reference as its first argument; a Distinct node is accepted only
+	// by count, and a nested call only when it is distinct().
+	if !isDerivative {
+		var fieldArg bool
+		switch arg := c.Args[0].(type) {
+		case *VarRef:
+			fieldArg = true
+		case *Distinct:
+			fieldArg = c.Name == "count"
+		case *Call:
+			fieldArg = arg.Name == "distinct"
+		}
+		if !fieldArg {
+			return nil, fmt.Errorf("expected field argument in %s()", c.Name)
+		}
+	}
+
+	// Pick the mapper by function name.
+	switch c.Name {
+	case "count":
+		switch arg := c.Args[0].(type) {
+		case *Distinct:
+			return MapCountDistinct, nil
+		case *Call:
+			if arg.Name == "distinct" {
+				return MapCountDistinct, nil
+			}
+		}
+		return MapCount, nil
+	case "distinct":
+		return MapDistinct, nil
+	case "sum":
+		return MapSum, nil
+	case "mean":
+		return MapMean, nil
+	case "median", "stddev":
+		// Both reducers work on the collected float values, which is exactly
+		// what MapStddev emits.
+		return MapStddev, nil
+	case "min":
+		return MapMin, nil
+	case "max":
+		return MapMax, nil
+	case "spread":
+		return MapSpread, nil
+	case "first":
+		return MapFirst, nil
+	case "last":
+		return MapLast, nil
+	case "percentile":
+		if _, isNumber := c.Args[1].(*NumberLiteral); !isNumber {
+			return nil, fmt.Errorf("expected float argument in percentile()")
+		}
+		return MapEcho, nil
+	case "derivative", "non_negative_derivative":
+		// For derivative(mean(value)) and the like, use the mapper of the
+		// nested aggregate; a plain field is mapped as raw data.
+		if nested, isCall := c.Args[0].(*Call); isCall {
+			return InitializeMapFunc(nested)
+		}
+		return MapRawQuery, nil
+	}
+	return nil, fmt.Errorf("function not found: %q", c.Name)
+}
+
+// InitializeReduceFunc takes an aggregate call from the query and returns the
+// ReduceFunc that combines the output of the mappers for that call. An error
+// is returned when the function name is unknown or its arguments are missing
+// or of the wrong kind.
+func InitializeReduceFunc(c *Call) (ReduceFunc, error) {
+	// Retrieve reduce function by name.
+	switch c.Name {
+	case "count":
+		// Report a missing argument as an error rather than panicking on the
+		// index below.
+		if len(c.Args) == 0 {
+			return nil, fmt.Errorf("expected one argument for %s()", c.Name)
+		}
+		if _, ok := c.Args[0].(*Distinct); ok {
+			return ReduceCountDistinct, nil
+		}
+		if c, ok := c.Args[0].(*Call); ok {
+			if c.Name == "distinct" {
+				return ReduceCountDistinct, nil
+			}
+		}
+		// MapCount emits one count per iterator, so the total is their sum.
+		return ReduceSum, nil
+	case "distinct":
+		return ReduceDistinct, nil
+	case "sum":
+		return ReduceSum, nil
+	case "mean":
+		return ReduceMean, nil
+	case "median":
+		return ReduceMedian, nil
+	case "min":
+		return ReduceMin, nil
+	case "max":
+		return ReduceMax, nil
+	case "spread":
+		return ReduceSpread, nil
+	case "stddev":
+		return ReduceStddev, nil
+	case "first":
+		return ReduceFirst, nil
+	case "last":
+		return ReduceLast, nil
+	case "percentile":
+		if len(c.Args) != 2 {
+			return nil, fmt.Errorf("expected float argument in percentile()")
+		}
+
+		lit, ok := c.Args[1].(*NumberLiteral)
+		if !ok {
+			return nil, fmt.Errorf("expected float argument in percentile()")
+		}
+		return ReducePercentile(lit.Val), nil
+	case "derivative", "non_negative_derivative":
+		// If the arg is another aggregate e.g. derivative(mean(value)), then
+		// use the reduce func for that nested aggregate. The length check
+		// turns a call without arguments into the error below instead of an
+		// index-out-of-range panic.
+		if len(c.Args) > 0 {
+			if fn, ok := c.Args[0].(*Call); ok {
+				return InitializeReduceFunc(fn)
+			}
+		}
+		return nil, fmt.Errorf("expected function argument to %s", c.Name)
+	default:
+		return nil, fmt.Errorf("function not found: %q", c.Name)
+	}
+}
+
+// InitializeUnmarshaller returns the function used to decode JSON-encoded
+// mapper output for the aggregate call c. For raw queries (nil call) and for
+// mean, spread, distinct, first, last, stddev and median the bytes are decoded
+// into the concrete type the corresponding reducer in this file asserts on;
+// every other function decodes into a generic interface{} value.
+func InitializeUnmarshaller(c *Call) (UnmarshalFunc, error) {
+	// Raw data queries carry a list of timestamped values.
+	if c == nil {
+		return func(b []byte) (interface{}, error) {
+			outputs := []*rawQueryMapOutput{}
+			err := json.Unmarshal(b, &outputs)
+			return outputs, err
+		}, nil
+	}
+
+	// Choose the decoder by function name.
+	switch c.Name {
+	case "mean":
+		return func(b []byte) (interface{}, error) {
+			out := new(meanMapOutput)
+			err := json.Unmarshal(b, out)
+			return out, err
+		}, nil
+	case "spread":
+		return func(b []byte) (interface{}, error) {
+			out := new(spreadMapOutput)
+			err := json.Unmarshal(b, out)
+			return out, err
+		}, nil
+	case "distinct":
+		return func(b []byte) (interface{}, error) {
+			var vals distinctValues
+			err := json.Unmarshal(b, &vals)
+			return vals, err
+		}, nil
+	case "first", "last":
+		return func(b []byte) (interface{}, error) {
+			out := new(firstLastMapOutput)
+			err := json.Unmarshal(b, out)
+			return out, err
+		}, nil
+	case "stddev", "median":
+		return func(b []byte) (interface{}, error) {
+			floats := []float64{}
+			err := json.Unmarshal(b, &floats)
+			return floats, err
+		}, nil
+	}
+
+	// Anything else is decoded into whatever generic value encoding/json picks.
+	return func(b []byte) (interface{}, error) {
+		var generic interface{}
+		err := json.Unmarshal(b, &generic)
+		return generic, err
+	}, nil
+}
+
+// MapCount computes the number of values in an iterator.
+func MapCount(itr Iterator) interface{} {
+	n := float64(0)
+	for k, _ := itr.Next(); k != -1; k, _ = itr.Next() {
+		n++
+	}
+	if n > 0 {
+		return n
+	}
+	return nil
+}
+
+// distinctValues is a list of unique field values that implements
+// sort.Interface. Values of the same type sort in their natural order (false
+// before true for booleans). Numeric values of different types sort by their
+// numeric value. Otherwise integers sort before floats, floats before
+// booleans and booleans before strings.
+type distinctValues []interface{}
+
+// Len returns the number of values.
+func (d distinctValues) Len() int { return len(d) }
+
+// Swap exchanges the values at indexes i and j.
+func (d distinctValues) Swap(i, j int) { d[i], d[j] = d[j], d[i] }
+
+// Less reports whether d[i] sorts before d[j]. It panics when the two values
+// have different types and either of them is not a uint64, int64, float64,
+// bool or string.
+func (d distinctValues) Less(i, j int) bool {
+	// Sort by type if types match. int64 is handled here too; without it two
+	// int64 values never compare as less and integer fields stay unsorted.
+	switch a := d[i].(type) {
+	case float64:
+		if b, ok := d[j].(float64); ok {
+			return a < b
+		}
+	case uint64:
+		if b, ok := d[j].(uint64); ok {
+			return a < b
+		}
+	case int64:
+		if b, ok := d[j].(int64); ok {
+			return a < b
+		}
+	case bool:
+		if b, ok := d[j].(bool); ok {
+			return !a && b
+		}
+	case string:
+		if b, ok := d[j].(string); ok {
+			return a < b
+		}
+	}
+
+	// Types did not match, need to sort based on arbitrary weighting of type
+	const (
+		intWeight = iota
+		floatWeight
+		boolWeight
+		stringWeight
+	)
+
+	infer := func(val interface{}) (int, float64) {
+		switch v := val.(type) {
+		case uint64:
+			return intWeight, float64(v)
+		case int64:
+			return intWeight, float64(v)
+		case float64:
+			return floatWeight, v
+		case bool:
+			return boolWeight, 0
+		case string:
+			return stringWeight, 0
+		}
+		panic("unreachable code")
+	}
+
+	w1, n1 := infer(d[i])
+	w2, n2 := infer(d[j])
+
+	// Two numeric values of different types order by value. The condition is
+	// grouped explicitly so the n1 != n2 guard applies to every numeric
+	// pairing; equal values fall through to the type weighting.
+	bothNumeric := w1 <= floatWeight && w2 <= floatWeight
+	if bothNumeric && n1 != n2 {
+		return n1 < n2
+	}
+
+	return w1 < w2
+}
+
+// MapDistinct computes the unique values in an iterator.
+func MapDistinct(itr Iterator) interface{} {
+	var index = make(map[interface{}]struct{})
+
+	for time, value := itr.Next(); time != -1; time, value = itr.Next() {
+		index[value] = struct{}{}
+	}
+
+	if len(index) == 0 {
+		return nil
+	}
+
+	results := make(distinctValues, len(index))
+	var i int
+	for value, _ := range index {
+		results[i] = value
+		i++
+	}
+	return results
+}
+
+// ReduceDistinct finds the unique values for each key.
+func ReduceDistinct(values []interface{}) interface{} {
+	var index = make(map[interface{}]struct{})
+
+	// index distinct values from each mapper
+	for _, v := range values {
+		if v == nil {
+			continue
+		}
+		d, ok := v.(distinctValues)
+		if !ok {
+			msg := fmt.Sprintf("expected distinctValues, got: %T", v)
+			panic(msg)
+		}
+		for _, distinctValue := range d {
+			index[distinctValue] = struct{}{}
+		}
+	}
+
+	// convert map keys to an array
+	results := make(distinctValues, len(index))
+	var i int
+	for k, _ := range index {
+		results[i] = k
+		i++
+	}
+	if len(results) > 0 {
+		sort.Sort(results)
+		return results
+	}
+	return nil
+}
+
+// MapCountDistinct computes the unique count of values in an iterator.
+func MapCountDistinct(itr Iterator) interface{} {
+	var index = make(map[interface{}]struct{})
+
+	for time, value := itr.Next(); time != -1; time, value = itr.Next() {
+		index[value] = struct{}{}
+	}
+
+	if len(index) == 0 {
+		return nil
+	}
+
+	return index
+}
+
+// ReduceCountDistinct finds the unique counts of values.
+func ReduceCountDistinct(values []interface{}) interface{} {
+	var index = make(map[interface{}]struct{})
+
+	// index distinct values from each mapper
+	for _, v := range values {
+		if v == nil {
+			continue
+		}
+		d, ok := v.(map[interface{}]struct{})
+		if !ok {
+			msg := fmt.Sprintf("expected map[interface{}]struct{}, got: %T", v)
+			panic(msg)
+		}
+		for distinctCountValue, _ := range d {
+			index[distinctCountValue] = struct{}{}
+		}
+	}
+
+	return len(index)
+}
+
+// NumberType records whether an aggregate saw float64 or int64 input, so the
+// reducers in this file can return a result of the matching Go type.
+type NumberType int8
+
+const (
+	// Float64Type is the zero value and therefore the default result type.
+	Float64Type NumberType = iota
+	// Int64Type is set by the mappers when they encounter an int64 value.
+	Int64Type
+)
+
+// MapSum adds up the float64 and int64 values an iterator yields. The total
+// is accumulated as a float64 and returned as an int64 if any int64 value was
+// seen, otherwise as a float64. It returns nil when the iterator yields no
+// points.
+func MapSum(itr Iterator) interface{} {
+	var (
+		total      float64
+		points     int
+		resultType NumberType
+	)
+	for {
+		t, v := itr.Next()
+		if t == -1 {
+			break
+		}
+		points++
+		switch n := v.(type) {
+		case int64:
+			total += float64(n)
+			resultType = Int64Type
+		case float64:
+			total += n
+		}
+	}
+
+	if points == 0 {
+		return nil
+	}
+	if resultType == Int64Type {
+		return int64(total)
+	}
+	return total
+}
+
+// ReduceSum adds up the float64 and int64 results of the mappers, skipping
+// nil entries. The total is accumulated as a float64 and returned as an int64
+// if any int64 entry was seen, otherwise as a float64. It returns nil when
+// every entry is nil.
+func ReduceSum(values []interface{}) interface{} {
+	var (
+		total      float64
+		entries    int
+		resultType NumberType
+	)
+	for _, mapped := range values {
+		if mapped == nil {
+			continue
+		}
+		entries++
+		switch n := mapped.(type) {
+		case int64:
+			total += float64(n)
+			resultType = Int64Type
+		case float64:
+			total += n
+		}
+	}
+
+	if entries == 0 {
+		return nil
+	}
+	if resultType == Int64Type {
+		return int64(total)
+	}
+	return total
+}
+
+// MapMean folds the values of an iterator into a running mean and a point
+// count, returned as a *meanMapOutput for ReduceMean to combine. Every point
+// is counted, but only float64 and int64 values update the mean. It returns
+// nil when the iterator yields no points.
+func MapMean(itr Iterator) interface{} {
+	out := &meanMapOutput{}
+
+	for {
+		t, v := itr.Next()
+		if t == -1 {
+			break
+		}
+		out.Count++
+
+		var sample float64
+		switch n := v.(type) {
+		case float64:
+			sample = n
+		case int64:
+			sample = float64(n)
+			out.ResultType = Int64Type
+		default:
+			// Counted above, but contributes nothing to the mean.
+			continue
+		}
+		// Incremental mean update.
+		out.Mean += (sample - out.Mean) / float64(out.Count)
+	}
+
+	if out.Count <= 0 {
+		return nil
+	}
+	return out
+}
+
+// meanMapOutput is the partial result MapMean emits for one iterator; the
+// fields are exported so the value can be JSON-decoded by the unmarshaller
+// for "mean".
+type meanMapOutput struct {
+	Count      int        // number of points seen
+	Mean       float64    // running mean of the numeric points
+	ResultType NumberType // Int64Type once an int64 point was seen
+}
+
+// ReduceMean combines the *meanMapOutput results of the mappers into one
+// overall mean, weighting each partial mean by its point count. Nil entries
+// are skipped. It returns the mean as a float64, or nil when the combined
+// count is zero.
+func ReduceMean(values []interface{}) interface{} {
+	var (
+		mean  float64
+		count int
+	)
+	for _, mapped := range values {
+		if mapped == nil {
+			continue
+		}
+		part := mapped.(*meanMapOutput)
+		total := count + part.Count
+		mean = part.Mean*(float64(part.Count)/float64(total)) + mean*(float64(count)/float64(total))
+		count = total
+	}
+
+	if count > 0 {
+		return mean
+	}
+	return nil
+}
+
+// ReduceMedian concatenates the []float64 results of the mappers and returns
+// their median: the middle value for an odd number of points, the midpoint of
+// the two middle values for an even number. Nil entries are skipped and the
+// result is nil when there are no points.
+func ReduceMedian(values []interface{}) interface{} {
+	var samples []float64
+	for _, mapped := range values {
+		if mapped != nil {
+			samples = append(samples, mapped.([]float64)...)
+		}
+	}
+
+	n := len(samples)
+	switch {
+	case n == 0:
+		return nil
+	case n == 1:
+		return samples[0]
+	case n%2 == 1:
+		return getSortedRange(samples, n/2, 1)[0]
+	}
+
+	// Even count: take the midpoint of the two middle values.
+	pair := getSortedRange(samples, n/2-1, 2)
+	lo, hi := pair[0], pair[1]
+	return lo + (hi-lo)/2
+}
+
+// getSortedRange returns, in sorted order, the count elements that would
+// start at index start if data were fully sorted. It first drops the start
+// lowest elements, then drops whatever exceeds count from the top, and only
+// sorts what is left. That brings the average cost down from O(N lg N) for a
+// full sort (N being len(data)) to O(N + count lg count).
+// Typical uses:
+// - the median: getSortedRange(data, middle, 1)
+// - the top N: getSortedRange(data, len(data) - N, N)
+// - the bottom N: getSortedRange(data, 0, N)
+func getSortedRange(data []float64, start int, count int) []float64 {
+	selected := discardLowerRange(data, start)
+	if excess := len(selected) - count; excess > 0 {
+		selected = discardUpperRange(selected, excess)
+	}
+	sort.Float64s(selected)
+	return selected
+}
+
+// discardLowerRange discards the lower k elements of the sorted data set without sorting all the data. Sorting all of
+// the data would take O(NlgN), where N is len(data), but partitioning to find the kth largest number is O(N) in the
+// average case. The remaining N-k unsorted elements are returned - no kind of ordering is guaranteed on these elements.
+//
+// The result is a newly allocated slice of length len(data)-k, so k must not exceed len(data). data itself is
+// reordered in place by partition.
+func discardLowerRange(data []float64, k int) []float64 {
+	out := make([]float64, len(data)-k)
+	i := 0 // next free position in out
+
+	// discard values lower than the desired range
+	for k > 0 {
+		lows, pivotValue, highs := partition(data)
+
+		lowLength := len(lows)
+		if lowLength > k {
+			// more lows than are left to discard: keep all the highs and the pivot
+			out[i] = pivotValue
+			i++
+			copy(out[i:], highs)
+			i += len(highs)
+			// iterate over the lows again
+			data = lows
+		} else {
+			// discard all the lows
+			data = highs
+			k -= lowLength
+			if k == 0 {
+				// if discarded enough lows, keep the pivot
+				out[i] = pivotValue
+				i++
+			} else {
+				// able to discard the pivot too
+				k--
+			}
+		}
+	}
+	// whatever is still undecided is kept
+	copy(out[i:], data)
+	return out
+}
+
+// discardUpperRange discards the upper k elements of the sorted data set without sorting all the data. Sorting all of
+// the data would take O(NlgN), where N is len(data), but partitioning to find the kth largest number is O(N) in the
+// average case. The remaining N-k unsorted elements are returned - no kind of ordering is guaranteed on these elements.
+//
+// The result is a newly allocated slice of length len(data)-k, so k must not exceed len(data). data itself is
+// reordered in place by partition.
+func discardUpperRange(data []float64, k int) []float64 {
+	out := make([]float64, len(data)-k)
+	i := 0 // next free position in out
+
+	// discard values higher than the desired range
+	for k > 0 {
+		lows, pivotValue, highs := partition(data)
+
+		highLength := len(highs)
+		if highLength > k {
+			// more highs than are left to discard: keep all the lows and the pivot
+			out[i] = pivotValue
+			i++
+			copy(out[i:], lows)
+			i += len(lows)
+			// iterate over the highs again
+			data = highs
+		} else {
+			// discard all the highs
+			data = lows
+			k -= highLength
+			if k == 0 {
+				// if discarded enough highs, keep the pivot
+				out[i] = pivotValue
+				i++
+			} else {
+				// able to discard the pivot too
+				k--
+			}
+		}
+	}
+	// whatever is still undecided is kept
+	copy(out[i:], data)
+	return out
+}
+
+// partition takes a list of data, chooses a random pivot index and returns a list of elements not greater than the
+// pivotValue, the pivotValue, and a list of elements not less than the pivotValue. Elements equal to the pivotValue
+// may end up in either list. partition mutates data, and the returned lists are sub-slices of it.
+// data must not be empty: the pivot index is computed modulo len(data).
+func partition(data []float64) (lows []float64, pivotValue float64, highs []float64) {
+	length := len(data)
+	// there are better (more complex) ways to calculate pivotIndex (e.g. median of 3, median of 3 medians) if this
+	// proves to be inadequate.
+	pivotIndex := rand.Int() % length
+	pivotValue = data[pivotIndex]
+	low, high := 1, length-1
+
+	// put the pivot in the first position
+	data[pivotIndex], data[0] = data[0], data[pivotIndex]
+
+	// partition the data around the pivot
+	for low <= high {
+		// advance low past elements that may stay on the left
+		for low <= high && data[low] <= pivotValue {
+			low++
+		}
+		// retreat high past elements that may stay on the right
+		for high >= low && data[high] >= pivotValue {
+			high--
+		}
+		// both cursors stopped on misplaced elements: exchange them
+		if low < high {
+			data[low], data[high] = data[high], data[low]
+		}
+	}
+
+	// the loop ends with low == high+1, so the two slices split data[1:] without overlap
+	return data[1:low], pivotValue, data[high+1:]
+}
+
+// minMaxMapOut is the partial result emitted by MapMin and MapMax and
+// consumed by ReduceMin and ReduceMax.
+type minMaxMapOut struct {
+	Val  float64    // smallest (MapMin) or largest (MapMax) value seen
+	Type NumberType // Int64Type once an int64 value was seen
+}
+
+// MapMin finds the smallest value an iterator yields and returns it as a
+// *minMaxMapOut for ReduceMin, or nil when the iterator yields no points.
+func MapMin(itr Iterator) interface{} {
+	out := &minMaxMapOut{}
+	seen := false
+
+	// sample lives outside the loop, so a point that is neither float64 nor
+	// int64 is treated as a repeat of the previous sample.
+	var sample float64
+
+	for {
+		t, v := itr.Next()
+		if t == -1 {
+			break
+		}
+		switch n := v.(type) {
+		case int64:
+			sample = float64(n)
+			out.Type = Int64Type
+		case float64:
+			sample = n
+		}
+
+		if seen {
+			out.Val = math.Min(out.Val, sample)
+		} else {
+			// The first point seeds the minimum.
+			out.Val = sample
+			seen = true
+		}
+	}
+
+	if !seen {
+		return nil
+	}
+	return out
+}
+
+// ReduceMin computes the min of value.
+func ReduceMin(values []interface{}) interface{} {
+	min := &minMaxMapOut{}
+	pointsYielded := false
+
+	for _, value := range values {
+		if value == nil {
+			continue
+		}
+
+		v, ok := value.(*minMaxMapOut)
+		if !ok {
+			continue
+		}
+
+		// Initialize min
+		if !pointsYielded {
+			min.Val = v.Val
+			min.Type = v.Type
+			pointsYielded = true
+		}
+		min.Val = math.Min(min.Val, v.Val)
+	}
+	if pointsYielded {
+		switch min.Type {
+		case Float64Type:
+			return min.Val
+		case Int64Type:
+			return int64(min.Val)
+		}
+	}
+	return nil
+}
+
+// MapMax finds the largest value an iterator yields and returns it as a
+// *minMaxMapOut for ReduceMax, or nil when the iterator yields no points.
+func MapMax(itr Iterator) interface{} {
+	out := &minMaxMapOut{}
+	seen := false
+
+	// sample lives outside the loop, so a point that is neither float64 nor
+	// int64 is treated as a repeat of the previous sample.
+	var sample float64
+
+	for {
+		t, v := itr.Next()
+		if t == -1 {
+			break
+		}
+		switch n := v.(type) {
+		case int64:
+			sample = float64(n)
+			out.Type = Int64Type
+		case float64:
+			sample = n
+		}
+
+		if seen {
+			out.Val = math.Max(out.Val, sample)
+		} else {
+			// The first point seeds the maximum.
+			out.Val = sample
+			seen = true
+		}
+	}
+
+	if !seen {
+		return nil
+	}
+	return out
+}
+
+// ReduceMax computes the max of value.
+func ReduceMax(values []interface{}) interface{} {
+	max := &minMaxMapOut{}
+	pointsYielded := false
+
+	for _, value := range values {
+		if value == nil {
+			continue
+		}
+
+		v, ok := value.(*minMaxMapOut)
+		if !ok {
+			continue
+		}
+
+		// Initialize max
+		if !pointsYielded {
+			max.Val = v.Val
+			max.Type = v.Type
+			pointsYielded = true
+		}
+		max.Val = math.Max(max.Val, v.Val)
+	}
+	if pointsYielded {
+		switch max.Type {
+		case Float64Type:
+			return max.Val
+		case Int64Type:
+			return int64(max.Val)
+		}
+	}
+	return nil
+}
+
+// spreadMapOutput is the partial result MapSpread emits for one iterator:
+// the smallest and largest value seen, plus the number type to report.
+type spreadMapOutput struct {
+	Min, Max float64
+	Type     NumberType // Int64Type once an int64 value was seen
+}
+
+// MapSpread tracks the smallest and largest value an iterator yields and
+// returns them as a *spreadMapOutput for ReduceSpread, or nil when the
+// iterator yields no points.
+func MapSpread(itr Iterator) interface{} {
+	out := &spreadMapOutput{}
+	seen := false
+
+	// sample lives outside the loop, so a point that is neither float64 nor
+	// int64 is treated as a repeat of the previous sample.
+	var sample float64
+
+	for {
+		t, v := itr.Next()
+		if t == -1 {
+			break
+		}
+		switch n := v.(type) {
+		case int64:
+			sample = float64(n)
+			out.Type = Int64Type
+		case float64:
+			sample = n
+		}
+
+		if seen {
+			out.Max = math.Max(out.Max, sample)
+			out.Min = math.Min(out.Min, sample)
+		} else {
+			// The first point seeds both bounds.
+			out.Min, out.Max = sample, sample
+			seen = true
+		}
+	}
+
+	if !seen {
+		return nil
+	}
+	return out
+}
+
+// ReduceSpread computes the spread of values.
+func ReduceSpread(values []interface{}) interface{} {
+	result := &spreadMapOutput{}
+	pointsYielded := false
+
+	for _, v := range values {
+		if v == nil {
+			continue
+		}
+		val := v.(*spreadMapOutput)
+		// Initialize
+		if !pointsYielded {
+			result.Max = val.Max
+			result.Min = val.Min
+			result.Type = val.Type
+			pointsYielded = true
+		}
+		result.Max = math.Max(result.Max, val.Max)
+		result.Min = math.Min(result.Min, val.Min)
+	}
+	if pointsYielded {
+		switch result.Type {
+		case Float64Type:
+			return result.Max - result.Min
+		case Int64Type:
+			return int64(result.Max - result.Min)
+		}
+	}
+	return nil
+}
+
+// MapStddev gathers the float64 and int64 values an iterator yields into a
+// []float64 for the reducer. Values of any other type are dropped.
+func MapStddev(itr Iterator) interface{} {
+	var floats []float64
+
+	for {
+		t, v := itr.Next()
+		if t == -1 {
+			break
+		}
+		switch n := v.(type) {
+		case int64:
+			floats = append(floats, float64(n))
+		case float64:
+			floats = append(floats, n)
+		}
+	}
+
+	return floats
+}
+
+// ReduceStddev concatenates the []float64 results of the mappers and returns
+// their sample standard deviation (the squared deviations are divided by the
+// number of points minus one). Nil entries are skipped. The result is nil
+// when fewer than two points are available.
+func ReduceStddev(values []interface{}) interface{} {
+	var samples []float64
+	for _, mapped := range values {
+		if mapped != nil {
+			samples = append(samples, mapped.([]float64)...)
+		}
+	}
+
+	// With zero or one point the deviation is undefined.
+	if len(samples) < 2 {
+		return nil
+	}
+
+	// Mean, updated incrementally point by point.
+	var mean float64
+	for i, v := range samples {
+		mean += (v - mean) / float64(i+1)
+	}
+
+	// Sum of squared deviations from the mean.
+	var sumSquares float64
+	for _, v := range samples {
+		sumSquares += math.Pow(v-mean, 2)
+	}
+
+	return math.Sqrt(sumSquares / float64(len(samples)-1))
+}
+
+// firstLastMapOutput is the partial result emitted by MapFirst and MapLast:
+// the chosen point's time together with its value.
+type firstLastMapOutput struct {
+	Time int64
+	Val  interface{}
+}
+
+// MapFirst picks the point with the smallest time from an iterator and
+// returns it as a *firstLastMapOutput for ReduceFirst. When several points
+// share that time the one yielded first wins. It returns nil when the
+// iterator yields no points.
+func MapFirst(itr Iterator) interface{} {
+	out := &firstLastMapOutput{}
+	seen := false
+
+	for t, v := itr.Next(); t != -1; t, v = itr.Next() {
+		if !seen || t < out.Time {
+			out.Time, out.Val = t, v
+			seen = true
+		}
+	}
+
+	if !seen {
+		return nil
+	}
+	return out
+}
+
+// ReduceFirst computes the first of value.
+func ReduceFirst(values []interface{}) interface{} {
+	out := &firstLastMapOutput{}
+	pointsYielded := false
+
+	for _, v := range values {
+		if v == nil {
+			continue
+		}
+		val := v.(*firstLastMapOutput)
+		// Initialize first
+		if !pointsYielded {
+			out.Time = val.Time
+			out.Val = val.Val
+			pointsYielded = true
+		}
+		if val.Time < out.Time {
+			out.Time = val.Time
+			out.Val = val.Val
+		}
+	}
+	if pointsYielded {
+		return out.Val
+	}
+	return nil
+}
+
+// MapLast picks the point with the largest time from an iterator and returns
+// it as a *firstLastMapOutput for ReduceLast. When several points share that
+// time the one yielded first wins. It returns nil when the iterator yields no
+// points.
+func MapLast(itr Iterator) interface{} {
+	out := &firstLastMapOutput{}
+	seen := false
+
+	for t, v := itr.Next(); t != -1; t, v = itr.Next() {
+		if !seen || t > out.Time {
+			out.Time, out.Val = t, v
+			seen = true
+		}
+	}
+
+	if !seen {
+		return nil
+	}
+	return out
+}
+
+// ReduceLast computes the last of value.
+func ReduceLast(values []interface{}) interface{} {
+	out := &firstLastMapOutput{}
+	pointsYielded := false
+
+	for _, v := range values {
+		if v == nil {
+			continue
+		}
+
+		val := v.(*firstLastMapOutput)
+		// Initialize last
+		if !pointsYielded {
+			out.Time = val.Time
+			out.Val = val.Val
+			pointsYielded = true
+		}
+		if val.Time > out.Time {
+			out.Time = val.Time
+			out.Val = val.Val
+		}
+	}
+	if pointsYielded {
+		return out.Val
+	}
+	return nil
+}
+
+// MapEcho passes the values of an iterator through unchanged, as a
+// []interface{} in the order they were yielded.
+func MapEcho(itr Iterator) interface{} {
+	var echoed []interface{}
+	for {
+		t, v := itr.Next()
+		if t == -1 {
+			break
+		}
+		echoed = append(echoed, v)
+	}
+	return echoed
+}
+
+// ReducePercentile returns a ReduceFunc that picks the given percentile out
+// of the mapper results. Each non-nil entry must be a []interface{}; its
+// int64 and float64 elements are collected and sorted, and the element at the
+// rounded rank len*percentile/100 is returned as a float64. The reducer
+// returns nil when that rank falls outside the collected values.
+func ReducePercentile(percentile float64) ReduceFunc {
+	return func(values []interface{}) interface{} {
+		var samples []float64
+
+		for _, mapped := range values {
+			if mapped == nil {
+				continue
+			}
+			for _, raw := range mapped.([]interface{}) {
+				switch n := raw.(type) {
+				case float64:
+					samples = append(samples, n)
+				case int64:
+					samples = append(samples, float64(n))
+				}
+			}
+		}
+
+		sort.Float64s(samples)
+
+		// Round to the nearest one-based rank, then shift to a zero-based index.
+		rank := int(math.Floor(float64(len(samples))*percentile/100.0+0.5)) - 1
+		if rank < 0 || rank >= len(samples) {
+			return nil
+		}
+		return samples[rank]
+	}
+}
+
+// IsNumeric reports whether the aggregate c can only be run on numeric
+// fields. That holds for every function except count, first, last and
+// distinct.
+func IsNumeric(c *Call) bool {
+	switch c.Name {
+	case "count", "first", "last", "distinct":
+		return false
+	}
+	return true
+}
+
+// MapRawQuery is the mapper for queries without aggregates: it wraps every
+// point of the iterator in a *rawQueryMapOutput and returns them as a slice.
+func MapRawQuery(itr Iterator) interface{} {
+	var points []*rawQueryMapOutput
+	for {
+		t, v := itr.Next()
+		if t == -1 {
+			break
+		}
+		points = append(points, &rawQueryMapOutput{Time: t, Values: v})
+	}
+	return points
+}
+
+// rawQueryMapOutput is a single point of a raw (non-aggregate) query: its
+// time and the value read at that time.
+type rawQueryMapOutput struct {
+	Time   int64
+	Values interface{}
+}
+
+// String renders the point as "{time values}", both in Go syntax.
+func (r *rawQueryMapOutput) String() string {
+	const layout = "{%#v %#v}"
+	return fmt.Sprintf(layout, r.Time, r.Values)
+}
+
+// rawOutputs is a list of raw query points that implements sort.Interface,
+// ordering the points by ascending time.
+type rawOutputs []*rawQueryMapOutput
+
+// Len returns the number of points.
+func (a rawOutputs) Len() int {
+	return len(a)
+}
+
+// Less reports whether point i is earlier than point j.
+func (a rawOutputs) Less(i, j int) bool {
+	left, right := a[i], a[j]
+	return left.Time < right.Time
+}
+
+// Swap exchanges the points at indexes i and j.
+func (a rawOutputs) Swap(i, j int) {
+	a[j], a[i] = a[i], a[j]
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser.go
new file mode 100644
index 00000000000..366cbbec0a3
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser.go
@@ -0,0 +1,2238 @@
+package influxql
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"math"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+)
+
+const (
+	// DateFormat represents the format for date literals.
+	DateFormat = "2006-01-02"
+
+	// DateTimeFormat represents the format for date time literals.
+	DateTimeFormat = "2006-01-02 15:04:05.999999"
+)
+
+// Parser represents an InfluxQL parser.
+type Parser struct {
+	s *bufScanner
+}
+
+// NewParser returns a new instance of Parser.
+func NewParser(r io.Reader) *Parser {
+	return &Parser{s: newBufScanner(r)}
+}
+
+// ParseQuery parses a query string and returns its AST representation.
+func ParseQuery(s string) (*Query, error) { return NewParser(strings.NewReader(s)).ParseQuery() }
+
+// ParseStatement parses a statement string and returns its AST representation.
+func ParseStatement(s string) (Statement, error) {
+	return NewParser(strings.NewReader(s)).ParseStatement()
+}
+
+// MustParseStatement parses a statement string and returns its AST. Panic on error.
+func MustParseStatement(s string) Statement {
+	stmt, err := ParseStatement(s)
+	if err != nil {
+		panic(err.Error())
+	}
+	return stmt
+}
+
+// ParseExpr parses an expression string and returns its AST representation.
+func ParseExpr(s string) (Expr, error) { return NewParser(strings.NewReader(s)).ParseExpr() }
+
+// ParseQuery parses an InfluxQL string and returns a Query AST object.
+func (p *Parser) ParseQuery() (*Query, error) {
+	var statements Statements
+	var semi bool
+
+	for {
+		if tok, _, _ := p.scanIgnoreWhitespace(); tok == EOF {
+			return &Query{Statements: statements}, nil
+		} else if !semi && tok == SEMICOLON {
+			semi = true
+		} else {
+			p.unscan()
+			s, err := p.ParseStatement()
+			if err != nil {
+				return nil, err
+			}
+			statements = append(statements, s)
+			semi = false
+		}
+	}
+}
+
+// ParseStatement parses an InfluxQL string and returns a Statement AST object.
+func (p *Parser) ParseStatement() (Statement, error) {
+	// Inspect the first token.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	switch tok {
+	case SELECT:
+		return p.parseSelectStatement(targetNotRequired)
+	case DELETE:
+		return p.parseDeleteStatement()
+	case SHOW:
+		return p.parseShowStatement()
+	case CREATE:
+		return p.parseCreateStatement()
+	case DROP:
+		return p.parseDropStatement()
+	case GRANT:
+		return p.parseGrantStatement()
+	case REVOKE:
+		return p.parseRevokeStatement()
+	case ALTER:
+		return p.parseAlterStatement()
+	case SET:
+		return p.parseSetPasswordUserStatement()
+	default:
+		return nil, newParseError(tokstr(tok, lit), []string{"SELECT", "DELETE", "SHOW", "CREATE", "DROP", "GRANT", "REVOKE", "ALTER", "SET"}, pos)
+	}
+}
+
+// parseShowStatement scans the keyword(s) following SHOW and dispatches to the matching statement parser.
+// This function assumes the SHOW token has already been consumed.
+func (p *Parser) parseShowStatement() (Statement, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	switch tok {
+	case CONTINUOUS:
+		return p.parseShowContinuousQueriesStatement()
+	case GRANTS:
+		return p.parseGrantsForUserStatement()
+	case DATABASES:
+		return p.parseShowDatabasesStatement()
+	case SERVERS:
+		return p.parseShowServersStatement()
+	case FIELD:
+		tok, pos, lit := p.scanIgnoreWhitespace()
+		if tok == KEYS {
+			return p.parseShowFieldKeysStatement()
+		}
+		return nil, newParseError(tokstr(tok, lit), []string{"KEYS"}, pos)
+	case MEASUREMENTS:
+		return p.parseShowMeasurementsStatement()
+	case RETENTION:
+		tok, pos, lit := p.scanIgnoreWhitespace()
+		if tok == POLICIES {
+			return p.parseShowRetentionPoliciesStatement()
+		}
+		return nil, newParseError(tokstr(tok, lit), []string{"POLICIES"}, pos)
+	case SERIES:
+		return p.parseShowSeriesStatement()
+	case STATS:
+		return p.parseShowStatsStatement()
+	case DIAGNOSTICS:
+		return p.parseShowDiagnosticsStatement()
+	case TAG:
+		tok, pos, lit := p.scanIgnoreWhitespace()
+		if tok == KEYS {
+			return p.parseShowTagKeysStatement()
+		} else if tok == VALUES {
+			return p.parseShowTagValuesStatement()
+		}
+		return nil, newParseError(tokstr(tok, lit), []string{"KEYS", "VALUES"}, pos)
+	case USERS:
+		return p.parseShowUsersStatement()
+	}
+
+	return nil, newParseError(tokstr(tok, lit), []string{"CONTINUOUS", "DATABASES", "DIAGNOSTICS", "FIELD", "GRANTS", "MEASUREMENTS", "RETENTION", "SERIES", "SERVERS", "STATS", "TAG", "USERS"}, pos)
+}
+
+// parseCreateStatement parses a string and returns a create statement.
+// This function assumes the CREATE token has already been consumed.
+func (p *Parser) parseCreateStatement() (Statement, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok == CONTINUOUS {
+		return p.parseCreateContinuousQueryStatement()
+	} else if tok == DATABASE {
+		return p.parseCreateDatabaseStatement()
+	} else if tok == USER {
+		return p.parseCreateUserStatement()
+	} else if tok == RETENTION {
+		tok, pos, lit = p.scanIgnoreWhitespace()
+		if tok != POLICY {
+			return nil, newParseError(tokstr(tok, lit), []string{"POLICY"}, pos)
+		}
+		return p.parseCreateRetentionPolicyStatement()
+	}
+
+	return nil, newParseError(tokstr(tok, lit), []string{"CONTINUOUS", "DATABASE", "USER", "RETENTION"}, pos)
+}
+
+// parseDropStatement scans the keyword(s) following DROP and dispatches to the matching statement parser.
+// This function assumes the DROP token has already been consumed.
+func (p *Parser) parseDropStatement() (Statement, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok == SERIES {
+		return p.parseDropSeriesStatement()
+	} else if tok == MEASUREMENT {
+		return p.parseDropMeasurementStatement()
+	} else if tok == CONTINUOUS {
+		return p.parseDropContinuousQueryStatement()
+	} else if tok == DATABASE {
+		return p.parseDropDatabaseStatement()
+	} else if tok == RETENTION {
+		if tok, pos, lit := p.scanIgnoreWhitespace(); tok != POLICY {
+			return nil, newParseError(tokstr(tok, lit), []string{"POLICY"}, pos)
+		}
+		return p.parseDropRetentionPolicyStatement()
+	} else if tok == USER {
+		return p.parseDropUserStatement()
+	}
+
+	return nil, newParseError(tokstr(tok, lit), []string{"SERIES", "MEASUREMENT", "CONTINUOUS", "DATABASE", "RETENTION", "USER"}, pos)
+}
+
+// parseAlterStatement parses a string and returns an alter statement.
+// This function assumes the ALTER token has already been consumed.
+func (p *Parser) parseAlterStatement() (Statement, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok == RETENTION {
+		if tok, pos, lit = p.scanIgnoreWhitespace(); tok != POLICY {
+			return nil, newParseError(tokstr(tok, lit), []string{"POLICY"}, pos)
+		}
+		return p.parseAlterRetentionPolicyStatement()
+	}
+
+	return nil, newParseError(tokstr(tok, lit), []string{"RETENTION"}, pos)
+}
+
+// parseSetPasswordUserStatement parses a string and returns a set statement.
+// This function assumes the SET token has already been consumed.
+func (p *Parser) parseSetPasswordUserStatement() (*SetPasswordUserStatement, error) {
+	stmt := &SetPasswordUserStatement{}
+
+	// Consume the required PASSWORD FOR tokens.
+	if err := p.parseTokens([]Token{PASSWORD, FOR}); err != nil {
+		return nil, err
+	}
+
+	// Parse username
+	ident, err := p.parseIdent()
+
+	if err != nil {
+		return nil, err
+	}
+	stmt.Name = ident
+
+	// Consume the required = token.
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != EQ {
+		return nil, newParseError(tokstr(tok, lit), []string{"="}, pos)
+	}
+
+	// Parse new user's password
+	if ident, err = p.parseString(); err != nil {
+		return nil, err
+	}
+	stmt.Password = ident
+
+	return stmt, nil
+}
+
+// parseCreateRetentionPolicyStatement parses a string and returns a create retention policy statement.
+// This function assumes the CREATE RETENTION POLICY tokens have already been consumed.
+func (p *Parser) parseCreateRetentionPolicyStatement() (*CreateRetentionPolicyStatement, error) {
+	stmt := &CreateRetentionPolicyStatement{}
+
+	// Parse the retention policy name.
+	ident, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Name = ident
+
+	// Consume the required ON token.
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != ON {
+		return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+	}
+
+	// Parse the database name.
+	ident, err = p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Database = ident
+
+	// Parse required DURATION token.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != DURATION {
+		return nil, newParseError(tokstr(tok, lit), []string{"DURATION"}, pos)
+	}
+
+	// Parse duration value
+	d, err := p.parseDuration()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Duration = d
+
+	// Parse required REPLICATION token.
+	if tok, pos, lit = p.scanIgnoreWhitespace(); tok != REPLICATION {
+		return nil, newParseError(tokstr(tok, lit), []string{"REPLICATION"}, pos)
+	}
+
+	// Parse replication value.
+	n, err := p.parseInt(1, math.MaxInt32)
+	if err != nil {
+		return nil, err
+	}
+	stmt.Replication = n
+
+	// Parse optional DEFAULT token.
+	if tok, pos, lit = p.scanIgnoreWhitespace(); tok == DEFAULT {
+		stmt.Default = true
+	} else {
+		p.unscan()
+	}
+
+	return stmt, nil
+}
+
+// parseAlterRetentionPolicyStatement parses a string and returns an alter retention policy statement.
+// This function assumes the ALTER RETENTION POLICY tokens have already been consumed.
+func (p *Parser) parseAlterRetentionPolicyStatement() (*AlterRetentionPolicyStatement, error) {
+	stmt := &AlterRetentionPolicyStatement{}
+
+	// Parse the retention policy name; the DEFAULT keyword is accepted as the name "default".
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok == DEFAULT {
+		stmt.Name = "default"
+	} else if tok == IDENT {
+		stmt.Name = lit
+	} else {
+		return nil, newParseError(tokstr(tok, lit), []string{"identifier"}, pos)
+	}
+
+	// Consume the required ON token.
+	if tok, pos, lit = p.scanIgnoreWhitespace(); tok != ON {
+		return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+	}
+
+	// Parse the database name.
+	ident, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Database = ident
+
+	// Accept up to maxNumOptions option tokens (DURATION, REPLICATION, DEFAULT); at least one is required.
+	maxNumOptions := 3
+Loop:
+	for i := 0; i < maxNumOptions; i++ {
+		tok, pos, lit := p.scanIgnoreWhitespace()
+		switch tok {
+		case DURATION:
+			d, err := p.parseDuration()
+			if err != nil {
+				return nil, err
+			}
+			stmt.Duration = &d
+		case REPLICATION:
+			n, err := p.parseInt(1, math.MaxInt32)
+			if err != nil {
+				return nil, err
+			}
+			stmt.Replication = &n
+		case DEFAULT:
+			stmt.Default = true
+		default:
+			if i < 1 {
+				return nil, newParseError(tokstr(tok, lit), []string{"DURATION", "REPLICATION", "DEFAULT"}, pos)
+			}
+			p.unscan()
+			break Loop
+		}
+	}
+
+	return stmt, nil
+}
+
+// parseInt parses a string and returns an integer literal.
+func (p *Parser) parseInt(min, max int) (int, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != NUMBER {
+		return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos)
+	}
+
+	// Return an error if the number has a fractional part.
+	if strings.Contains(lit, ".") {
+		return 0, &ParseError{Message: "number must be an integer", Pos: pos}
+	}
+
+	// Convert string to int.
+	n, err := strconv.Atoi(lit)
+	if err != nil {
+		return 0, &ParseError{Message: err.Error(), Pos: pos}
+	} else if min > n || n > max {
+		return 0, &ParseError{
+			Message: fmt.Sprintf("invalid value %d: must be %d <= n <= %d", n, min, max),
+			Pos:     pos,
+		}
+	}
+
+	return n, nil
+}
+
+// parseUInt32 parses a string and returns a 32-bit unsigned integer literal.
+func (p *Parser) parseUInt32() (uint32, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != NUMBER {
+		return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos)
+	}
+
+	// Convert string to unsigned 32-bit integer
+	n, err := strconv.ParseUint(lit, 10, 32)
+	if err != nil {
+		return 0, &ParseError{Message: err.Error(), Pos: pos}
+	}
+
+	return uint32(n), nil
+}
+
+// parseUInt64 parses a string and returns a 64-bit unsigned integer literal.
+func (p *Parser) parseUInt64() (uint64, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != NUMBER {
+		return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos)
+	}
+
+	// Convert string to unsigned 64-bit integer
+	n, err := strconv.ParseUint(lit, 10, 64)
+	if err != nil {
+		return 0, &ParseError{Message: err.Error(), Pos: pos}
+	}
+
+	return uint64(n), nil
+}
+
+// parseDuration parses a string and returns a duration literal.
+// This function assumes the DURATION token has already been consumed.
+func (p *Parser) parseDuration() (time.Duration, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != DURATION_VAL && tok != INF {
+		return 0, newParseError(tokstr(tok, lit), []string{"duration"}, pos)
+	}
+
+	if tok == INF {
+		return 0, nil
+	}
+
+	d, err := ParseDuration(lit)
+	if err != nil {
+		return 0, &ParseError{Message: err.Error(), Pos: pos}
+	}
+
+	return d, nil
+}
+
+// parseIdent parses an identifier.
+func (p *Parser) parseIdent() (string, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != IDENT {
+		return "", newParseError(tokstr(tok, lit), []string{"identifier"}, pos)
+	}
+	return lit, nil
+}
+
+// parseIdentList parses a comma delimited list of identifiers.
+func (p *Parser) parseIdentList() ([]string, error) {
+	// Parse first (required) identifier.
+	ident, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	idents := []string{ident}
+
+	// Parse remaining (optional) identifiers.
+	for {
+		if tok, _, _ := p.scanIgnoreWhitespace(); tok != COMMA {
+			p.unscan()
+			return idents, nil
+		}
+
+		if ident, err = p.parseIdent(); err != nil {
+			return nil, err
+		}
+
+		idents = append(idents, ident)
+	}
+}
+
+// parseSegmentedIdents parses a dot-separated sequence of identifiers.
+// e.g.,  "db"."rp".measurement  or  "db"..measurement
+func (p *Parser) parseSegmentedIdents() ([]string, error) {
+	ident, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	idents := []string{ident}
+
+	// Parse remaining (optional) identifiers.
+	for {
+		if tok, _, _ := p.scan(); tok != DOT {
+			// No more segments so we're done.
+			p.unscan()
+			break
+		}
+
+		if ch := p.peekRune(); ch == '/' {
+			// Next segment is a regex so we're done.
+			break
+		} else if ch == '.' {
+			// Add an empty identifier.
+			idents = append(idents, "")
+			continue
+		}
+
+		// Parse the next identifier.
+		if ident, err = p.parseIdent(); err != nil {
+			return nil, err
+		}
+
+		idents = append(idents, ident)
+	}
+
+	if len(idents) > 3 {
+		msg := fmt.Sprintf("too many segments in %s", QuoteIdent(idents...))
+		return nil, &ParseError{Message: msg}
+	}
+
+	return idents, nil
+}
+
+// parseString parses a string.
+func (p *Parser) parseString() (string, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != STRING {
+		return "", newParseError(tokstr(tok, lit), []string{"string"}, pos)
+	}
+	return lit, nil
+}
+
+// parseRevokeStatement parses a string and returns a revoke statement.
+// This function assumes the REVOKE token has already been consumed.
+func (p *Parser) parseRevokeStatement() (Statement, error) {
+	// Parse the privilege to be revoked.
+	priv, err := p.parsePrivilege()
+	if err != nil {
+		return nil, err
+	}
+
+	// Check for ON or FROM clauses.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok == ON {
+		stmt, err := p.parseRevokeOnStatement()
+		if err != nil {
+			return nil, err
+		}
+		stmt.Privilege = priv
+		return stmt, nil
+	} else if tok == FROM {
+		// Admin privilege is only revoked on ALL PRIVILEGES.
+		if priv != AllPrivileges {
+			return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+		}
+		return p.parseRevokeAdminStatement()
+	}
+
+	// Only ON or FROM clauses are allowed after privilege.
+	if priv == AllPrivileges {
+		return nil, newParseError(tokstr(tok, lit), []string{"ON", "FROM"}, pos)
+	}
+	return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+}
+
+// parseRevokeOnStatement parses a string and returns a revoke statement.
+// This function assumes the [PRIVILEGE] ON tokens have already been consumed.
+func (p *Parser) parseRevokeOnStatement() (*RevokeStatement, error) {
+	stmt := &RevokeStatement{}
+
+	// Parse the name of the database.
+	lit, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.On = lit
+
+	// Parse FROM clause.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+
+	// Check for required FROM token.
+	if tok != FROM {
+		return nil, newParseError(tokstr(tok, lit), []string{"FROM"}, pos)
+	}
+
+	// Parse the name of the user.
+	lit, err = p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.User = lit
+
+	return stmt, nil
+}
+
+// parseRevokeAdminStatement parses a string and returns a revoke admin statement.
+// This function assumes the ALL [PRIVILEGES] FROM tokens have already been consumed.
+func (p *Parser) parseRevokeAdminStatement() (*RevokeAdminStatement, error) {
+	// Admin privilege is always false when revoke admin clause is called.
+	stmt := &RevokeAdminStatement{}
+
+	// Parse the name of the user.
+	lit, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.User = lit
+
+	return stmt, nil
+}
+
+// parseGrantStatement parses a string and returns a grant statement.
+// This function assumes the GRANT token has already been consumed.
+func (p *Parser) parseGrantStatement() (Statement, error) {
+	// Parse the privilege to be granted.
+	priv, err := p.parsePrivilege()
+	if err != nil {
+		return nil, err
+	}
+
+	// Check for ON or TO clauses.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok == ON {
+		stmt, err := p.parseGrantOnStatement()
+		if err != nil {
+			return nil, err
+		}
+		stmt.Privilege = priv
+		return stmt, nil
+	} else if tok == TO {
+		// Admin privilege is only granted on ALL PRIVILEGES.
+		if priv != AllPrivileges {
+			return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+		}
+		return p.parseGrantAdminStatement()
+	}
+
+	// Only ON or TO clauses are allowed after privilege.
+	if priv == AllPrivileges {
+		return nil, newParseError(tokstr(tok, lit), []string{"ON", "TO"}, pos)
+	}
+	return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+}
+
+// parseGrantOnStatement parses a string and returns a grant statement.
+// This function assumes the [PRIVILEGE] ON tokens have already been consumed.
+func (p *Parser) parseGrantOnStatement() (*GrantStatement, error) {
+	stmt := &GrantStatement{}
+
+	// Parse the name of the database.
+	lit, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.On = lit
+
+	// Parse TO clause.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+
+	// Check for required TO token.
+	if tok != TO {
+		return nil, newParseError(tokstr(tok, lit), []string{"TO"}, pos)
+	}
+
+	// Parse the name of the user.
+	lit, err = p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.User = lit
+
+	return stmt, nil
+}
+
+// parseGrantAdminStatement parses a string and returns a grant admin statement.
+// This function assumes the ALL [PRIVILEGES] TO tokens have already been consumed.
+func (p *Parser) parseGrantAdminStatement() (*GrantAdminStatement, error) {
+	// Admin privilege is always true when grant admin clause is called.
+	stmt := &GrantAdminStatement{}
+
+	// Parse the name of the user.
+	lit, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.User = lit
+
+	return stmt, nil
+}
+
+// parsePrivilege parses a string and returns a Privilege
+func (p *Parser) parsePrivilege() (Privilege, error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	switch tok {
+	case READ:
+		return ReadPrivilege, nil
+	case WRITE:
+		return WritePrivilege, nil
+	case ALL:
+		// Consume optional PRIVILEGES token
+		tok, pos, lit = p.scanIgnoreWhitespace()
+		if tok != PRIVILEGES {
+			p.unscan()
+		}
+		return AllPrivileges, nil
+	}
+	return 0, newParseError(tokstr(tok, lit), []string{"READ", "WRITE", "ALL [PRIVILEGES]"}, pos)
+}
+
+// parseSelectStatement parses a select string and returns a Statement AST object.
+// This function assumes the SELECT token has already been consumed.
+func (p *Parser) parseSelectStatement(tr targetRequirement) (*SelectStatement, error) {
+	stmt := &SelectStatement{}
+	var err error
+
+	// Parse fields: "FIELD+".
+	if stmt.Fields, err = p.parseFields(); err != nil {
+		return nil, err
+	}
+
+	// Parse target: "INTO"
+	if stmt.Target, err = p.parseTarget(tr); err != nil {
+		return nil, err
+	}
+
+	// Parse source: "FROM".
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != FROM {
+		return nil, newParseError(tokstr(tok, lit), []string{"FROM"}, pos)
+	}
+	if stmt.Sources, err = p.parseSources(); err != nil {
+		return nil, err
+	}
+
+	// Parse condition: "WHERE EXPR".
+	if stmt.Condition, err = p.parseCondition(); err != nil {
+		return nil, err
+	}
+
+	// Parse dimensions: "GROUP BY DIMENSION+".
+	if stmt.Dimensions, err = p.parseDimensions(); err != nil {
+		return nil, err
+	}
+
+	// Parse fill options: "fill(<option>)"
+	if stmt.Fill, stmt.FillValue, err = p.parseFill(); err != nil {
+		return nil, err
+	}
+
+	// Parse sort: "ORDER BY FIELD+".
+	if stmt.SortFields, err = p.parseOrderBy(); err != nil {
+		return nil, err
+	}
+
+	// Parse limit: "LIMIT <n>".
+	if stmt.Limit, err = p.parseOptionalTokenAndInt(LIMIT); err != nil {
+		return nil, err
+	}
+
+	// Parse offset: "OFFSET <n>".
+	if stmt.Offset, err = p.parseOptionalTokenAndInt(OFFSET); err != nil {
+		return nil, err
+	}
+
+	// Parse series limit: "SLIMIT <n>".
+	if stmt.SLimit, err = p.parseOptionalTokenAndInt(SLIMIT); err != nil {
+		return nil, err
+	}
+
+	// Parse series offset: "SOFFSET <n>".
+	if stmt.SOffset, err = p.parseOptionalTokenAndInt(SOFFSET); err != nil {
+		return nil, err
+	}
+
+	// Set if the query is a raw data query or one with an aggregate
+	stmt.IsRawQuery = true
+	WalkFunc(stmt.Fields, func(n Node) {
+		if _, ok := n.(*Call); ok {
+			stmt.IsRawQuery = false
+		}
+	})
+
+	if err := stmt.validate(tr); err != nil {
+		return nil, err
+	}
+
+	return stmt, nil
+}
+
+// targetRequirement specifies whether or not a target clause is required.
+type targetRequirement int
+
+const (
+	targetRequired targetRequirement = iota
+	targetNotRequired
+)
+
+// parseTarget parses a string and returns a Target.
+func (p *Parser) parseTarget(tr targetRequirement) (*Target, error) {
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != INTO {
+		if tr == targetRequired {
+			return nil, newParseError(tokstr(tok, lit), []string{"INTO"}, pos)
+		}
+		p.unscan()
+		return nil, nil
+	}
+
+	// db, rp, and / or measurement
+	idents, err := p.parseSegmentedIdents()
+	if err != nil {
+		return nil, err
+	}
+
+	t := &Target{Measurement: &Measurement{}}
+
+	switch len(idents) {
+	case 1:
+		t.Measurement.Name = idents[0]
+	case 2:
+		t.Measurement.RetentionPolicy = idents[0]
+		t.Measurement.Name = idents[1]
+	case 3:
+		t.Measurement.Database = idents[0]
+		t.Measurement.RetentionPolicy = idents[1]
+		t.Measurement.Name = idents[2]
+	}
+
+	return t, nil
+}
+
+// parseDeleteStatement parses a delete string and returns a DeleteStatement.
+// This function assumes the DELETE token has already been consumed.
+func (p *Parser) parseDeleteStatement() (*DeleteStatement, error) {
+	stmt := &DeleteStatement{}
+
+	// Parse source
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != FROM {
+		return nil, newParseError(tokstr(tok, lit), []string{"FROM"}, pos)
+	}
+	source, err := p.parseSource()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Source = source
+
+	// Parse condition: "WHERE EXPR".
+	condition, err := p.parseCondition()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Condition = condition
+
+	return stmt, nil
+}
+
+// parseShowSeriesStatement parses a string and returns a ShowSeriesStatement.
+// This function assumes the "SHOW SERIES" tokens have already been consumed.
+func (p *Parser) parseShowSeriesStatement() (*ShowSeriesStatement, error) {
+	stmt := &ShowSeriesStatement{}
+	var err error
+
+	// Parse optional FROM.
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok == FROM {
+		if stmt.Sources, err = p.parseSources(); err != nil {
+			return nil, err
+		}
+	} else {
+		p.unscan()
+	}
+
+	// Parse condition: "WHERE EXPR".
+	if stmt.Condition, err = p.parseCondition(); err != nil {
+		return nil, err
+	}
+
+	// Parse sort: "ORDER BY FIELD+".
+	if stmt.SortFields, err = p.parseOrderBy(); err != nil {
+		return nil, err
+	}
+
+	// Parse limit: "LIMIT <n>".
+	if stmt.Limit, err = p.parseOptionalTokenAndInt(LIMIT); err != nil {
+		return nil, err
+	}
+
+	// Parse offset: "OFFSET <n>".
+	if stmt.Offset, err = p.parseOptionalTokenAndInt(OFFSET); err != nil {
+		return nil, err
+	}
+
+	return stmt, nil
+}
+
+// parseShowMeasurementsStatement parses a string and returns a ShowMeasurementsStatement.
+// This function assumes the "SHOW MEASUREMENTS" tokens have already been consumed.
+func (p *Parser) parseShowMeasurementsStatement() (*ShowMeasurementsStatement, error) {
+	stmt := &ShowMeasurementsStatement{}
+	var err error
+
+	// Parse condition: "WHERE EXPR".
+	if stmt.Condition, err = p.parseCondition(); err != nil {
+		return nil, err
+	}
+
+	// Parse sort: "ORDER BY FIELD+".
+	if stmt.SortFields, err = p.parseOrderBy(); err != nil {
+		return nil, err
+	}
+
+	// Parse limit: "LIMIT <n>".
+	if stmt.Limit, err = p.parseOptionalTokenAndInt(LIMIT); err != nil {
+		return nil, err
+	}
+
+	// Parse offset: "OFFSET <n>".
+	if stmt.Offset, err = p.parseOptionalTokenAndInt(OFFSET); err != nil {
+		return nil, err
+	}
+
+	return stmt, nil
+}
+
+// parseShowRetentionPoliciesStatement parses a string and returns a ShowRetentionPoliciesStatement.
+// This function assumes the "SHOW RETENTION POLICIES" tokens have been consumed.
+func (p *Parser) parseShowRetentionPoliciesStatement() (*ShowRetentionPoliciesStatement, error) {
+	stmt := &ShowRetentionPoliciesStatement{}
+
+	// Expect an "ON" keyword.
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != ON {
+		return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+	}
+
+	// Parse the database.
+	ident, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Database = ident
+
+	return stmt, nil
+}
+
+// parseShowTagKeysStatement parses a string and returns a ShowTagKeysStatement.
+// This function assumes the "SHOW TAG KEYS" tokens have already been consumed.
+func (p *Parser) parseShowTagKeysStatement() (*ShowTagKeysStatement, error) {
+	stmt := &ShowTagKeysStatement{}
+	var err error
+
+	// Parse optional source.
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok == FROM {
+		if stmt.Sources, err = p.parseSources(); err != nil {
+			return nil, err
+		}
+	} else {
+		p.unscan()
+	}
+
+	// Parse condition: "WHERE EXPR".
+	if stmt.Condition, err = p.parseCondition(); err != nil {
+		return nil, err
+	}
+
+	// Parse sort: "ORDER BY FIELD+".
+	if stmt.SortFields, err = p.parseOrderBy(); err != nil {
+		return nil, err
+	}
+
+	// Parse limit: "LIMIT <n>".
+	if stmt.Limit, err = p.parseOptionalTokenAndInt(LIMIT); err != nil {
+		return nil, err
+	}
+
+	// Parse offset: "OFFSET <n>".
+	if stmt.Offset, err = p.parseOptionalTokenAndInt(OFFSET); err != nil {
+		return nil, err
+	}
+
+	return stmt, nil
+}
+
+// parseShowTagValuesStatement parses a string and returns a ShowTagValuesStatement.
+// This function assumes the "SHOW TAG VALUES" tokens have already been consumed.
+func (p *Parser) parseShowTagValuesStatement() (*ShowTagValuesStatement, error) {
+	stmt := &ShowTagValuesStatement{}
+	var err error
+
+	// Parse optional source.
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok == FROM {
+		if stmt.Sources, err = p.parseSources(); err != nil {
+			return nil, err
+		}
+	} else {
+		p.unscan()
+	}
+
+	// Parse required WITH KEY.
+	if stmt.TagKeys, err = p.parseTagKeys(); err != nil {
+		return nil, err
+	}
+
+	// Parse condition: "WHERE EXPR".
+	if stmt.Condition, err = p.parseCondition(); err != nil {
+		return nil, err
+	}
+
+	// Parse sort: "ORDER BY FIELD+".
+	if stmt.SortFields, err = p.parseOrderBy(); err != nil {
+		return nil, err
+	}
+
+	// Parse limit: "LIMIT <n>".
+	if stmt.Limit, err = p.parseOptionalTokenAndInt(LIMIT); err != nil {
+		return nil, err
+	}
+
+	// Parse offset: "OFFSET <n>".
+	if stmt.Offset, err = p.parseOptionalTokenAndInt(OFFSET); err != nil {
+		return nil, err
+	}
+
+	return stmt, nil
+}
+
+// parseTagKeys parses a "WITH KEY = <ident>" or "WITH KEY IN (<ident>, ...)" clause and returns the tag keys.
+func (p *Parser) parseTagKeys() ([]string, error) {
+	var err error
+
+	// Parse required WITH KEY tokens.
+	if err := p.parseTokens([]Token{WITH, KEY}); err != nil {
+		return nil, err
+	}
+
+	var tagKeys []string
+
+	// Parse required IN or EQ token.
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok == IN {
+		// Parse required ( token.
+		if tok, pos, lit = p.scanIgnoreWhitespace(); tok != LPAREN {
+			return nil, newParseError(tokstr(tok, lit), []string{"("}, pos)
+		}
+
+		// Parse tag key list.
+		if tagKeys, err = p.parseIdentList(); err != nil {
+			return nil, err
+		}
+
+		// Parse required ) token.
+		if tok, pos, lit = p.scanIgnoreWhitespace(); tok != RPAREN {
+			return nil, newParseError(tokstr(tok, lit), []string{")"}, pos)
+		}
+	} else if tok == EQ {
+		// Parse required tag key.
+		ident, err := p.parseIdent()
+		if err != nil {
+			return nil, err
+		}
+		tagKeys = append(tagKeys, ident)
+	} else {
+		return nil, newParseError(tokstr(tok, lit), []string{"IN", "="}, pos)
+	}
+
+	return tagKeys, nil
+}
+
+// parseShowUsersStatement parses a string and returns a ShowUsersStatement.
+// This function assumes the "SHOW USERS" tokens have been consumed.
+func (p *Parser) parseShowUsersStatement() (*ShowUsersStatement, error) {
+	return &ShowUsersStatement{}, nil
+}
+
+// parseShowFieldKeysStatement parses a string and returns a ShowFieldKeysStatement.
+// This function assumes the "SHOW FIELD KEYS" tokens have already been consumed.
+func (p *Parser) parseShowFieldKeysStatement() (*ShowFieldKeysStatement, error) {
+	stmt := &ShowFieldKeysStatement{}
+	var err error
+
+	// Parse optional source.
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok == FROM {
+		if stmt.Sources, err = p.parseSources(); err != nil {
+			return nil, err
+		}
+	} else {
+		p.unscan()
+	}
+
+	// Parse sort: "ORDER BY FIELD+".
+	if stmt.SortFields, err = p.parseOrderBy(); err != nil {
+		return nil, err
+	}
+
+	// Parse limit: "LIMIT <n>".
+	if stmt.Limit, err = p.parseOptionalTokenAndInt(LIMIT); err != nil {
+		return nil, err
+	}
+
+	// Parse offset: "OFFSET <n>".
+	if stmt.Offset, err = p.parseOptionalTokenAndInt(OFFSET); err != nil {
+		return nil, err
+	}
+
+	return stmt, nil
+}
+
+// parseDropMeasurementStatement parses a string and returns a DropMeasurementStatement.
+// This function assumes the "DROP MEASUREMENT" tokens have already been consumed.
+func (p *Parser) parseDropMeasurementStatement() (*DropMeasurementStatement, error) {
+	stmt := &DropMeasurementStatement{}
+
+	// Parse the name of the measurement to be dropped.
+	lit, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Name = lit
+
+	return stmt, nil
+}
+
+// parseDropSeriesStatement parses a string and returns a DropSeriesStatement.
+// This function assumes the "DROP SERIES" tokens have already been consumed.
+func (p *Parser) parseDropSeriesStatement() (*DropSeriesStatement, error) {
+	stmt := &DropSeriesStatement{}
+	var err error
+
+	tok, pos, lit := p.scanIgnoreWhitespace()
+
+	if tok == FROM {
+		// Parse source.
+		if stmt.Sources, err = p.parseSources(); err != nil {
+			return nil, err
+		}
+	} else {
+		p.unscan()
+	}
+
+	// Parse condition: "WHERE EXPR".
+	if stmt.Condition, err = p.parseCondition(); err != nil {
+		return nil, err
+	}
+
+	// If they didn't provide a FROM or a WHERE, this query is invalid
+	if stmt.Condition == nil && stmt.Sources == nil {
+		return nil, newParseError(tokstr(tok, lit), []string{"FROM", "WHERE"}, pos)
+	}
+
+	return stmt, nil
+}
+
+// parseShowContinuousQueriesStatement finishes parsing "SHOW CONTINUOUS QUERIES".
+// The caller must already have consumed the "SHOW CONTINUOUS" tokens.
+func (p *Parser) parseShowContinuousQueriesStatement() (*ShowContinuousQueriesStatement, error) {
+	// The only remaining token must be QUERIES; anything else is a parse error.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != QUERIES {
+		found := tokstr(tok, lit)
+		return nil, newParseError(found, []string{"QUERIES"}, pos)
+	}
+
+	return &ShowContinuousQueriesStatement{}, nil
+}
+
+// parseShowServersStatement returns an empty ShowServersStatement; the statement
+// takes no arguments, so no further tokens are read.
+// The caller must already have consumed the "SHOW SERVERS" tokens.
+func (p *Parser) parseShowServersStatement() (*ShowServersStatement, error) {
+	return &ShowServersStatement{}, nil
+}
+
+// parseGrantsForUserStatement parses "FOR <user>" and returns a ShowGrantsForUserStatement.
+// The caller must already have consumed the "SHOW GRANTS" tokens.
+func (p *Parser) parseGrantsForUserStatement() (*ShowGrantsForUserStatement, error) {
+	// FOR is mandatory; report whatever was found in its place.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != FOR {
+		return nil, newParseError(tokstr(tok, lit), []string{"FOR"}, pos)
+	}
+
+	// The identifier after FOR names the user whose grants are shown.
+	user, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+
+	grants := &ShowGrantsForUserStatement{}
+	grants.Name = user
+	return grants, nil
+}
+
+// parseShowDatabasesStatement returns an empty ShowDatabasesStatement; the statement
+// takes no arguments, so no further tokens are read.
+// The caller must already have consumed the "SHOW DATABASES" tokens.
+func (p *Parser) parseShowDatabasesStatement() (*ShowDatabasesStatement, error) {
+	return &ShowDatabasesStatement{}, nil
+}
+
+// parseCreateContinuousQueryStatement parses "QUERY <name> ON <db> BEGIN SELECT ... END" into a CreateContinuousQueryStatement.
+// This function assumes the "CREATE CONTINUOUS" tokens have already been consumed.
+func (p *Parser) parseCreateContinuousQueryStatement() (*CreateContinuousQueryStatement, error) {
+	stmt := &CreateContinuousQueryStatement{}
+
+	// Expect a "QUERY" token.
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != QUERY {
+		return nil, newParseError(tokstr(tok, lit), []string{"QUERY"}, pos)
+	}
+
+	// Read the id of the query to create.
+	ident, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Name = ident
+
+	// Expect an "ON" keyword.
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != ON {
+		return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+	}
+
+	// Read the name of the database to create the query on.
+	if ident, err = p.parseIdent(); err != nil {
+		return nil, err
+	}
+	stmt.Database = ident
+
+	// Expect the "BEGIN SELECT" tokens.
+	if err := p.parseTokens([]Token{BEGIN, SELECT}); err != nil {
+		return nil, err
+	}
+
+	// Read the select statement to be used as the source (SELECT itself was consumed above).
+	source, err := p.parseSelectStatement(targetRequired)
+	if err != nil {
+		return nil, err
+	}
+	stmt.Source = source
+
+	// Validate that the statement has a non-zero group by interval if it is aggregated.
+	if !source.IsRawQuery {
+		d, err := source.GroupByInterval()
+		if d == 0 || err != nil {
+			// Rewind so the error can name the token where the select statement ended.
+			p.unscan() // unscan the whitespace
+			p.unscan() // unscan the last token
+			tok, pos, lit := p.scanIgnoreWhitespace()
+			expected := []string{"GROUP BY time(...)"}
+			if err != nil {
+				expected = append(expected, err.Error()) // include the interval error text as a hint
+			}
+			return nil, newParseError(tokstr(tok, lit), expected, pos)
+		}
+	}
+
+	// Expect an "END" keyword.
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != END {
+		return nil, newParseError(tokstr(tok, lit), []string{"END"}, pos)
+	}
+
+	return stmt, nil
+}
+
+// parseCreateDatabaseStatement reads the database identifier that follows
+// "CREATE DATABASE" and returns it wrapped in a CreateDatabaseStatement.
+// The caller must already have consumed the "CREATE DATABASE" tokens.
+func (p *Parser) parseCreateDatabaseStatement() (*CreateDatabaseStatement, error) {
+	// The only operand is the name of the database to create.
+	name, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+
+	create := &CreateDatabaseStatement{}
+	create.Name = name
+	return create, nil
+}
+
+// parseDropDatabaseStatement reads the database identifier that follows
+// "DROP DATABASE" and returns it wrapped in a DropDatabaseStatement.
+// The caller must already have consumed the DROP DATABASE tokens.
+func (p *Parser) parseDropDatabaseStatement() (*DropDatabaseStatement, error) {
+	// The only operand is the name of the database to drop.
+	name, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+
+	drop := &DropDatabaseStatement{}
+	drop.Name = name
+	return drop, nil
+}
+
+// parseDropRetentionPolicyStatement parses "<policy> ON <database>" into a DropRetentionPolicyStatement.
+// The caller must already have consumed the DROP RETENTION POLICY tokens.
+func (p *Parser) parseDropRetentionPolicyStatement() (*DropRetentionPolicyStatement, error) {
+	// First operand: the retention policy name.
+	policy, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+
+	// The ON keyword separates the policy name from the database name.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != ON {
+		return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+	}
+
+	// Second operand: the database name.
+	database, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	drop := &DropRetentionPolicyStatement{}
+	drop.Name, drop.Database = policy, database
+	return drop, nil
+}
+
+// parseCreateUserStatement parses "<name> WITH PASSWORD <string> [WITH ALL PRIVILEGES]" into a CreateUserStatement.
+// This function assumes the "CREATE USER" tokens have already been consumed.
+func (p *Parser) parseCreateUserStatement() (*CreateUserStatement, error) {
+	stmt := &CreateUserStatement{}
+
+	// Parse name of the user to be created.
+	ident, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	stmt.Name = ident
+
+	// Consume the required "WITH PASSWORD" tokens.
+	if err := p.parseTokens([]Token{WITH, PASSWORD}); err != nil {
+		return nil, err
+	}
+
+	// Parse the new user's password, which must be a string rather than an identifier.
+	if ident, err = p.parseString(); err != nil {
+		return nil, err
+	}
+	stmt.Password = ident
+
+	// Check for the optional trailing WITH clause.
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok != WITH {
+		p.unscan() // no second WITH: the statement ends here and Admin stays false
+		return stmt, nil
+	}
+
+	// "WITH ALL PRIVILEGES" grants the new user admin privilege.
+	// Only admin privilege can be set on user creation.
+	if err := p.parseTokens([]Token{ALL, PRIVILEGES}); err != nil {
+		return nil, err
+	}
+	stmt.Admin = true
+
+	return stmt, nil
+}
+
+// parseDropUserStatement reads the user identifier that follows "DROP USER"
+// and returns it wrapped in a DropUserStatement.
+// The caller must already have consumed the DROP USER tokens.
+func (p *Parser) parseDropUserStatement() (*DropUserStatement, error) {
+	// The only operand is the name of the user to drop.
+	name, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+
+	drop := &DropUserStatement{}
+	drop.Name = name
+	return drop, nil
+}
+
+// parseRetentionPolicy parses "[DEFAULT] RETENTION POLICY <name>" and reports the
+// policy name together with whether the DEFAULT keyword was present.
+// This function assumes the "WITH" token has already been consumed.
+func (p *Parser) parseRetentionPolicy() (name string, dfault bool, err error) {
+	tok, pos, lit := p.scanIgnoreWhitespace()
+
+	// DEFAULT is optional; when present, move on to the token after it.
+	if tok == DEFAULT {
+		dfault = true
+		tok, pos, lit = p.scanIgnoreWhitespace()
+	}
+
+	// RETENTION is required next.
+	if tok != RETENTION {
+		return "", dfault, newParseError(tokstr(tok, lit), []string{"RETENTION"}, pos)
+	}
+
+	// POLICY is required after RETENTION.
+	tok, pos, lit = p.scanIgnoreWhitespace()
+	if tok != POLICY {
+		return "", dfault, newParseError(tokstr(tok, lit), []string{"POLICY"}, pos)
+	}
+
+	// The identifier that follows is the retention policy name.
+	name, err = p.parseIdent()
+	if err != nil {
+		return name, dfault, err
+	}
+	return name, dfault, nil
+}
+
+// parseShowStatsStatement parses the optional "ON <host>" clause and returns a ShowStatsStatement, or nil and an error.
+// This function assumes the "SHOW STATS" tokens have already been consumed.
+func (p *Parser) parseShowStatsStatement() (*ShowStatsStatement, error) {
+	stmt := &ShowStatsStatement{}
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok != ON {
+		p.unscan() // no ON clause: leave the token for the caller and keep Host empty
+		return stmt, nil
+	}
+	var err error
+	if stmt.Host, err = p.parseString(); err != nil {
+		return nil, err // like the sibling parsers, never hand back a half-built statement
+	}
+	return stmt, nil
+}
+
+// parseShowDiagnosticsStatement returns an empty ShowDiagnosticsStatement; the
+// statement takes no arguments, so no further tokens are read.
+func (p *Parser) parseShowDiagnosticsStatement() (*ShowDiagnosticsStatement, error) {
+	return &ShowDiagnosticsStatement{}, nil
+}
+
+// parseDropContinuousQueryStatement parses "QUERY <name> ON <database>" into a DropContinuousQueryStatement.
+// The caller must already have consumed the "DROP CONTINUOUS" tokens.
+func (p *Parser) parseDropContinuousQueryStatement() (*DropContinuousQueryStatement, error) {
+	// QUERY must follow DROP CONTINUOUS.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != QUERY {
+		return nil, newParseError(tokstr(tok, lit), []string{"QUERY"}, pos)
+	}
+
+	// Name of the continuous query to drop.
+	name, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+
+	// ON separates the query name from the database name.
+	tok, pos, lit = p.scanIgnoreWhitespace()
+	if tok != ON {
+		return nil, newParseError(tokstr(tok, lit), []string{"ON"}, pos)
+	}
+
+	// Database the query is removed from.
+	database, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+	drop := &DropContinuousQueryStatement{}
+	drop.Name, drop.Database = name, database
+	return drop, nil
+}
+
+// parseFields parses either a lone "*" or a comma-separated list of one or more fields.
+func (p *Parser) parseFields() (Fields, error) {
+	var fields Fields
+
+	// Check for "*" (i.e., "all fields"); it is returned as a single Wildcard field with no alias.
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok == MUL {
+		fields = append(fields, &Field{&Wildcard{}, ""})
+		return fields, nil
+	}
+	p.unscan() // not a wildcard: push the token back so parseField sees it
+
+	for {
+		// Parse the field.
+		f, err := p.parseField()
+		if err != nil {
+			return nil, err
+		}
+
+		// Add new field.
+		fields = append(fields, f)
+
+		// If there's not a comma next then stop parsing fields.
+		if tok, _, _ := p.scan(); tok != COMMA {
+			p.unscan() // leave the non-comma token for the caller
+			break
+		}
+	}
+	return fields, nil
+}
+
+// parseField parses one field of a field list: an expression with an optional
+// "AS <alias>" suffix. Whitespace after the field is consumed as well.
+func (p *Parser) parseField() (*Field, error) {
+	// The expression comes first.
+	expr, err := p.ParseExpr()
+	if err != nil {
+		return nil, err
+	}
+
+	// An alias is optional; parseAlias yields "" when there is no AS keyword.
+	alias, err := p.parseAlias()
+	if err != nil {
+		return nil, err
+	}
+
+	// Swallow trailing whitespace so the caller sees the next real token.
+	p.consumeWhitespace()
+
+	field := &Field{}
+	field.Expr = expr
+	field.Alias = alias
+	return field, nil
+}
+
+// parseAlias parses an optional "AS <identifier>" alias for fields and dimensions.
+// It returns "" with a nil error when the next token is not AS.
+func (p *Parser) parseAlias() (string, error) {
+	tok, _, _ := p.scanIgnoreWhitespace()
+	if tok != AS {
+		p.unscan()
+		return "", nil
+	}
+	// AS must be followed by the alias identifier.
+	alias, err := p.parseIdent()
+	if err != nil {
+		return "", err
+	}
+	return alias, nil
+}
+
+// parseSources parses one or more sources separated by commas.
+func (p *Parser) parseSources() (Sources, error) {
+	var list Sources
+
+	for {
+		src, err := p.parseSource()
+		if err != nil {
+			return nil, err
+		}
+		list = append(list, src)
+
+		// A comma means another source follows; any other token ends the list.
+		if tok, _, _ := p.scanIgnoreWhitespace(); tok == COMMA {
+			continue
+		}
+		p.unscan()
+		return list, nil
+	}
+}
+
+// peekRune returns the next rune that would be read by the scanner, reading it and then putting it back.
+func (p *Parser) peekRune() rune {
+	r, _, _ := p.s.s.r.ReadRune() // the size and error results are discarded
+	if r != eof {
+		_ = p.s.s.r.UnreadRune() // put the rune back; nothing is unread once eof was returned
+	}
+
+	return r
+}
+
+// parseSource parses a single source: a measurement given by name or regex,
+// optionally preceded by dot-separated qualifying identifiers.
+func (p *Parser) parseSource() (Source, error) {
+	m := &Measurement{}
+
+	// A source may consist of nothing but a regex.
+	re, err := p.parseRegex()
+	if err != nil {
+		return nil, err
+	}
+	if re != nil {
+		// Regex is always last so we're done.
+		m.Regex = re
+		return m, nil
+	}
+
+	// No leading regex: read the segmented identifiers.
+	idents, err := p.parseSegmentedIdents()
+	if err != nil {
+		return nil, err
+	}
+
+	// Three identifiers is the maximum: database, retention policy and name.
+	if len(idents) == 3 {
+		m.Database, m.RetentionPolicy, m.Name = idents[0], idents[1], idents[2]
+		return m, nil
+	}
+
+	// Fewer identifiers may still be followed by a regex.
+	if re, err = p.parseRegex(); err != nil {
+		return nil, err
+	}
+	if re != nil {
+		m.Regex = re
+	}
+
+	// With a trailing regex the identifiers only qualify it; otherwise the last one is the name.
+	switch {
+	case len(idents) == 1 && re != nil:
+		m.RetentionPolicy = idents[0]
+	case len(idents) == 1:
+		m.Name = idents[0]
+	case len(idents) == 2 && re != nil:
+		m.Database, m.RetentionPolicy = idents[0], idents[1]
+	case len(idents) == 2:
+		m.RetentionPolicy, m.Name = idents[0], idents[1]
+	}
+
+	return m, nil
+}
+
+// parseCondition parses the optional "WHERE <expr>" clause of a query.
+// It returns a nil expression and nil error when there is no WHERE token.
+func (p *Parser) parseCondition() (Expr, error) {
+	tok, _, _ := p.scanIgnoreWhitespace()
+	if tok != WHERE {
+		p.unscan()
+		return nil, nil
+	}
+
+	// The condition is the expression that follows WHERE.
+	cond, err := p.ParseExpr()
+	if err != nil {
+		return nil, err
+	}
+	return cond, nil
+}
+
+// parseDimensions parses the "GROUP BY" clause of the query, if it exists; without GROUP it returns nil, nil.
+func (p *Parser) parseDimensions() (Dimensions, error) {
+	// If the next token is not GROUP then push it back and exit.
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok != GROUP {
+		p.unscan()
+		return nil, nil
+	}
+
+	// Now the next token should be "BY".
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok != BY {
+		return nil, newParseError(tokstr(tok, lit), []string{"BY"}, pos)
+	}
+
+	var dimensions Dimensions
+	for {
+		// Parse the dimension.
+		d, err := p.parseDimension()
+		if err != nil {
+			return nil, err
+		}
+
+		// Add new dimension.
+		dimensions = append(dimensions, d)
+
+		// If there's not a comma next then stop parsing dimensions.
+		if tok, _, _ := p.scan(); tok != COMMA {
+			p.unscan() // leave the non-comma token for the caller
+			break
+		}
+	}
+	return dimensions, nil
+}
+
+// parseDimension parses one GROUP BY dimension: an expression followed by
+// optional whitespace, which is consumed.
+func (p *Parser) parseDimension() (*Dimension, error) {
+	expr, err := p.ParseExpr()
+	if err != nil {
+		return nil, err
+	}
+
+	// Swallow trailing whitespace so the caller sees the next real token.
+	p.consumeWhitespace()
+	dim := &Dimension{Expr: expr}
+	return dim, nil
+}
+
+// parseFill parses an optional fill(<option>) call; anything that is not a fill call yields NullFill with a nil error.
+func (p *Parser) parseFill() (FillOption, interface{}, error) {
+	// Parse the expression first. NOTE(review): a parse error here is swallowed and treated as "no fill" — confirm intent.
+	expr, err := p.ParseExpr()
+	if err != nil {
+		p.unscan()
+		return NullFill, nil, nil
+	}
+	if lit, ok := expr.(*Call); !ok {
+		p.unscan() // NOTE(review): pushes back one token only — confirm this restores the position before the expression
+		return NullFill, nil, nil
+	} else {
+		if strings.ToLower(lit.Name) != "fill" {
+			p.unscan() // some other call: not ours to consume
+			return NullFill, nil, nil
+		}
+		if len(lit.Args) != 1 {
+			return NullFill, nil, errors.New("fill requires an argument, e.g.: 0, null, none, previous")
+		}
+		switch lit.Args[0].String() { // keyword options are matched on the argument's string form
+		case "null":
+			return NullFill, nil, nil
+		case "none":
+			return NoFill, nil, nil
+		case "previous":
+			return PreviousFill, nil, nil
+		default:
+			num, ok := lit.Args[0].(*NumberLiteral) // any other option must be a number literal
+			if !ok {
+				return NullFill, nil, fmt.Errorf("expected number argument in fill()")
+			}
+			return NumberFill, num.Val, nil
+		}
+	}
+}
+
+// parseOptionalTokenAndInt parses the specified token followed by a non-negative
+// integer, if it exists. It returns 0 with a nil error when the token is absent.
+func (p *Parser) parseOptionalTokenAndInt(t Token) (int, error) {
+	// Check if the token exists.
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok != t {
+		p.unscan()
+		return 0, nil
+	}
+
+	// Scan the number.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != NUMBER {
+		return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos)
+	}
+
+	// Return an error if the number has a fractional part.
+	if strings.Contains(lit, ".") {
+		msg := fmt.Sprintf("fractional parts not allowed in %s", t.String())
+		return 0, &ParseError{Message: msg, Pos: pos}
+	}
+
+	// Parse number. bitSize 0 makes ParseInt reject anything that does not fit in an
+	// int, so an out-of-range value is an error instead of being clamped or truncated.
+	n, err := strconv.ParseInt(lit, 10, 0)
+	if err != nil {
+		return 0, &ParseError{Message: fmt.Sprintf("invalid %s value %s", t.String(), lit), Pos: pos}
+	} else if n < 0 {
+		return 0, &ParseError{Message: fmt.Sprintf("%s must be >= 0", t.String()), Pos: pos}
+	}
+	return int(n), nil
+}
+
+// parseOrderBy parses the optional "ORDER BY <fields>" clause of a query.
+// It returns nil fields and a nil error when there is no ORDER token.
+func (p *Parser) parseOrderBy() (SortFields, error) {
+	tok, _, _ := p.scanIgnoreWhitespace()
+	if tok != ORDER {
+		p.unscan()
+		return nil, nil
+	}
+
+	// ORDER must be followed by BY.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	if tok != BY {
+		return nil, newParseError(tokstr(tok, lit), []string{"BY"}, pos)
+	}
+
+	sortFields, err := p.parseSortFields()
+	if err != nil {
+		return nil, err
+	}
+	return sortFields, nil
+}
+
+// parseSortFields parses the sort fields for an ORDER BY clause; only a bare ASC or "time [ASC]" is currently accepted.
+func (p *Parser) parseSortFields() (SortFields, error) {
+	var fields SortFields
+
+	// If first token is ASC or DESC, all fields are sorted.
+	if tok, pos, lit := p.scanIgnoreWhitespace(); tok == ASC || tok == DESC {
+		if tok == DESC {
+			// Token must be ASC, until other sort orders are supported.
+			return nil, errors.New("only ORDER BY time ASC supported at this time")
+		}
+		return append(fields, &SortField{Ascending: (tok == ASC)}), nil // a single field with no name
+	} else if tok != IDENT {
+		return nil, newParseError(tokstr(tok, lit), []string{"identifier", "ASC", "DESC"}, pos)
+	}
+	p.unscan() // it was an identifier: push it back so parseSortField reads it
+
+	// At least one field is required.
+	field, err := p.parseSortField()
+	if err != nil {
+		return nil, err
+	}
+	fields = append(fields, field)
+
+	// Parse additional comma-separated fields.
+	for {
+		tok, _, _ := p.scanIgnoreWhitespace()
+
+		if tok != COMMA {
+			p.unscan()
+			break
+		}
+
+		field, err := p.parseSortField()
+		if err != nil {
+			return nil, err
+		}
+
+		fields = append(fields, field)
+	}
+
+	// The only accepted list is exactly one field, named "time", ascending; everything else is rejected here.
+	if len(fields) > 1 || fields[0].Name != "time" || !fields[0].Ascending {
+		return nil, errors.New("only ORDER BY time ASC supported at this time")
+	}
+
+	return fields, nil
+}
+
+// parseSortField parses one ORDER BY field: an identifier with an optional
+// ASC or DESC suffix. The direction defaults to ascending.
+func (p *Parser) parseSortField() (*SortField, error) {
+	name, err := p.parseIdent()
+	if err != nil {
+		return nil, err
+	}
+
+	// An explicit ASC/DESC is optional; any other token is pushed back for
+	// the caller.
+	tok, _, _ := p.scanIgnoreWhitespace()
+	explicit := tok == ASC || tok == DESC
+	if !explicit {
+		p.unscan()
+	}
+
+	sf := &SortField{}
+	sf.Name = name
+	sf.Ascending = tok != DESC // everything except an explicit DESC sorts ascending
+	return sf, nil
+}
+
+// parseVarRef parses a reference to a measurement or field. The reference's
+// segments are joined with "." to form the VarRef value.
+func (p *Parser) parseVarRef() (*VarRef, error) {
+	segments, err := p.parseSegmentedIdents()
+	if err != nil {
+		return nil, err
+	}
+
+	// Collapse the segments back into a single dotted name.
+	name := strings.Join(segments, ".")
+	return &VarRef{Val: name}, nil
+}
+
+// ParseExpr parses an expression: unary expressions joined by binary operators, arranged by operator precedence.
+func (p *Parser) ParseExpr() (Expr, error) {
+	var err error
+	// Dummy root node; only its RHS is used, and that is what gets returned.
+	root := &BinaryExpr{}
+
+	// Parse a non-binary expression type to start.
+	// root.RHS always holds the root of the expression tree built so far.
+	root.RHS, err = p.parseUnaryExpr()
+	if err != nil {
+		return nil, err
+	}
+
+	// Loop over operations and unary exprs and build a tree based on precedence.
+	for {
+		// If the next token is NOT an operator then return the expression.
+		op, _, _ := p.scanIgnoreWhitespace()
+		if !op.isOperator() {
+			p.unscan()
+			return root.RHS, nil
+		}
+
+		// Otherwise parse the next expression.
+		var rhs Expr
+		if IsRegexOp(op) {
+			// RHS of a regex operator must be a regular expression.
+			p.consumeWhitespace()
+			if rhs, err = p.parseRegex(); err != nil {
+				return nil, err
+			}
+			// parseRegex can return a nil *RegexLiteral (no regex found), but one is required here.
+			if rhs.(*RegexLiteral) == nil {
+				tok, pos, lit := p.scanIgnoreWhitespace()
+				return nil, newParseError(tokstr(tok, lit), []string{"regex"}, pos)
+			}
+		} else {
+			if rhs, err = p.parseUnaryExpr(); err != nil {
+				return nil, err
+			}
+		}
+
+		// Find the right spot in the tree to add the new expression by
+		// descending the RHS of the expression tree until we reach the last
+		// BinaryExpr or a BinaryExpr whose RHS has an operator with
+		// precedence >= the operator being added.
+		for node := root; ; {
+			r, ok := node.RHS.(*BinaryExpr)
+			if !ok || r.Op.Precedence() >= op.Precedence() {
+				// Add the new expression here and break; the old subtree becomes its LHS.
+				node.RHS = &BinaryExpr{LHS: node.RHS, RHS: rhs, Op: op}
+				break
+			}
+			node = r
+		}
+	}
+}
+
+// parseUnaryExpr parses a non-binary expression: a parenthesized expression, call, variable reference, DISTINCT, literal, wildcard or regex.
+func (p *Parser) parseUnaryExpr() (Expr, error) {
+	// If the first token is a LPAREN then parse it as its own grouped expression.
+	if tok, _, _ := p.scanIgnoreWhitespace(); tok == LPAREN {
+		expr, err := p.ParseExpr()
+		if err != nil {
+			return nil, err
+		}
+
+		// Expect an RPAREN at the end.
+		if tok, pos, lit := p.scanIgnoreWhitespace(); tok != RPAREN {
+			return nil, newParseError(tokstr(tok, lit), []string{")"}, pos)
+		}
+
+		return &ParenExpr{Expr: expr}, nil
+	}
+	p.unscan()
+
+	// Read next token.
+	tok, pos, lit := p.scanIgnoreWhitespace()
+	switch tok {
+	case IDENT:
+		// An IDENT immediately followed by "(" is a function call; otherwise it is a variable reference.
+		if tok0, _, _ := p.scan(); tok0 == LPAREN {
+			return p.parseCall(lit)
+		}
+
+		p.unscan() // unscan the last token (wasn't an LPAREN)
+		p.unscan() // unscan the IDENT token
+
+		// Parse it as a VarRef.
+		return p.parseVarRef()
+	case DISTINCT:
+		// DISTINCT immediately followed by "(" is a function call; "DISTINCT <ident>" is a Distinct expression.
+		tok0, pos, lit := p.scan()
+		if tok0 == LPAREN {
+			return p.parseCall("distinct")
+		} else if tok0 == WS {
+			tok1, pos, lit := p.scanIgnoreWhitespace()
+			if tok1 != IDENT {
+				return nil, newParseError(tokstr(tok1, lit), []string{"identifier"}, pos)
+			}
+			return &Distinct{Val: lit}, nil
+		}
+
+		return nil, newParseError(tokstr(tok0, lit), []string{"(", "identifier"}, pos)
+	case STRING:
+		// If literal looks like a date time then parse it as a time literal.
+		if isDateTimeString(lit) {
+			t, err := time.Parse(DateTimeFormat, lit)
+			if err != nil {
+				// Fall back to RFC 3339 with nanoseconds before giving up.
+				if t, err = time.Parse(time.RFC3339Nano, lit); err != nil {
+					return nil, &ParseError{Message: "unable to parse datetime", Pos: pos}
+				}
+			}
+			return &TimeLiteral{Val: t}, nil
+		} else if isDateString(lit) {
+			t, err := time.Parse(DateFormat, lit)
+			if err != nil {
+				return nil, &ParseError{Message: "unable to parse date", Pos: pos}
+			}
+			return &TimeLiteral{Val: t}, nil
+		}
+		return &StringLiteral{Val: lit}, nil
+	case NUMBER:
+		v, err := strconv.ParseFloat(lit, 64)
+		if err != nil {
+			return nil, &ParseError{Message: "unable to parse number", Pos: pos}
+		}
+		return &NumberLiteral{Val: v}, nil
+	case TRUE, FALSE:
+		return &BooleanLiteral{Val: (tok == TRUE)}, nil
+	case DURATION_VAL:
+		// A duration that cannot be parsed (e.g. out of range) is an error, not a silent zero duration.
+		v, err := ParseDuration(lit)
+		if err != nil {
+			return nil, &ParseError{Message: "unable to parse duration", Pos: pos}
+		}
+		return &DurationLiteral{Val: v}, nil
+	case MUL:
+		return &Wildcard{}, nil
+	case REGEX:
+		re, err := regexp.Compile(lit)
+		if err != nil {
+			return nil, &ParseError{Message: err.Error(), Pos: pos}
+		}
+		return &RegexLiteral{Val: re}, nil
+	default:
+		return nil, newParseError(tokstr(tok, lit), []string{"identifier", "string", "number", "bool"}, pos)
+	}
+}
+
+// parseRegex parses an optional regular expression; it returns nil, nil when the next non-whitespace character is not '/'.
+func (p *Parser) parseRegex() (*RegexLiteral, error) {
+	nextRune := p.peekRune()
+	if isWhitespace(nextRune) {
+		p.consumeWhitespace() // skip leading whitespace; it stays consumed even if no regex follows
+	}
+
+	// If the next character is not a '/', then return nils.
+	nextRune = p.peekRune()
+	if nextRune != '/' {
+		return nil, nil
+	}
+
+	tok, pos, lit := p.s.ScanRegex() // uses the scanner's regex-specific scan instead of the normal token scan
+
+	if tok == BADESCAPE {
+		msg := fmt.Sprintf("bad escape: %s", lit)
+		return nil, &ParseError{Message: msg, Pos: pos}
+	} else if tok == BADREGEX {
+		msg := fmt.Sprintf("bad regex: %s", lit)
+		return nil, &ParseError{Message: msg, Pos: pos}
+	} else if tok != REGEX {
+		return nil, newParseError(tokstr(tok, lit), []string{"regex"}, pos)
+	}
+
+	re, err := regexp.Compile(lit) // the scanned literal must also be a valid Go regular expression
+	if err != nil {
+		return nil, &ParseError{Message: err.Error(), Pos: pos}
+	}
+
+	return &RegexLiteral{Val: re}, nil
+}
+
+// parseCall parses a function call's arguments and closing parenthesis; the call name is stored lower-cased.
+// This function assumes the function name and LPAREN have been consumed.
+func (p *Parser) parseCall(name string) (*Call, error) {
+	name = strings.ToLower(name)
+	// If there's a right paren then just return immediately (a call with no arguments).
+	if tok, _, _ := p.scan(); tok == RPAREN {
+		return &Call{Name: name}, nil
+	}
+	p.unscan()
+
+	// Otherwise parse function call arguments.
+	var args []Expr
+	for {
+		// Parse an expression argument.
+		arg, err := p.ParseExpr()
+		if err != nil {
+			return nil, err
+		}
+		args = append(args, arg)
+
+		// If there's not a comma next then stop parsing arguments.
+		if tok, _, _ := p.scan(); tok != COMMA {
+			p.unscan()
+			break
+		}
+	}
+
+	// There should be a right parentheses at the end; note this scan does not skip whitespace.
+	if tok, pos, lit := p.scan(); tok != RPAREN {
+		return nil, newParseError(tokstr(tok, lit), []string{")"}, pos)
+	}
+
+	return &Call{Name: name, Args: args}, nil
+}
+
+// scan returns the next token, its position and its literal text from the underlying scanner.
+func (p *Parser) scan() (tok Token, pos Pos, lit string) { return p.s.Scan() }
+
+// scanIgnoreWhitespace scans the next token, skipping over a single leading
+// whitespace token if there is one.
+func (p *Parser) scanIgnoreWhitespace() (tok Token, pos Pos, lit string) {
+	if tok, pos, lit = p.scan(); tok != WS {
+		return tok, pos, lit
+	}
+	return p.scan()
+}
+
+// consumeWhitespace scans the next token and pushes it back unless it is whitespace.
+func (p *Parser) consumeWhitespace() {
+	if tok, _, _ := p.scan(); tok != WS {
+		p.unscan() // not whitespace: leave it for the next scan
+	}
+}
+
+// unscan pushes the previously read token back onto the underlying scanner's buffer.
+func (p *Parser) unscan() { p.s.Unscan() }
+
+// ParseDuration parses a duration: an integer with an optional unit (u, µ, ms, s, m, h, d, w); a bare integer is microseconds.
+func ParseDuration(s string) (time.Duration, error) {
+	// Return an error if the string is blank.
+	if len(s) == 0 {
+		return 0, ErrInvalidDuration
+	}
+
+	// If there's only one character (byte) then it must be a digit (in microseconds).
+	if len(s) == 1 {
+		if n, err := strconv.ParseInt(s, 10, 64); err == nil {
+			return time.Duration(n) * time.Microsecond, nil
+		}
+		return 0, ErrInvalidDuration
+	}
+
+	// Split string into individual runes so a multi-byte unit such as "µ" counts as one character.
+	a := split(s)
+
+	// Extract the unit of measure.
+	// If the last character is a digit then parse the whole string as microseconds.
+	// If the last two characters are "ms" then parse as milliseconds.
+	// Otherwise just use the last character as the unit of measure.
+	var num, uom string
+	if isDigit(rune(a[len(a)-1])) {
+		num, uom = s, "u"
+	} else if len(s) > 2 && s[len(s)-2:] == "ms" {
+		num, uom = string(a[:len(a)-2]), "ms"
+	} else {
+		num, uom = string(a[:len(a)-1]), string(a[len(a)-1:])
+	}
+
+	// Parse the numeric part; anything that is not a base-10 int64 is rejected.
+	n, err := strconv.ParseInt(num, 10, 64)
+	if err != nil {
+		return 0, ErrInvalidDuration
+	}
+
+	// Multiply by the unit of measure. NOTE(review): these multiplications are not checked for overflow.
+	switch uom {
+	case "u", "µ":
+		return time.Duration(n) * time.Microsecond, nil
+	case "ms":
+		return time.Duration(n) * time.Millisecond, nil
+	case "s":
+		return time.Duration(n) * time.Second, nil
+	case "m":
+		return time.Duration(n) * time.Minute, nil
+	case "h":
+		return time.Duration(n) * time.Hour, nil
+	case "d":
+		return time.Duration(n) * 24 * time.Hour, nil
+	case "w":
+		return time.Duration(n) * 7 * 24 * time.Hour, nil
+	default:
+		return 0, ErrInvalidDuration
+	}
+}
+
+// FormatDuration formats d with the largest unit that divides it evenly, or as bare whole microseconds otherwise.
+func FormatDuration(d time.Duration) string {
+	if d == 0 {
+		return "0s"
+	}
+	// Ordered from the largest unit down so the first exact divisor wins.
+	for _, u := range []struct {
+		size   time.Duration
+		format string
+	}{
+		{7 * 24 * time.Hour, "%dw"}, {24 * time.Hour, "%dd"}, {time.Hour, "%dh"},
+		{time.Minute, "%dm"}, {time.Second, "%ds"}, {time.Millisecond, "%dms"},
+	} {
+		if d%u.size == 0 {
+			return fmt.Sprintf(u.format, d/u.size)
+		}
+	}
+	return fmt.Sprintf("%d", d/time.Microsecond)
+}
+
+// parseTokens consumes toks in order, skipping whitespace, and fails on the first token that differs.
+func (p *Parser) parseTokens(toks []Token) error {
+	for i := range toks {
+		if tok, pos, lit := p.scanIgnoreWhitespace(); tok != toks[i] {
+			return newParseError(tokstr(tok, lit), []string{tokens[toks[i]]}, pos)
+		}
+	}
+	return nil
+}
+
+// QuoteString returns s wrapped in single quotes, with newlines, backslashes and single quotes backslash-escaped.
+func QuoteString(s string) string {
+	return `'` + strings.NewReplacer("\n", `\n`, `\`, `\\`, `'`, `\'`).Replace(s) + `'`
+}
+
+// QuoteIdent joins segments with '.', escaping each one and wrapping it in double quotes where required.
+func QuoteIdent(segments ...string) string {
+	r := strings.NewReplacer("\n", `\n`, `\`, `\\`, `"`, `\"`)
+
+	var buf bytes.Buffer
+	for i, segment := range segments {
+		// Quote when the segment needs it, and also quote every non-empty segment before the last.
+		needQuote := IdentNeedsQuotes(segment) ||
+			((i < len(segments)-1) && segment != "") // not last segment && not ""
+
+		if needQuote {
+			_ = buf.WriteByte('"')
+		}
+
+		_, _ = buf.WriteString(r.Replace(segment)) // escaping is applied whether or not quotes are added
+
+		if needQuote {
+			_ = buf.WriteByte('"')
+		}
+
+		if i < len(segments)-1 {
+			_ = buf.WriteByte('.') // separator between segments, never after the last one
+		}
+	}
+	return buf.String()
+}
+
+// IdentNeedsQuotes reports whether ident contains a rune that is not allowed
+// at its position in a bare identifier. An empty ident needs no quotes.
+func IdentNeedsQuotes(ident string) bool {
+	for i, r := range ident {
+		// The first rune is held to a different rule than the ones after it.
+		if (i == 0 && !isIdentFirstChar(r)) || (i > 0 && !isIdentChar(r)) {
+			return true
+		}
+	}
+	return false
+}
+
+// split converts s into a slice of its runes; an empty string yields a nil slice.
+func split(s string) (runes []rune) {
+	for _, r := range s {
+		runes = append(runes, r)
+	}
+	return runes
+}
+
+// isDateString returns true if the string looks like a date-only time literal (shape check only, not validity).
+func isDateString(s string) bool { return dateStringRegexp.MatchString(s) }
+
+// isDateTimeString returns true if the string looks like a date+time time literal (shape check only, not validity).
+func isDateTimeString(s string) bool { return dateTimeStringRegexp.MatchString(s) }
+
+var dateStringRegexp = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}$`)      // exactly 4-2-2 digit groups separated by dashes
+var dateTimeStringRegexp = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}.+`) // the same prefix followed by at least one more character
+
+// ErrInvalidDuration is returned when parsing a malformed duration.
+var ErrInvalidDuration = errors.New("invalid duration")
+
+// ParseError represents an error that occurred during parsing, either as a free-form message or as found/expected tokens.
+type ParseError struct {
+	Message  string   // if non-empty, Error reports this instead of Found/Expected
+	Found    string   // text of what was actually found
+	Expected []string // descriptions of what would have been accepted
+	Pos      Pos      // where the error occurred; Error prints Line and Char plus one
+}
+
+// newParseError builds a ParseError for an unexpected token: what was found, what was expected, and where.
+func newParseError(found string, expected []string, pos Pos) *ParseError {
+	return &ParseError{Pos: pos, Expected: expected, Found: found}
+}
+
+// Error returns the error text: Message when it is set, otherwise a "found ..., expected ..." description, both with position.
+func (e *ParseError) Error() string {
+	if e.Message == "" {
+		return fmt.Sprintf("found %s, expected %s at line %d, char %d", e.Found, strings.Join(e.Expected, ", "), e.Pos.Line+1, e.Pos.Char+1)
+	}
+	return fmt.Sprintf("%s at line %d, char %d", e.Message, e.Pos.Line+1, e.Pos.Char+1)
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/result.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/result.go
new file mode 100644
index 00000000000..94d10205443
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/result.go
@@ -0,0 +1,223 @@
+package influxql
+
+import (
+	"encoding/json"
+	"errors"
+	"hash/fnv"
+	"sort"
+)
+
+// TagSet is a fundamental concept within the query system. It represents a composite series,
+// composed of multiple individual series that share a set of tag attributes.
+type TagSet struct {
+	Tags       map[string]string
+	Filters    []Expr   // Filters[i] is the filter added together with SeriesKeys[i] (see AddFilter)
+	SeriesKeys []string // appended in lockstep with Filters
+	Key        []byte
+}
+
+// AddFilter appends a series key and its series-level filter to the TagSet at matching indexes.
+func (t *TagSet) AddFilter(key string, filter Expr) {
+	t.Filters = append(t.Filters, filter)
+	t.SeriesKeys = append(t.SeriesKeys, key)
+}
+
+// Row represents a single row returned from the execution of a statement. Rows.Less orders rows by Name, then by a hash of Tags.
+type Row struct {
+	Name    string            `json:"name,omitempty"`
+	Tags    map[string]string `json:"tags,omitempty"`
+	Columns []string          `json:"columns,omitempty"`
+	Values  [][]interface{}   `json:"values,omitempty"`
+	Err     error             `json:"err,omitempty"` // NOTE(review): encoded as a raw error value, unlike Result.Err which is sent as a string — confirm intended
+}
+
+// tagsHash returns the 64-bit FNV-1a hash of the tag key/value pairs.
+func (r *Row) tagsHash() uint64 {
+	hasher := fnv.New64a()
+	for _, key := range r.tagsKeys() {
+		// Keys arrive sorted, so equal tag sets always hash identically.
+		hasher.Write([]byte(key))
+		hasher.Write([]byte(r.Tags[key]))
+	}
+	return hasher.Sum64()
+}
+
+// tagsKeys returns the row's tag keys sorted in ascending order.
+func (r *Row) tagsKeys() []string {
+	keys := make([]string, 0, len(r.Tags))
+	for name := range r.Tags {
+		keys = append(keys, name)
+	}
+	sort.Strings(keys)
+	return keys
+}
+
+// Rows represents a list of rows that can be sorted consistently by name/tag.
+type Rows []*Row
+
+// Len returns the number of rows.
+func (p Rows) Len() int { return len(p) }
+
+// Less orders rows by name, breaking ties with the hash of their tag sets.
+func (p Rows) Less(i, j int) bool {
+	a, b := p[i], p[j]
+	if a.Name == b.Name {
+		// Tags have no natural order; hashes only make the order repeatable.
+		return a.tagsHash() < b.tagsHash()
+	}
+	return a.Name < b.Name
+}
+
+// Swap exchanges the rows at i and j.
+func (p Rows) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+
+// Result represents a resultset returned from a single statement. Its JSON form is defined by MarshalJSON/UnmarshalJSON.
+type Result struct {
+	// StatementID is just the statement's position in the query. It's used
+	// to combine statement results if they're being buffered in memory.
+	StatementID int `json:"-"`
+	Series      Rows
+	Err         error
+}
+
+// MarshalJSON encodes the result into JSON.
+// The output carries "series" and "error" members; each is omitted when empty.
+func (r *Result) MarshalJSON() ([]byte, error) {
+	// The error travels as its message string rather than as an error value.
+	var msg string
+	if r.Err != nil {
+		msg = r.Err.Error()
+	}
+
+	// An anonymous struct supplies the wire-level member names.
+	wire := struct {
+		Series []*Row `json:"series,omitempty"`
+		Err    string `json:"error,omitempty"`
+	}{Series: r.Series, Err: msg}
+	return json.Marshal(&wire)
+}
+
+// UnmarshalJSON decodes the "series" and "error" members of b into the Result.
+// r is left untouched when decoding fails; Err is only set for a non-empty "error".
+func (r *Result) UnmarshalJSON(b []byte) error {
+	var wire struct {
+		Series []*Row `json:"series,omitempty"`
+		Err    string `json:"error,omitempty"`
+	}
+	if err := json.Unmarshal(b, &wire); err != nil {
+		return err
+	}
+	r.Series = wire.Series
+	if wire.Err == "" {
+		return nil
+	}
+	r.Err = errors.New(wire.Err)
+	return nil
+}
+
+// GetProcessor builds a Processor for expr. Variable references and calls read values[startIndex];
+// literals ignore their input. The returned int is the next unused index. Other Expr types panic.
+func GetProcessor(expr Expr, startIndex int) (Processor, int) {
+	switch e := expr.(type) {
+	case *VarRef, *Call:
+		return newEchoProcessor(startIndex), startIndex + 1
+	case *BinaryExpr:
+		return getBinaryProcessor(e, startIndex)
+	case *ParenExpr:
+		return GetProcessor(e.Expr, startIndex)
+	case *NumberLiteral:
+		return newLiteralProcessor(e.Val), startIndex
+	case *StringLiteral:
+		return newLiteralProcessor(e.Val), startIndex
+	case *BooleanLiteral:
+		return newLiteralProcessor(e.Val), startIndex
+	case *TimeLiteral:
+		return newLiteralProcessor(e.Val), startIndex
+	case *DurationLiteral:
+		return newLiteralProcessor(e.Val), startIndex
+	}
+	panic("unreachable")
+}
+
+// Processor computes one output value from a slice of input values.
+// Instances are built by GetProcessor and the helpers below.
+type Processor func(values []interface{}) interface{}
+
+// newEchoProcessor returns a Processor that yields values[index] unchanged.
+func newEchoProcessor(index int) Processor {
+	return func(values []interface{}) interface{} { return values[index] }
+}
+
+// newLiteralProcessor returns a Processor that ignores its input and yields val.
+func newLiteralProcessor(val interface{}) Processor {
+	return func(_ []interface{}) interface{} { return val }
+}
+
+// getBinaryProcessor builds processors for both operands; the RHS starts at the index the LHS left off at.
+func getBinaryProcessor(expr *BinaryExpr, startIndex int) (Processor, int) {
+	left, next := GetProcessor(expr.LHS, startIndex)
+	right, next := GetProcessor(expr.RHS, next)
+	return newBinaryExprEvaluator(expr.Op, left, right), next
+}
+
+// newBinaryExprEvaluator returns a Processor that applies the arithmetic
+// operator op to the results of lhs and rhs. The Processor yields nil unless
+// both operands are float64, and also yields nil for division by zero.
+// Operators other than ADD, SUB, MUL and DIV give a Processor that always yields nil.
+func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor {
+	switch op {
+	case ADD:
+		return func(values []interface{}) interface{} {
+			l := lhs(values)
+			r := rhs(values)
+			// A zero operand is valid here; only DIV needs a zero guard.
+			if lv, ok := l.(float64); ok {
+				if rv, ok := r.(float64); ok {
+					return lv + rv
+				}
+			}
+			return nil
+		}
+	case SUB:
+		return func(values []interface{}) interface{} {
+			l := lhs(values)
+			r := rhs(values)
+			if lv, ok := l.(float64); ok {
+				if rv, ok := r.(float64); ok {
+					return lv - rv
+				}
+			}
+			return nil
+		}
+	case MUL:
+		return func(values []interface{}) interface{} {
+			l := lhs(values)
+			r := rhs(values)
+			if lv, ok := l.(float64); ok {
+				if rv, ok := r.(float64); ok {
+					return lv * rv
+				}
+			}
+			return nil
+		}
+	case DIV:
+		return func(values []interface{}) interface{} {
+			l := lhs(values)
+			r := rhs(values)
+			if lv, ok := l.(float64); ok {
+				if rv, ok := r.(float64); ok {
+					// Division by zero yields nil rather than an Inf or NaN value.
+					if rv != 0 {
+						return lv / rv
+					}
+				}
+			}
+			return nil
+		}
+	default:
+		// we shouldn't get here, but give them back nils if it goes this way
+		return func(values []interface{}) interface{} {
+			return nil
+		}
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner.go
new file mode 100644
index 00000000000..4e15edddc14
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner.go
@@ -0,0 +1,561 @@
+package influxql
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+)
+
+// Scanner represents a lexical scanner for InfluxQL.
+type Scanner struct {
+	r *reader // rune source with a small push-back buffer
+}
+
+// NewScanner returns a new instance of Scanner that reads from r through a bufio.Reader.
+func NewScanner(r io.Reader) *Scanner {
+	return &Scanner{r: &reader{r: bufio.NewReader(r)}}
+}
+
+// Scan returns the next token and position from the underlying reader.
+// Also returns the literal text read for strings, numbers, and duration tokens
+// since these token types can have different literal representations.
+func (s *Scanner) Scan() (tok Token, pos Pos, lit string) {
+	// Read next code point.
+	ch0, pos := s.r.read()
+
+	// If we see whitespace then consume all contiguous whitespace.
+	// If we see a letter or an underscore then consume as an ident or
+	// reserved word. If we see a digit then consume as a number or duration.
+	if isWhitespace(ch0) {
+		return s.scanWhitespace()
+	} else if isLetter(ch0) || ch0 == '_' {
+		s.r.unread()
+		return s.scanIdent()
+	} else if isDigit(ch0) {
+		return s.scanNumber()
+	}
+
+	// Otherwise parse individual characters.
+	switch ch0 {
+	case eof:
+		return EOF, pos, ""
+	case '"':
+		s.r.unread() // a double quote starts a quoted identifier, not a string
+		return s.scanIdent()
+	case '\'':
+		return s.scanString()
+	case '.':
+		ch1, _ := s.r.read()
+		s.r.unread() // ch1 was only peeked at
+		if isDigit(ch1) {
+			return s.scanNumber()
+		}
+		return DOT, pos, ""
+	case '+', '-':
+		return s.scanNumber() // yields ADD or SUB when no number follows the sign
+	case '*':
+		return MUL, pos, ""
+	case '/':
+		return DIV, pos, ""
+	case '=':
+		if ch1, _ := s.r.read(); ch1 == '~' {
+			return EQREGEX, pos, ""
+		}
+		s.r.unread()
+		return EQ, pos, ""
+	case '!':
+		if ch1, _ := s.r.read(); ch1 == '=' {
+			return NEQ, pos, ""
+		} else if ch1 == '~' {
+			return NEQREGEX, pos, ""
+		}
+		s.r.unread() // a lone '!' is not a token: control leaves the switch and ILLEGAL is returned
+	case '>':
+		if ch1, _ := s.r.read(); ch1 == '=' {
+			return GTE, pos, ""
+		}
+		s.r.unread()
+		return GT, pos, ""
+	case '<':
+		if ch1, _ := s.r.read(); ch1 == '=' {
+			return LTE, pos, ""
+		} else if ch1 == '>' {
+			return NEQ, pos, ""
+		}
+		s.r.unread()
+		return LT, pos, ""
+	case '(':
+		return LPAREN, pos, ""
+	case ')':
+		return RPAREN, pos, ""
+	case ',':
+		return COMMA, pos, ""
+	case ';':
+		return SEMICOLON, pos, ""
+	}
+
+	return ILLEGAL, pos, string(ch0)
+}
+
+// scanWhitespace consumes the current rune and all contiguous whitespace.
+func (s *Scanner) scanWhitespace() (tok Token, pos Pos, lit string) {
+	// Seed the buffer with the whitespace rune that was already read.
+	var ws bytes.Buffer
+	first, pos := s.r.curr()
+	_, _ = ws.WriteRune(first)
+
+	// Accumulate whitespace until EOF or a non-whitespace rune; the latter is
+	// pushed back so the next scan sees it.
+	for {
+		next, _ := s.r.read()
+		if next == eof {
+			break
+		}
+		if !isWhitespace(next) {
+			s.r.unread()
+			break
+		}
+		_, _ = ws.WriteRune(next)
+	}
+
+	return WS, pos, ws.String()
+}
+
+func (s *Scanner) scanIdent() (tok Token, pos Pos, lit string) {
+	// Save the starting position of the identifier (peek: read, then unread).
+	_, pos = s.r.read()
+	s.r.unread()
+
+	var buf bytes.Buffer
+	for {
+		if ch, _ := s.r.read(); ch == eof {
+			break
+		} else if ch == '"' { // quoted identifier: scanString unreads the quote and re-reads it as the delimiter
+			tok0, pos0, lit0 := s.scanString()
+			if tok0 == BADSTRING || tok0 == BADESCAPE {
+				return tok0, pos0, lit0
+			}
+			return IDENT, pos, lit0 // returned as-is: no keyword lookup; NOTE(review): text already in buf is dropped
+		} else if isIdentChar(ch) {
+			s.r.unread()
+			buf.WriteString(ScanBareIdent(s.r))
+		} else {
+			s.r.unread()
+			break
+		}
+	}
+	lit = buf.String()
+
+	// If the literal matches a keyword then return that keyword (with an empty literal).
+	if tok = Lookup(lit); tok != IDENT {
+		return tok, pos, ""
+	}
+
+	return IDENT, pos, lit
+}
+
+// scanString consumes a contiguous string of non-quote characters.
+// Quote characters can be consumed if they're first escaped with a backslash.
+func (s *Scanner) scanString() (tok Token, pos Pos, lit string) {
+	s.r.unread()
+	_, pos = s.r.curr()
+
+	var err error
+	lit, err = ScanString(s.r)
+	if err == errBadString {
+		return BADSTRING, pos, lit
+	} else if err == errBadEscape {
+		_, pos = s.r.curr()
+		return BADESCAPE, pos, lit
+	}
+	return STRING, pos, lit
+}
+
+// ScanRegex reads a regex delimited by '/' on both sides. `\/` yields a plain
+// slash; for any other backslash sequence the backslash is kept in the literal.
+// On failure the token is BADESCAPE or BADREGEX and the literal is empty.
+func (s *Scanner) ScanRegex() (tok Token, pos Pos, lit string) {
+	_, pos = s.r.curr()
+
+	// Both sentinels are '/', and '/' is the only rune in the escape table.
+	b, err := ScanDelimited(s.r, '/', '/', map[rune]rune{'/': '/'}, true)
+
+	switch {
+	case err == errBadEscape:
+		_, pos = s.r.curr()
+		return BADESCAPE, pos, lit
+	case err != nil:
+		return BADREGEX, pos, lit
+	}
+	return REGEX, pos, string(b)
+}
+
+// scanNumber consumes anything that looks like the start of a number.
+// Numbers start with a digit, full stop, plus sign or minus sign.
+// This function can return non-number tokens if a scan is a false positive.
+// For example, a minus sign followed by a letter will just return a minus sign.
+func (s *Scanner) scanNumber() (tok Token, pos Pos, lit string) {
+	var buf bytes.Buffer
+
+	// Check if the initial rune is a "+" or "-".
+	ch, pos := s.r.curr()
+	if ch == '+' || ch == '-' {
+		// Peek at the next two runes.
+		ch1, _ := s.r.read()
+		ch2, _ := s.r.read()
+		s.r.unread()
+		s.r.unread()
+
+		// This rune must be followed by a digit or a full stop and a digit.
+		if isDigit(ch1) || (ch1 == '.' && isDigit(ch2)) {
+			_, _ = buf.WriteRune(ch)
+		} else if ch == '+' {
+			return ADD, pos, ""
+		} else if ch == '-' {
+			return SUB, pos, ""
+		}
+	} else if ch == '.' {
+		// Peek and see if the next rune is a digit.
+		ch1, _ := s.r.read()
+		s.r.unread()
+		if !isDigit(ch1) {
+			return ILLEGAL, pos, "."
+		}
+
+		// Unread the full stop so we can read it later.
+		s.r.unread()
+	} else {
+		s.r.unread() // the leading digit is re-read by scanDigits below
+	}
+
+	// Read as many digits as possible.
+	_, _ = buf.WriteString(s.scanDigits())
+
+	// If next code points are a full stop and digit then consume them.
+	if ch0, _ := s.r.read(); ch0 == '.' {
+		if ch1, _ := s.r.read(); isDigit(ch1) {
+			_, _ = buf.WriteRune(ch0)
+			_, _ = buf.WriteRune(ch1)
+			_, _ = buf.WriteString(s.scanDigits())
+		} else {
+			s.r.unread()
+			s.r.unread()
+		}
+	} else {
+		s.r.unread()
+	}
+
+	// Attempt to read as a duration if it doesn't have a fractional part.
+	if !strings.Contains(buf.String(), ".") {
+		// If the next rune is a duration unit (u, µ, s, h, d, w, m or ms) then return a duration token
+		if ch0, _ := s.r.read(); ch0 == 'u' || ch0 == 'µ' || ch0 == 's' || ch0 == 'h' || ch0 == 'd' || ch0 == 'w' {
+			_, _ = buf.WriteRune(ch0)
+			return DURATION_VAL, pos, buf.String()
+		} else if ch0 == 'm' {
+			_, _ = buf.WriteRune(ch0)
+			if ch1, _ := s.r.read(); ch1 == 's' {
+				_, _ = buf.WriteRune(ch1)
+			} else {
+				s.r.unread()
+			}
+			return DURATION_VAL, pos, buf.String()
+		}
+		s.r.unread()
+	}
+	return NUMBER, pos, buf.String()
+}
+
+// scanDigits consumes a contiguous series of digits and pushes back the rune that ends it.
+func (s *Scanner) scanDigits() string {
+	var digits bytes.Buffer
+	for {
+		ch, _ := s.r.read()
+		if isDigit(ch) {
+			_, _ = digits.WriteRune(ch)
+			continue
+		}
+		s.r.unread()
+		return digits.String()
+	}
+}
+
+// isWhitespace returns true if the rune is a space, tab, or newline.
+func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' }
+
+// isLetter returns true if the rune is an ASCII letter (a-z or A-Z).
+func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') }
+
+// isDigit returns true if the rune is an ASCII digit (0-9).
+func isDigit(ch rune) bool { return (ch >= '0' && ch <= '9') }
+
+// isIdentChar returns true if the rune can be used in an unquoted identifier.
+func isIdentChar(ch rune) bool { return isLetter(ch) || isDigit(ch) || ch == '_' }
+
+// isIdentFirstChar returns true if the rune can be used as the first char in an unquoted identifier.
+func isIdentFirstChar(ch rune) bool { return isLetter(ch) || ch == '_' }
+
+// bufScanner represents a wrapper for scanner to add a buffer.
+// It provides a fixed-length circular buffer that can be unread.
+type bufScanner struct {
+	s   *Scanner
+	i   int // index in buf of the most recently scanned token
+	n   int // number of tokens pushed back by Unscan and not yet re-read
+	buf [3]struct {
+		tok Token
+		pos Pos
+		lit string
+	}
+}
+
+// newBufScanner returns a new buffered scanner for a reader.
+func newBufScanner(r io.Reader) *bufScanner {
+	return &bufScanner{s: NewScanner(r)}
+}
+
+// Scan returns the next token: a pushed-back one if any, otherwise one read with Scanner.Scan.
+func (s *bufScanner) Scan() (tok Token, pos Pos, lit string) {
+	return s.scanFunc(s.s.Scan)
+}
+
+// ScanRegex returns the next token: a pushed-back one if any, otherwise one read with Scanner.ScanRegex.
+func (s *bufScanner) ScanRegex() (tok Token, pos Pos, lit string) {
+	return s.scanFunc(s.s.ScanRegex)
+}
+
+// scanFunc returns the next token. Tokens pushed back by Unscan are replayed
+// first; only when none are pending is scan invoked and its result stored in
+// the circular buffer.
+func (s *bufScanner) scanFunc(scan func() (Token, Pos, string)) (tok Token, pos Pos, lit string) {
+	if s.n > 0 {
+		// Replay a token that was pushed back earlier.
+		s.n--
+	} else {
+		// Nothing is pending: advance the ring and store a fresh token.
+		s.i = (s.i + 1) % len(s.buf)
+		slot := &s.buf[s.i]
+		slot.tok, slot.pos, slot.lit = scan()
+	}
+	return s.curr()
+}
+
+// Unscan pushes the previously read token back onto the buffer. No bound is enforced here; buf holds 3 tokens.
+func (s *bufScanner) Unscan() { s.n++ }
+
+// curr returns the token that sits n slots behind the most recently scanned one.
+func (s *bufScanner) curr() (tok Token, pos Pos, lit string) {
+	slot := s.buf[(s.i-s.n+len(s.buf))%len(s.buf)]
+	return slot.tok, slot.pos, slot.lit
+}
+
+// reader represents a buffered rune reader used by the scanner.
+// It provides a fixed-length circular buffer that can be unread.
+type reader struct {
+	r   io.RuneScanner
+	i   int // index in buf of the most recently read rune
+	n   int // number of runes pushed back by unread and not yet re-read
+	pos Pos // position that will be recorded for the next rune read from r
+	buf [3]struct {
+		ch  rune
+		pos Pos
+	}
+	eof bool // true if reader has ever seen eof.
+}
+
+// ReadRune reads the next rune from the reader.
+// This is a wrapper function to implement the io.RuneReader interface.
+// Note that size is always reported as 0.
+func (r *reader) ReadRune() (ch rune, size int, err error) {
+	// The eof marker rune is translated into io.EOF for callers.
+	if ch, _ = r.read(); ch == eof {
+		return ch, 0, io.EOF
+	}
+	return ch, 0, nil
+}
+
+// UnreadRune pushes the previously read rune back onto the buffer.
+// This is a wrapper function to implement the io.RuneScanner interface; it always returns nil.
+func (r *reader) UnreadRune() error {
+	r.unread()
+	return nil
+}
+
+// read reads the next rune from the reader, returning it with its zero-based position.
+func (r *reader) read() (ch rune, pos Pos) {
+	// If we have unread characters then read them off the buffer first.
+	if r.n > 0 {
+		r.n--
+		return r.curr()
+	}
+
+	// Read next rune from underlying reader.
+	// Any error (including io.EOF) should return as EOF.
+	ch, _, err := r.r.ReadRune()
+	if err != nil {
+		ch = eof
+	} else if ch == '\r' { // both "\r\n" and a bare "\r" are reported as a single '\n'
+		if ch, _, err := r.r.ReadRune(); err != nil {
+			// nop
+		} else if ch != '\n' {
+			_ = r.r.UnreadRune()
+		}
+		ch = '\n'
+	}
+
+	// Save character and position to the buffer.
+	r.i = (r.i + 1) % len(r.buf)
+	buf := &r.buf[r.i]
+	buf.ch, buf.pos = ch, r.pos // r.pos has not been advanced yet, so this is the rune's own position
+
+	// Update position.
+	// Only count EOF once.
+	if ch == '\n' {
+		r.pos.Line++
+		r.pos.Char = 0
+	} else if !r.eof {
+		r.pos.Char++
+	}
+
+	// Mark the reader as EOF.
+	// This is used so we don't double count EOF characters.
+	if ch == eof {
+		r.eof = true
+	}
+
+	return r.curr()
+}
+
+// unread steps back one rune so the next read returns it again. No bound is enforced here; buf holds 3 runes.
+func (r *reader) unread() {
+	r.n++
+}
+
+// curr returns the rune and position that sit n slots behind the newest buffer entry.
+func (r *reader) curr() (ch rune, pos Pos) {
+	size := len(r.buf)
+	slot := r.buf[(r.i-r.n+size)%size]
+	return slot.ch, slot.pos
+}
+
+// eof is a marker code point to signify that the reader can't read any more; a NUL rune in the input compares equal to it.
+const eof = rune(0)
+
+// ScanDelimited reads text enclosed by the start and end runes from r and
+// returns the bytes between them. A backslash followed by a key of escapes is
+// replaced by the mapped rune. For any other rune after a backslash, the
+// backslash is kept and the rune re-read when escapesPassThru is true, and
+// errBadEscape is returned otherwise. A newline or a read error ends the scan with an error.
+func ScanDelimited(r io.RuneScanner, start, end rune, escapes map[rune]rune, escapesPassThru bool) ([]byte, error) {
+	// Scan start delimiter.
+	if ch, _, err := r.ReadRune(); err != nil {
+		return nil, err
+	} else if ch != start {
+		return nil, fmt.Errorf("expected %s; found %s", string(start), string(ch))
+	}
+
+	var out bytes.Buffer
+	for {
+		ch0, _, err := r.ReadRune()
+		switch {
+		case ch0 == end:
+			return out.Bytes(), nil
+		case err != nil:
+			return out.Bytes(), err
+		case ch0 == '\n':
+			return nil, errors.New("delimited text contains new line")
+		case ch0 != '\\':
+			_, _ = out.WriteRune(ch0)
+			continue
+		}
+
+		// ch0 is a backslash: look at the rune it introduces.
+		ch1, _, err := r.ReadRune()
+		if err != nil {
+			return nil, err
+		}
+		if mapped, ok := escapes[ch1]; ok {
+			_, _ = out.WriteRune(mapped)
+		} else if escapesPassThru {
+			// Keep the backslash and let the next iteration handle ch1.
+			_ = r.UnreadRune()
+			_, _ = out.WriteRune(ch0)
+		} else {
+			out.Reset()
+			_, _ = out.WriteRune(ch0)
+			_, _ = out.WriteRune(ch1)
+			return out.Bytes(), errBadEscape
+		}
+	}
+}
+
+// ScanString reads a quoted string from a rune reader. The first rune read is
+// taken as the quote; the string ends when that rune is read again outside an escape.
+func ScanString(r io.RuneScanner) (string, error) {
+	ending, _, err := r.ReadRune()
+	if err != nil {
+		return "", errBadString
+	}
+
+	var buf bytes.Buffer
+	for {
+		ch0, _, err := r.ReadRune()
+		switch {
+		case ch0 == ending:
+			return buf.String(), nil
+		case err != nil || ch0 == '\n':
+			return buf.String(), errBadString
+		case ch0 != '\\':
+			_, _ = buf.WriteRune(ch0)
+			continue
+		}
+
+		// After a backslash only n, \ and " are accepted; anything else is a bad escape.
+		switch ch1, _, _ := r.ReadRune(); ch1 {
+		case 'n':
+			_, _ = buf.WriteRune('\n')
+		case '\\', '"':
+			_, _ = buf.WriteRune(ch1)
+		default:
+			return string(ch0) + string(ch1), errBadEscape
+		}
+	}
+}
+
+var errBadString = errors.New("bad string") // unterminated string: EOF or newline before the closing quote
+var errBadEscape = errors.New("bad escape") // unsupported rune after a backslash
+var errBadRegex = errors.New("bad regex")   // NOTE(review): not referenced in the visible part of this file
+
+// ScanBareIdent reads a bare identifier from a rune reader.
+func ScanBareIdent(r io.RuneScanner) string {
+	// Collect identifier runes; stop at the first read error or at the first
+	// rune that cannot appear in an identifier, which is handed back to r.
+	var ident bytes.Buffer
+	for {
+		ch, _, err := r.ReadRune()
+		if err != nil {
+			break
+		}
+		if !isIdentChar(ch) {
+			r.UnreadRune()
+			break
+		}
+		_, _ = ident.WriteRune(ch)
+	}
+	return ident.String()
+}
+
+var errInvalidIdentifier = errors.New("invalid identifier") // NOTE(review): not referenced in the visible part of this file
+
+// IsRegexOp reports whether t is one of the regex-matching operators, EQREGEX or NEQREGEX.
+func IsRegexOp(t Token) bool {
+	return t == NEQREGEX || t == EQREGEX
+}
+
+// assert panics with "assert failed: " followed by msg formatted with v if the given condition is false.
+func assert(condition bool, msg string, v ...interface{}) {
+	if !condition {
+		panic(fmt.Sprintf("assert failed: "+msg, v...))
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/token.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/token.go
new file mode 100644
index 00000000000..0eb5b7a9a6d
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/token.go
@@ -0,0 +1,296 @@
+package influxql
+
+import (
+	"strings"
+)
+
+// Token is a lexical token of the InfluxQL language.
+type Token int
+
+const (
+	// Special tokens
+	ILLEGAL Token = iota
+	EOF
+	WS
+
+	literal_beg
+	// Literals
+	IDENT        // main
+	NUMBER       // 12345.67
+	DURATION_VAL // 13h
+	STRING       // 'abc'
+	BADSTRING    // 'abc
+	BADESCAPE    // \q
+	TRUE         // true
+	FALSE        // false
+	REGEX        // Regular expressions
+	BADREGEX     // /.*
+	literal_end
+
+	operator_beg
+	// Operators
+	ADD // +
+	SUB // -
+	MUL // *
+	DIV // /
+
+	AND // AND
+	OR  // OR
+
+	EQ       // =
+	NEQ      // != or <>
+	EQREGEX  // =~
+	NEQREGEX // !~
+	LT       // <
+	LTE      // <=
+	GT       // >
+	GTE      // >=
+	operator_end
+
+	LPAREN    // (
+	RPAREN    // )
+	COMMA     // ,
+	SEMICOLON // ;
+	DOT       // .
+
+	keyword_beg
+	// Keywords
+	ALL
+	ALTER
+	AS
+	ASC
+	BEGIN
+	BY
+	CREATE
+	CONTINUOUS
+	DATABASE
+	DATABASES
+	DEFAULT
+	DELETE
+	DESC
+	DISTINCT
+	DROP
+	DURATION
+	END
+	EXISTS
+	EXPLAIN
+	FIELD
+	FOR
+	FROM
+	GRANT
+	GRANTS
+	GROUP
+	IF
+	IN
+	INF
+	INNER
+	INSERT
+	INTO
+	KEY
+	KEYS
+	LIMIT
+	MEASUREMENT
+	MEASUREMENTS
+	OFFSET
+	ON
+	ORDER
+	PASSWORD
+	POLICY
+	POLICIES
+	PRIVILEGES
+	QUERIES
+	QUERY
+	READ
+	REPLICATION
+	RETENTION
+	REVOKE
+	SELECT
+	SERIES
+	SERVERS
+	SET
+	SHOW
+	SLIMIT
+	STATS
+	DIAGNOSTICS
+	SOFFSET
+	TAG
+	TO
+	USER
+	USERS
+	VALUES
+	WHERE
+	WITH
+	WRITE
+	keyword_end
+)
+
+var tokens = [...]string{
+	ILLEGAL: "ILLEGAL",
+	EOF:     "EOF",
+	WS:      "WS",
+
+	IDENT:        "IDENT",
+	NUMBER:       "NUMBER",
+	DURATION_VAL: "DURATION_VAL",
+	STRING:       "STRING",
+	BADSTRING:    "BADSTRING",
+	BADESCAPE:    "BADESCAPE",
+	TRUE:         "TRUE",
+	FALSE:        "FALSE",
+	REGEX:        "REGEX",
+	BADREGEX:     "BADREGEX", // without this entry Token.String returned "" for BADREGEX
+
+	ADD: "+",
+	SUB: "-",
+	MUL: "*",
+	DIV: "/",
+	AND: "AND",
+	OR:  "OR",
+
+	EQ:       "=",
+	NEQ:      "!=",
+	EQREGEX:  "=~",
+	NEQREGEX: "!~",
+	LT:       "<",
+	LTE:      "<=",
+	GT:       ">",
+	GTE:      ">=",
+
+	LPAREN:    "(",
+	RPAREN:    ")",
+	COMMA:     ",",
+	SEMICOLON: ";",
+	DOT:       ".",
+
+	ALL:          "ALL",
+	ALTER:        "ALTER",
+	AS:           "AS",
+	ASC:          "ASC",
+	BEGIN:        "BEGIN",
+	BY:           "BY",
+	CREATE:       "CREATE",
+	CONTINUOUS:   "CONTINUOUS",
+	DATABASE:     "DATABASE",
+	DATABASES:    "DATABASES",
+	DEFAULT:      "DEFAULT",
+	DELETE:       "DELETE",
+	DESC:         "DESC",
+	DROP:         "DROP",
+	DISTINCT:     "DISTINCT",
+	DURATION:     "DURATION",
+	END:          "END",
+	EXISTS:       "EXISTS",
+	EXPLAIN:      "EXPLAIN",
+	FIELD:        "FIELD",
+	FOR:          "FOR",
+	FROM:         "FROM",
+	GRANT:        "GRANT",
+	GRANTS:       "GRANTS",
+	GROUP:        "GROUP",
+	IF:           "IF",
+	IN:           "IN",
+	INF:          "INF",
+	INNER:        "INNER",
+	INSERT:       "INSERT",
+	INTO:         "INTO",
+	KEY:          "KEY",
+	KEYS:         "KEYS",
+	LIMIT:        "LIMIT",
+	MEASUREMENT:  "MEASUREMENT",
+	MEASUREMENTS: "MEASUREMENTS",
+	OFFSET:       "OFFSET",
+	ON:           "ON",
+	ORDER:        "ORDER",
+	PASSWORD:     "PASSWORD",
+	POLICY:       "POLICY",
+	POLICIES:     "POLICIES",
+	PRIVILEGES:   "PRIVILEGES",
+	QUERIES:      "QUERIES",
+	QUERY:        "QUERY",
+	READ:         "READ",
+	REPLICATION:  "REPLICATION",
+	RETENTION:    "RETENTION",
+	REVOKE:       "REVOKE",
+	SELECT:       "SELECT",
+	SERIES:       "SERIES",
+	SERVERS:      "SERVERS",
+	SET:          "SET",
+	SHOW:         "SHOW",
+	SLIMIT:       "SLIMIT",
+	SOFFSET:      "SOFFSET",
+	STATS:        "STATS",
+	DIAGNOSTICS:  "DIAGNOSTICS",
+	TAG:          "TAG",
+	TO:           "TO",
+	USER:         "USER",
+	USERS:        "USERS",
+	VALUES:       "VALUES",
+	WHERE:        "WHERE",
+	WITH:         "WITH",
+	WRITE:        "WRITE",
+}
+
+// keywords maps lower-cased keyword text to its token; it is populated by init.
+var keywords map[string]Token
+
+func init() {
+	keywords = map[string]Token{"true": TRUE, "false": FALSE}
+	for tok := keyword_beg + 1; tok < keyword_end; tok++ {
+		keywords[strings.ToLower(tokens[tok])] = tok
+	}
+	// AND and OR sit in the operator range but are looked up like keywords.
+	for _, tok := range []Token{AND, OR} {
+		keywords[strings.ToLower(tokens[tok])] = tok
+	}
+}
+
+// String returns the token's entry in the tokens table, or "" when tok is outside the table.
+func (tok Token) String() string {
+	if tok < 0 || tok >= Token(len(tokens)) {
+		return ""
+	}
+	return tokens[tok]
+}
+
+// Precedence returns the operator precedence of the binary operator token, or 0 for any other token.
+func (tok Token) Precedence() int {
+	switch tok {
+	case MUL, DIV:
+		return 5
+	case ADD, SUB:
+		return 4
+	case EQ, NEQ, EQREGEX, NEQREGEX, LT, LTE, GT, GTE:
+		return 3
+	case AND:
+		return 2
+	case OR:
+		return 1
+	}
+	return 0
+}
+
+// isOperator returns true for tokens strictly between operator_beg and operator_end, which includes AND and OR.
+func (tok Token) isOperator() bool { return tok > operator_beg && tok < operator_end }
+
+// tokstr returns lit when it is non-empty and the token's string form otherwise.
+func tokstr(tok Token, lit string) string {
+	if lit == "" {
+		return tok.String()
+	}
+	return lit
+}
+
+// Lookup returns the keyword token for ident, matched case-insensitively, or IDENT when it is not a keyword.
+func Lookup(ident string) Token {
+	if keyword, known := keywords[strings.ToLower(ident)]; known {
+		return keyword
+	}
+	return IDENT
+}
+
+// Pos specifies the line and character position of a token.
+// The Char and Line are both zero-based indexes; ParseError.Error prints them one-based.
+type Pos struct {
+	Line int
+	Char int
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/config.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/config.go
new file mode 100644
index 00000000000..5512f059b65
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/config.go
@@ -0,0 +1,52 @@
+package meta
+
+import (
+	"time"
+
+	"github.com/influxdb/influxdb/toml"
+)
+
+const (
+	// DefaultHostname is the default hostname if one is not provided.
+	DefaultHostname = "localhost"
+
+	// DefaultBindAddress is the default address to bind to (port 8088, no host part).
+	DefaultBindAddress = ":8088"
+
+	// DefaultHeartbeatTimeout is the default heartbeat timeout for the store (1s).
+	DefaultHeartbeatTimeout = 1000 * time.Millisecond
+
+	// DefaultElectionTimeout is the default election timeout for the store (1s).
+	DefaultElectionTimeout = 1000 * time.Millisecond
+
+	// DefaultLeaderLeaseTimeout is the default leader lease timeout for the store.
+	DefaultLeaderLeaseTimeout = 500 * time.Millisecond
+
+	// DefaultCommitTimeout is the default commit timeout for the store.
+	DefaultCommitTimeout = 50 * time.Millisecond
+)
+
+// Config represents the meta configuration. NewConfig fills every field except Dir and Peers.
+type Config struct {
+	Dir                 string        `toml:"dir"`
+	Hostname            string        `toml:"hostname"`
+	BindAddress         string        `toml:"bind-address"`
+	Peers               []string      `toml:"peers"`
+	RetentionAutoCreate bool          `toml:"retention-autocreate"`
+	ElectionTimeout     toml.Duration `toml:"election-timeout"`
+	HeartbeatTimeout    toml.Duration `toml:"heartbeat-timeout"`
+	LeaderLeaseTimeout  toml.Duration `toml:"leader-lease-timeout"`
+	CommitTimeout       toml.Duration `toml:"commit-timeout"`
+}
+
+// NewConfig returns a Config populated with the package defaults; Dir and Peers are left empty.
+func NewConfig() Config {
+	return Config{
+		Hostname:            DefaultHostname,
+		BindAddress:         DefaultBindAddress,
+		RetentionAutoCreate: true,
+		ElectionTimeout:     toml.Duration(DefaultElectionTimeout),
+		HeartbeatTimeout:    toml.Duration(DefaultHeartbeatTimeout),
+		LeaderLeaseTimeout:  toml.Duration(DefaultLeaderLeaseTimeout),
+		CommitTimeout:       toml.Duration(DefaultCommitTimeout)}
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data.go
new file mode 100644
index 00000000000..35a3f6e0654
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data.go
@@ -0,0 +1,1055 @@
+package meta
+
+import (
+	"sort"
+	"time"
+
+	"github.com/gogo/protobuf/proto"
+	"github.com/influxdb/influxdb/influxql"
+	"github.com/influxdb/influxdb/meta/internal"
+)
+
+//go:generate protoc --gogo_out=. internal/meta.proto
+
+const (
+	// DefaultRetentionPolicyReplicaN is the ReplicaN that NewRetentionPolicyInfo assigns to a new policy.
+	DefaultRetentionPolicyReplicaN = 1
+
+	// DefaultRetentionPolicyDuration is the Duration that NewRetentionPolicyInfo assigns to a new policy (seven days).
+	DefaultRetentionPolicyDuration = 7 * (24 * time.Hour)
+
+	// MinRetentionPolicyDuration is the shortest non-zero duration UpdateRetentionPolicy accepts for a policy.
+	MinRetentionPolicyDuration = time.Hour
+)
+
+// Data represents the top level collection of all metadata: cluster nodes, databases and users, plus the ID counters used when creating nodes and shard groups.
+type Data struct {
+	Term      uint64 // associated raft term
+	Index     uint64 // associated raft index
+	ClusterID uint64
+	Nodes     []NodeInfo
+	Databases []DatabaseInfo
+	Users     []UserInfo
+
+	MaxNodeID       uint64 // incremented by CreateNode; the new value becomes the node's ID
+	MaxShardGroupID uint64 // incremented by CreateShardGroup for each new shard group
+	MaxShardID      uint64 // incremented by CreateShardGroup for each new shard
+}
+
+// Node looks up a node by ID, returning nil when no node has that ID.
+func (data *Data) Node(id uint64) *NodeInfo {
+	for idx := range data.Nodes {
+		if node := &data.Nodes[idx]; node.ID == id {
+			return node
+		}
+	}
+	return nil
+}
+
+// NodeByHost looks up a node by hostname, returning nil when no node has that host.
+func (data *Data) NodeByHost(host string) *NodeInfo {
+	for idx := range data.Nodes {
+		if node := &data.Nodes[idx]; node.Host == host {
+			return node
+		}
+	}
+	return nil
+}
+
+// CreateNode adds a node for host to the metadata.
+// The node's ID is the incremented MaxNodeID. ErrNodeExists is returned when
+// a node with the same host is already present.
+func (data *Data) CreateNode(host string) error {
+	if existing := data.NodeByHost(host); existing != nil {
+		return ErrNodeExists
+	}
+
+	// Advance the ID counter first so the new entry receives the
+	// incremented value.
+	data.MaxNodeID++
+	entry := NodeInfo{ID: data.MaxNodeID, Host: host}
+	data.Nodes = append(data.Nodes, entry)
+
+	return nil
+}
+
+// DeleteNode removes the node with the given ID, or returns ErrNodeNotFound when there is none.
+func (data *Data) DeleteNode(id uint64) error {
+	for idx, node := range data.Nodes {
+		if node.ID == id {
+			data.Nodes = append(data.Nodes[:idx], data.Nodes[idx+1:]...)
+			return nil
+		}
+	}
+	return ErrNodeNotFound
+}
+
+// Database looks up a database by name, returning nil when it does not exist.
+func (data *Data) Database(name string) *DatabaseInfo {
+	for idx := range data.Databases {
+		if db := &data.Databases[idx]; db.Name == name {
+			return db
+		}
+	}
+	return nil
+}
+
+// CreateDatabase adds an empty database called name to the metadata.
+// It returns ErrDatabaseNameRequired for a blank name and ErrDatabaseExists for a duplicate.
+func (data *Data) CreateDatabase(name string) error {
+	switch {
+	case name == "":
+		return ErrDatabaseNameRequired
+	case data.Database(name) != nil:
+		return ErrDatabaseExists
+	}
+
+	// Register the database with no retention policies or continuous queries yet.
+	data.Databases = append(data.Databases, DatabaseInfo{Name: name})
+	return nil
+}
+
+// DropDatabase removes the database called name, or returns ErrDatabaseNotFound when there is none.
+func (data *Data) DropDatabase(name string) error {
+	for idx, db := range data.Databases {
+		if db.Name == name {
+			data.Databases = append(data.Databases[:idx], data.Databases[idx+1:]...)
+			return nil
+		}
+	}
+	return ErrDatabaseNotFound
+}
+
+// RetentionPolicy looks up the policy called name on the given database.
+// It returns ErrDatabaseNotFound or ErrRetentionPolicyNotFound when the lookup fails.
+func (data *Data) RetentionPolicy(database, name string) (*RetentionPolicyInfo, error) {
+	db := data.Database(database)
+	if db == nil {
+		return nil, ErrDatabaseNotFound
+	}
+
+	// Delegate the per-database search to DatabaseInfo.RetentionPolicy.
+	if policy := db.RetentionPolicy(name); policy != nil {
+		return policy, nil
+	}
+	return nil, ErrRetentionPolicyNotFound
+}
+
+// CreateRetentionPolicy adds a policy built from rpi to the named database. It fails when the
+// name is blank, ReplicaN differs from the node count, the database is missing, or the name is taken.
+func (data *Data) CreateRetentionPolicy(database string, rpi *RetentionPolicyInfo) error {
+	switch {
+	case rpi.Name == "":
+		return ErrRetentionPolicyNameRequired
+	case rpi.ReplicaN != len(data.Nodes):
+		return ErrReplicationFactorMismatch
+	}
+
+	db := data.Database(database)
+	if db == nil {
+		return ErrDatabaseNotFound
+	}
+	if db.RetentionPolicy(rpi.Name) != nil {
+		return ErrRetentionPolicyExists
+	}
+
+	// The shard group duration is derived from the policy duration, not copied from rpi.
+	policy := RetentionPolicyInfo{
+		Name:               rpi.Name,
+		ReplicaN:           rpi.ReplicaN,
+		Duration:           rpi.Duration,
+		ShardGroupDuration: shardGroupDuration(rpi.Duration),
+	}
+	db.RetentionPolicies = append(db.RetentionPolicies, policy)
+	return nil
+}
+
+// DropRetentionPolicy removes the policy called name from the named database.
+// It returns ErrDatabaseNotFound or ErrRetentionPolicyNotFound when either is missing.
+func (data *Data) DropRetentionPolicy(database, name string) error {
+	db := data.Database(database)
+	if db == nil {
+		return ErrDatabaseNotFound
+	}
+
+	policies := db.RetentionPolicies
+	for idx := range policies {
+		if policies[idx].Name == name {
+			db.RetentionPolicies = append(policies[:idx], policies[idx+1:]...)
+			return nil
+		}
+	}
+	return ErrRetentionPolicyNotFound
+}
+
+// UpdateRetentionPolicy applies the non-nil fields of rpu to the policy called name.
+// A rename must not collide with another policy, and a non-zero duration must be
+// at least MinRetentionPolicyDuration.
+func (data *Data) UpdateRetentionPolicy(database, name string, rpu *RetentionPolicyUpdate) error {
+	db := data.Database(database)
+	if db == nil {
+		return ErrDatabaseNotFound
+	}
+
+	policy := db.RetentionPolicy(name)
+	if policy == nil {
+		return ErrRetentionPolicyNotFound
+	}
+
+	// Validate everything up front so a rejected update leaves the policy untouched.
+	if newName := rpu.Name; newName != nil && *newName != name {
+		if db.RetentionPolicy(*newName) != nil {
+			return ErrRetentionPolicyNameExists
+		}
+	}
+	if d := rpu.Duration; d != nil && *d != 0 && *d < MinRetentionPolicyDuration {
+		return ErrRetentionPolicyDurationTooLow
+	}
+
+	// Copy over only the fields the caller supplied.
+	if rpu.Name != nil {
+		policy.Name = *rpu.Name
+	}
+	if rpu.Duration != nil {
+		policy.Duration = *rpu.Duration
+	}
+	if rpu.ReplicaN != nil {
+		policy.ReplicaN = *rpu.ReplicaN
+	}
+
+	return nil
+}
+
+// SetDefaultRetentionPolicy marks the policy called name as the database's default.
+// Both the database and the policy must already exist.
+func (data *Data) SetDefaultRetentionPolicy(database, name string) error {
+	db := data.Database(database)
+	switch {
+	case db == nil:
+		return ErrDatabaseNotFound
+	case db.RetentionPolicy(name) == nil:
+		return ErrRetentionPolicyNotFound
+	}
+
+	// Only the name is recorded; the policy itself is not modified.
+	db.DefaultRetentionPolicy = name
+	return nil
+}
+
+// ShardGroups returns every shard group on a database and policy that has not
+// been marked as deleted.
+func (data *Data) ShardGroups(database, policy string) ([]ShardGroupInfo, error) {
+	rpi, err := data.RetentionPolicy(database, policy)
+	switch {
+	case err != nil:
+		return nil, err
+	case rpi == nil:
+		return nil, ErrRetentionPolicyNotFound
+	}
+	live := make([]ShardGroupInfo, 0, len(rpi.ShardGroups))
+	for idx := range rpi.ShardGroups {
+		if sg := &rpi.ShardGroups[idx]; !sg.Deleted() {
+			live = append(live, *sg)
+		}
+	}
+	return live, nil
+}
+
+// ShardGroupsByTimeRange returns the non-deleted shard groups on a database and policy
+// that overlap the range tmin..tmax, ordered by start time.
+func (data *Data) ShardGroupsByTimeRange(database, policy string, tmin, tmax time.Time) ([]ShardGroupInfo, error) {
+	rpi, err := data.RetentionPolicy(database, policy)
+	switch {
+	case err != nil:
+		return nil, err
+	case rpi == nil:
+		return nil, ErrRetentionPolicyNotFound
+	}
+	matches := make([]ShardGroupInfo, 0, len(rpi.ShardGroups))
+	for idx := range rpi.ShardGroups {
+		sg := &rpi.ShardGroups[idx]
+		if !sg.Deleted() && sg.Overlaps(tmin, tmax) {
+			matches = append(matches, *sg)
+		}
+	}
+	sort.Sort(ShardGroupInfos(matches))
+	return matches, nil
+}
+
+// ShardGroupByTimestamp returns the shard group on a database and policy that
+// contains timestamp, or nil when no non-deleted group covers it.
+func (data *Data) ShardGroupByTimestamp(database, policy string, timestamp time.Time) (*ShardGroupInfo, error) {
+	rpi, err := data.RetentionPolicy(database, policy)
+	if err != nil {
+		return nil, err
+	}
+	if rpi == nil {
+		return nil, ErrRetentionPolicyNotFound
+	}
+	return rpi.ShardGroupByTimestamp(timestamp), nil
+}
+
+// CreateShardGroup creates a shard group on a database and policy whose time window contains timestamp.
+func (data *Data) CreateShardGroup(database, policy string, timestamp time.Time) error {
+	// Ensure there are nodes in the metadata.
+	if len(data.Nodes) == 0 {
+		return ErrNodesRequired
+	}
+
+	// Find retention policy.
+	rpi, err := data.RetentionPolicy(database, policy)
+	if err != nil {
+		return err
+	} else if rpi == nil {
+		return ErrRetentionPolicyNotFound
+	}
+
+	// Verify that shard group doesn't already exist for this timestamp.
+	if rpi.ShardGroupByTimestamp(timestamp) != nil {
+		return ErrShardGroupExists
+	}
+
+	// Require at least one replica but no more replicas than nodes.
+	replicaN := rpi.ReplicaN
+	if replicaN == 0 {
+		replicaN = 1
+	} else if replicaN > len(data.Nodes) {
+		replicaN = len(data.Nodes)
+	}
+
+	// Determine shard count by node count divided by replication factor.
+	// This will ensure shards will get distributed across nodes evenly and
+	// replicated the correct number of times.
+	shardN := len(data.Nodes) / replicaN
+
+	// Create the shard group; its window starts at timestamp truncated to the policy's ShardGroupDuration.
+	data.MaxShardGroupID++
+	sgi := ShardGroupInfo{}
+	sgi.ID = data.MaxShardGroupID
+	sgi.StartTime = timestamp.Truncate(rpi.ShardGroupDuration).UTC()
+	sgi.EndTime = sgi.StartTime.Add(rpi.ShardGroupDuration).UTC()
+
+	// Create shards on the group, giving each the next shard ID.
+	sgi.Shards = make([]ShardInfo, shardN)
+	for i := range sgi.Shards {
+		data.MaxShardID++
+		sgi.Shards[i] = ShardInfo{ID: data.MaxShardID}
+	}
+
+	// Assign data nodes to shards via round robin.
+	// Start from a repeatable offset in the node list derived from data.Index.
+	nodeIndex := int(data.Index % uint64(len(data.Nodes)))
+	for i := range sgi.Shards {
+		si := &sgi.Shards[i]
+		for j := 0; j < replicaN; j++ {
+			nodeID := data.Nodes[nodeIndex%len(data.Nodes)].ID
+			si.OwnerIDs = append(si.OwnerIDs, nodeID)
+			nodeIndex++
+		}
+	}
+
+	// Retention policy has a new shard group, so update the policy.
+	rpi.ShardGroups = append(rpi.ShardGroups, sgi)
+
+	return nil
+}
+
+// DeleteShardGroup marks the shard group with the given id as deleted by
+// stamping DeletedAt with the current UTC time; the entry itself is kept.
+func (data *Data) DeleteShardGroup(database, policy string, id uint64) error {
+	rpi, err := data.RetentionPolicy(database, policy)
+	switch {
+	case err != nil:
+		return err
+	case rpi == nil:
+		return ErrRetentionPolicyNotFound
+	}
+	for idx := range rpi.ShardGroups {
+		sg := &rpi.ShardGroups[idx]
+		if sg.ID != id {
+			continue
+		}
+		sg.DeletedAt = time.Now().UTC()
+		return nil
+	}
+	return ErrShardGroupNotFound
+}
+
+// CreateContinuousQuery stores the continuous query text under name on the
+// given database. It returns ErrDatabaseNotFound for an unknown database and
+// ErrContinuousQueryExists when the name is already in use there.
+func (data *Data) CreateContinuousQuery(database, name, query string) error {
+	db := data.Database(database)
+	if db == nil {
+		return ErrDatabaseNotFound
+	}
+
+	// Names must be unique within a database.
+	for _, existing := range db.ContinuousQueries {
+		if existing.Name == name {
+			return ErrContinuousQueryExists
+		}
+	}
+
+	// The query text is stored as given.
+	cq := ContinuousQueryInfo{Name: name, Query: query}
+	db.ContinuousQueries = append(db.ContinuousQueries, cq)
+
+	return nil
+}
+
+// DropContinuousQuery removes the continuous query called name from the given database.
+func (data *Data) DropContinuousQuery(database, name string) error {
+	db := data.Database(database)
+	if db == nil {
+		return ErrDatabaseNotFound
+	}
+
+	for idx, cq := range db.ContinuousQueries {
+		if cq.Name == name {
+			db.ContinuousQueries = append(db.ContinuousQueries[:idx], db.ContinuousQueries[idx+1:]...)
+			return nil
+		}
+	}
+	return ErrContinuousQueryNotFound
+}
+
+// User looks up a user by username, returning nil when no such user exists.
+func (data *Data) User(username string) *UserInfo {
+	for idx := range data.Users {
+		if user := &data.Users[idx]; user.Name == username {
+			return user
+		}
+	}
+	return nil
+}
+
+// CreateUser adds a user with the given password hash and admin flag.
+// It returns ErrUsernameRequired for a blank name and ErrUserExists for a duplicate.
+func (data *Data) CreateUser(name, hash string, admin bool) error {
+	switch {
+	case name == "":
+		return ErrUsernameRequired
+	case data.User(name) != nil:
+		return ErrUserExists
+	}
+
+	user := UserInfo{
+		Name:  name,
+		Hash:  hash,
+		Admin: admin,
+	}
+	data.Users = append(data.Users, user)
+	return nil
+}
+
+// DropUser removes the user called name, or returns ErrUserNotFound when there is none.
+func (data *Data) DropUser(name string) error {
+	for idx, user := range data.Users {
+		if user.Name == name {
+			data.Users = append(data.Users[:idx], data.Users[idx+1:]...)
+			return nil
+		}
+	}
+	return ErrUserNotFound
+}
+
+// UpdateUser replaces the stored password hash of the user called name.
+// It returns ErrUserNotFound when no such user exists.
+func (data *Data) UpdateUser(name, hash string) error {
+	user := data.User(name)
+	if user == nil {
+		return ErrUserNotFound
+	}
+	user.Hash = hash
+	return nil
+}
+
+// SetPrivilege records privilege p for the named user on database, replacing any earlier value.
+func (data *Data) SetPrivilege(name, database string, p influxql.Privilege) error {
+	user := data.User(name)
+	if user == nil {
+		return ErrUserNotFound
+	}
+
+	// The privilege map is allocated lazily on first use.
+	if user.Privileges == nil {
+		user.Privileges = map[string]influxql.Privilege{}
+	}
+	user.Privileges[database] = p
+	return nil
+}
+
+// SetAdminPrivilege sets or clears the admin flag of the named user.
+// It returns ErrUserNotFound when no such user exists.
+func (data *Data) SetAdminPrivilege(name string, admin bool) error {
+	user := data.User(name)
+	if user == nil {
+		return ErrUserNotFound
+	}
+
+	user.Admin = admin
+	return nil
+}
+
+// UserPrivileges returns the named user's privilege map, keyed by database name.
+// The map is returned as stored, not copied.
+func (data *Data) UserPrivileges(name string) (map[string]influxql.Privilege, error) {
+	user := data.User(name)
+	if user == nil {
+		return nil, ErrUserNotFound
+	}
+	return user.Privileges, nil
+}
+
+// UserPrivilege gets the privilege for a user on a database.
+// The returned pointer refers to a copy of the stored value. A user with no entry for
+// the database yields influxql.NoPrivileges; an unknown user yields ErrUserNotFound.
+func (data *Data) UserPrivilege(name, database string) (*influxql.Privilege, error) {
+	ui := data.User(name)
+	if ui == nil {
+		return nil, ErrUserNotFound
+	}
+
+	// Look the database up directly instead of scanning every map entry.
+	if p, ok := ui.Privileges[database]; ok {
+		return &p, nil
+	}
+	return influxql.NewPrivilege(influxql.NoPrivileges), nil
+}
+
+// Clone returns a deep copy of data: the node, database and user slices are
+// rebuilt element by element via each element's clone method.
+func (data *Data) Clone() *Data {
+	// Start from a shallow copy to pick up all scalar fields.
+	dup := *data
+
+	// Nil slices stay nil; non-nil ones get fresh backing arrays.
+	if src := data.Nodes; src != nil {
+		dup.Nodes = make([]NodeInfo, len(src))
+		for idx, node := range src {
+			dup.Nodes[idx] = node.clone()
+		}
+	}
+
+	if src := data.Databases; src != nil {
+		dup.Databases = make([]DatabaseInfo, len(src))
+		for idx, db := range src {
+			dup.Databases[idx] = db.clone()
+		}
+	}
+
+	if src := data.Users; src != nil {
+		dup.Users = make([]UserInfo, len(src))
+		for idx, user := range src {
+			dup.Users[idx] = user.clone()
+		}
+	}
+
+	return &dup
+}
+
+// marshal converts data into its protobuf message form.
+func (data *Data) marshal() *internal.Data {
+	nodes := make([]*internal.NodeInfo, len(data.Nodes))
+	for idx, node := range data.Nodes {
+		nodes[idx] = node.marshal()
+	}
+
+	databases := make([]*internal.DatabaseInfo, len(data.Databases))
+	for idx, db := range data.Databases {
+		databases[idx] = db.marshal()
+	}
+
+	users := make([]*internal.UserInfo, len(data.Users))
+	for idx, user := range data.Users {
+		users[idx] = user.marshal()
+	}
+
+	return &internal.Data{
+		Term:            proto.Uint64(data.Term),
+		Index:           proto.Uint64(data.Index),
+		ClusterID:       proto.Uint64(data.ClusterID),
+		MaxNodeID:       proto.Uint64(data.MaxNodeID),
+		MaxShardGroupID: proto.Uint64(data.MaxShardGroupID),
+		MaxShardID:      proto.Uint64(data.MaxShardID),
+		Nodes:           nodes,
+		Databases:       databases,
+		Users:           users,
+	}
+}
+
+// unmarshal replaces the contents of data with the values carried by pb.
+func (data *Data) unmarshal(pb *internal.Data) {
+	// Scalar fields first, then the three collections.
+	data.Term = pb.GetTerm()
+	data.Index = pb.GetIndex()
+	data.ClusterID = pb.GetClusterID()
+	data.MaxNodeID = pb.GetMaxNodeID()
+	data.MaxShardGroupID = pb.GetMaxShardGroupID()
+	data.MaxShardID = pb.GetMaxShardID()
+
+	data.Nodes = make([]NodeInfo, len(pb.GetNodes()))
+	for idx, node := range pb.GetNodes() {
+		data.Nodes[idx].unmarshal(node)
+	}
+
+	data.Databases = make([]DatabaseInfo, len(pb.GetDatabases()))
+	for idx, db := range pb.GetDatabases() {
+		data.Databases[idx].unmarshal(db)
+	}
+
+	data.Users = make([]UserInfo, len(pb.GetUsers()))
+	for idx, user := range pb.GetUsers() {
+		data.Users[idx].unmarshal(user)
+	}
+}
+
+// MarshalBinary encodes the metadata to a binary format by serializing its protobuf representation.
+func (data *Data) MarshalBinary() ([]byte, error) {
+	return proto.Marshal(data.marshal())
+}
+
+// UnmarshalBinary decodes the object from a binary format.
+func (data *Data) UnmarshalBinary(buf []byte) error {
+	var pb internal.Data
+	if err := proto.Unmarshal(buf, &pb); err != nil {
+		return err
+	}
+	data.unmarshal(&pb)
+	return nil
+}
+
+// NodeInfo represents information about a single node in the cluster.
+type NodeInfo struct {
+	ID   uint64 // assigned by Data.CreateNode from the MaxNodeID counter
+	Host string // must be unique; Data.CreateNode rejects a duplicate host
+}
+
+// clone returns a copy of ni; the struct has only an integer and a string field, so the value copy is already deep.
+func (ni NodeInfo) clone() NodeInfo { return ni }
+
+// marshal converts ni into its protobuf message form.
+func (ni NodeInfo) marshal() *internal.NodeInfo {
+	return &internal.NodeInfo{
+		ID:   proto.Uint64(ni.ID),
+		Host: proto.String(ni.Host),
+	}
+}
+
+// unmarshal replaces the contents of ni with the values carried by pb.
+// Both fields are overwritten in a single assignment.
+func (ni *NodeInfo) unmarshal(pb *internal.NodeInfo) {
+	*ni = NodeInfo{ID: pb.GetID(), Host: pb.GetHost()}
+}
+
+// DatabaseInfo represents information about a database in the system.
+type DatabaseInfo struct {
+	Name                   string
+	DefaultRetentionPolicy string // policy name, set by Data.SetDefaultRetentionPolicy
+	RetentionPolicies      []RetentionPolicyInfo
+	ContinuousQueries      []ContinuousQueryInfo
+}
+
+// RetentionPolicy looks up a retention policy by name, returning nil when the database has none with that name.
+func (di DatabaseInfo) RetentionPolicy(name string) *RetentionPolicyInfo {
+	for idx := range di.RetentionPolicies {
+		if policy := &di.RetentionPolicies[idx]; policy.Name == name {
+			return policy
+		}
+	}
+	return nil
+}
+
+// clone returns a deep copy of di: the retention policy and continuous query
+// slices are rebuilt element by element via each element's clone method.
+func (di DatabaseInfo) clone() DatabaseInfo {
+	dup := di
+
+	if src := di.RetentionPolicies; src != nil {
+		dup.RetentionPolicies = make([]RetentionPolicyInfo, len(src))
+		for idx, policy := range src {
+			dup.RetentionPolicies[idx] = policy.clone()
+		}
+	}
+
+	if src := di.ContinuousQueries; src != nil {
+		dup.ContinuousQueries = make([]ContinuousQueryInfo, len(src))
+		for idx, cq := range src {
+			dup.ContinuousQueries[idx] = cq.clone()
+		}
+	}
+
+	return dup
+}
+
+// marshal converts di into its protobuf message form.
+func (di DatabaseInfo) marshal() *internal.DatabaseInfo {
+	policies := make([]*internal.RetentionPolicyInfo, len(di.RetentionPolicies))
+	for idx := range di.RetentionPolicies {
+		policies[idx] = di.RetentionPolicies[idx].marshal()
+	}
+	queries := make([]*internal.ContinuousQueryInfo, len(di.ContinuousQueries))
+	for idx, cq := range di.ContinuousQueries {
+		queries[idx] = cq.marshal()
+	}
+	return &internal.DatabaseInfo{
+		Name:                   proto.String(di.Name),
+		DefaultRetentionPolicy: proto.String(di.DefaultRetentionPolicy),
+		RetentionPolicies:      policies,
+		ContinuousQueries:      queries,
+	}
+}
+
+// unmarshal replaces the contents of di with the values carried by pb.
+func (di *DatabaseInfo) unmarshal(pb *internal.DatabaseInfo) {
+	di.Name = pb.GetName()
+	di.DefaultRetentionPolicy = pb.GetDefaultRetentionPolicy()
+	policies := pb.GetRetentionPolicies()
+	di.RetentionPolicies = make([]RetentionPolicyInfo, len(policies))
+	for idx := range policies {
+		di.RetentionPolicies[idx].unmarshal(policies[idx])
+	}
+	queries := pb.GetContinuousQueries()
+	di.ContinuousQueries = make([]ContinuousQueryInfo, len(queries))
+	for idx := range queries {
+		di.ContinuousQueries[idx].unmarshal(queries[idx])
+	}
+}
+
+// RetentionPolicyInfo represents metadata about a retention policy.
+type RetentionPolicyInfo struct {
+	Name               string
+	ReplicaN           int
+	Duration           time.Duration // ExpiredShardGroups treats 0 as "never expire"
+	ShardGroupDuration time.Duration // width of the window covered by each shard group
+	ShardGroups        []ShardGroupInfo
+}
+
+// NewRetentionPolicyInfo returns a policy called name with the default replication factor and duration.
+func NewRetentionPolicyInfo(name string) *RetentionPolicyInfo {
+	rpi := new(RetentionPolicyInfo)
+	rpi.Name = name
+	rpi.ReplicaN = DefaultRetentionPolicyReplicaN
+	rpi.Duration = DefaultRetentionPolicyDuration
+	return rpi
+}
+
+// ShardGroupByTimestamp returns the first non-deleted shard group in the policy that contains timestamp, or nil.
+func (rpi *RetentionPolicyInfo) ShardGroupByTimestamp(timestamp time.Time) *ShardGroupInfo {
+	for idx := range rpi.ShardGroups {
+		if sg := &rpi.ShardGroups[idx]; sg.Contains(timestamp) && !sg.Deleted() {
+			return sg
+		}
+	}
+	return nil
+}
+
+// ExpiredShardGroups returns the non-deleted shard groups whose EndTime plus the
+// policy Duration falls before t. A policy with a zero Duration never expires groups.
+func (rpi *RetentionPolicyInfo) ExpiredShardGroups(t time.Time) []*ShardGroupInfo {
+	expired := make([]*ShardGroupInfo, 0)
+	for idx := range rpi.ShardGroups {
+		sg := &rpi.ShardGroups[idx]
+		// Skip groups already marked deleted before testing for expiry.
+		if !sg.Deleted() && rpi.Duration != 0 && sg.EndTime.Add(rpi.Duration).Before(t) {
+			expired = append(expired, sg)
+		}
+	}
+	return expired
+}
+
+// DeletedShardGroups returns pointers to the shard groups in the policy that are marked as deleted.
+func (rpi *RetentionPolicyInfo) DeletedShardGroups() []*ShardGroupInfo {
+	deleted := make([]*ShardGroupInfo, 0)
+	for idx := range rpi.ShardGroups {
+		if sg := &rpi.ShardGroups[idx]; sg.Deleted() {
+			deleted = append(deleted, sg)
+		}
+	}
+	return deleted
+}
+
+// marshal converts rpi into its protobuf message form.
+// ReplicaN is narrowed to uint32 and both durations are stored as int64 values.
+func (rpi *RetentionPolicyInfo) marshal() *internal.RetentionPolicyInfo {
+	groups := make([]*internal.ShardGroupInfo, len(rpi.ShardGroups))
+	for idx := range rpi.ShardGroups {
+		groups[idx] = rpi.ShardGroups[idx].marshal()
+	}
+
+	return &internal.RetentionPolicyInfo{
+		Name:               proto.String(rpi.Name),
+		ReplicaN:           proto.Uint32(uint32(rpi.ReplicaN)),
+		Duration:           proto.Int64(int64(rpi.Duration)),
+		ShardGroupDuration: proto.Int64(int64(rpi.ShardGroupDuration)),
+		ShardGroups:        groups,
+	}
+}
+
+// unmarshal replaces the contents of rpi with the values carried by pb.
+func (rpi *RetentionPolicyInfo) unmarshal(pb *internal.RetentionPolicyInfo) {
+	rpi.Name = pb.GetName()
+	rpi.ReplicaN = int(pb.GetReplicaN())
+	rpi.Duration = time.Duration(pb.GetDuration())
+	rpi.ShardGroupDuration = time.Duration(pb.GetShardGroupDuration())
+	groups := pb.GetShardGroups()
+	rpi.ShardGroups = make([]ShardGroupInfo, len(groups))
+	for idx := range groups {
+		rpi.ShardGroups[idx].unmarshal(groups[idx])
+	}
+}
+
+// clone returns a deep copy of rpi, rebuilding the shard group slice via each group's clone method.
+func (rpi RetentionPolicyInfo) clone() RetentionPolicyInfo {
+	dup := rpi
+
+	if src := rpi.ShardGroups; src != nil {
+		dup.ShardGroups = make([]ShardGroupInfo, len(src))
+		for idx, sg := range src {
+			dup.ShardGroups[idx] = sg.clone()
+		}
+	}
+
+	return dup
+}
+
+// shardGroupDuration maps a policy duration to a shard group width: 1 hour below 2 days, 1 day below 180 days, else (or for 0) 7 days.
+func shardGroupDuration(d time.Duration) time.Duration {
+	if d != 0 && d < 2*24*time.Hour {
+		return 1 * time.Hour
+	} else if d != 0 && d < 180*24*time.Hour {
+		return 1 * 24 * time.Hour
+	}
+	return 7 * 24 * time.Hour
+}
+
+// ShardGroupInfo represents metadata about a shard group. The DeletedAt field is important
+// because it makes it clear that a ShardGroup has been marked as deleted, and allows the system
+// to be sure that a ShardGroup is not simply missing. If the DeletedAt is set, the system can
+// safely delete any associated shards.
+type ShardGroupInfo struct {
+	ID        uint64
+	StartTime time.Time
+	EndTime   time.Time
+	DeletedAt time.Time
+	Shards    []ShardInfo
+}
+
+type ShardGroupInfos []ShardGroupInfo // implements sort.Interface, ordering by StartTime
+
+func (a ShardGroupInfos) Len() int           { return len(a) }
+func (a ShardGroupInfos) Less(i, j int) bool { return a[i].StartTime.Before(a[j].StartTime) }
+func (a ShardGroupInfos) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
+
+// Contains reports whether timestamp lies in the half-open interval [StartTime, EndTime).
+func (sgi *ShardGroupInfo) Contains(timestamp time.Time) bool {
+	return !sgi.StartTime.After(timestamp) && sgi.EndTime.After(timestamp)
+}
+
+// Overlaps reports whether the group's time span intersects min..max, i.e. StartTime is not after max and EndTime is after min.
+func (sgi *ShardGroupInfo) Overlaps(min, max time.Time) bool {
+	return !sgi.StartTime.After(max) && sgi.EndTime.After(min)
+}
+
+// Deleted reports whether this ShardGroup has been marked as deleted, i.e. DeletedAt is non-zero.
+func (sgi *ShardGroupInfo) Deleted() bool {
+	return !sgi.DeletedAt.IsZero()
+}
+
+// clone returns a deep copy of sgi, rebuilding the shard slice via each shard's clone method.
+func (sgi ShardGroupInfo) clone() ShardGroupInfo {
+	dup := sgi
+
+	if src := sgi.Shards; src != nil {
+		dup.Shards = make([]ShardInfo, len(src))
+		for idx, shard := range src {
+			dup.Shards[idx] = shard.clone()
+		}
+	}
+
+	return dup
+}
+
+// ShardFor returns the ShardInfo for a Point hash, chosen as hash modulo the number of shards. It panics if the group has no shards.
+func (s *ShardGroupInfo) ShardFor(hash uint64) ShardInfo {
+	return s.Shards[hash%uint64(len(s.Shards))]
+}
+
+// marshal converts sgi into its protobuf message form.
+// Times go through MarshalTime so that a zero time is written as 0.
+func (sgi *ShardGroupInfo) marshal() *internal.ShardGroupInfo {
+	shards := make([]*internal.ShardInfo, len(sgi.Shards))
+	for idx := range sgi.Shards {
+		shards[idx] = sgi.Shards[idx].marshal()
+	}
+
+	return &internal.ShardGroupInfo{
+		ID:        proto.Uint64(sgi.ID),
+		StartTime: proto.Int64(MarshalTime(sgi.StartTime)),
+		EndTime:   proto.Int64(MarshalTime(sgi.EndTime)),
+		DeletedAt: proto.Int64(MarshalTime(sgi.DeletedAt)),
+		Shards:    shards,
+	}
+}
+
+// unmarshal replaces the contents of sgi with the values carried by pb.
+func (sgi *ShardGroupInfo) unmarshal(pb *internal.ShardGroupInfo) {
+	sgi.ID = pb.GetID()
+	sgi.StartTime = UnmarshalTime(pb.GetStartTime())
+	sgi.EndTime = UnmarshalTime(pb.GetEndTime())
+	sgi.DeletedAt = UnmarshalTime(pb.GetDeletedAt())
+	shards := pb.GetShards()
+	sgi.Shards = make([]ShardInfo, len(shards))
+	for idx := range shards {
+		sgi.Shards[idx].unmarshal(shards[idx])
+	}
+}
+
+// ShardInfo represents metadata about a shard.
+type ShardInfo struct {
+	ID       uint64
+	OwnerIDs []uint64 // IDs of the nodes assigned to this shard by Data.CreateShardGroup
+}
+
+// OwnedBy reports whether nodeID appears in the shard's OwnerIDs.
+func (si ShardInfo) OwnedBy(nodeID uint64) bool {
+	for idx := range si.OwnerIDs {
+		if si.OwnerIDs[idx] == nodeID {
+			return true
+		}
+	}
+	return false
+}
+
+// clone returns a deep copy of si with its own OwnerIDs backing array; a nil OwnerIDs stays nil.
+func (si ShardInfo) clone() ShardInfo {
+	dup := si
+
+	if owners := si.OwnerIDs; owners != nil {
+		dup.OwnerIDs = make([]uint64, len(owners))
+		copy(dup.OwnerIDs, owners)
+	}
+
+	return dup
+}
+
+// marshal converts si into its protobuf message form, copying OwnerIDs so the
+// message does not alias the shard's slice.
+func (si ShardInfo) marshal() *internal.ShardInfo {
+	owners := make([]uint64, len(si.OwnerIDs))
+	copy(owners, si.OwnerIDs)
+
+	return &internal.ShardInfo{
+		ID:       proto.Uint64(si.ID),
+		OwnerIDs: owners,
+	}
+}
+
+// unmarshal replaces the contents of si with the values carried by pb; OwnerIDs is copied, not aliased.
+func (si *ShardInfo) unmarshal(pb *internal.ShardInfo) {
+	owners := pb.GetOwnerIDs()
+	si.ID = pb.GetID()
+	si.OwnerIDs = append(make([]uint64, 0, len(owners)), owners...)
+}
+
+// ContinuousQueryInfo represents metadata about a continuous query.
+type ContinuousQueryInfo struct {
+	Name  string // unique within its database; see Data.CreateContinuousQuery
+	Query string
+}
+
+// clone returns a copy of cqi; the struct has only string fields, so the value copy is already deep.
+func (cqi ContinuousQueryInfo) clone() ContinuousQueryInfo { return cqi }
+
+// marshal converts cqi into its protobuf message form.
+func (cqi ContinuousQueryInfo) marshal() *internal.ContinuousQueryInfo {
+	pb := new(internal.ContinuousQueryInfo)
+	pb.Name = proto.String(cqi.Name)
+	pb.Query = proto.String(cqi.Query)
+	return pb
+}
+
+// unmarshal replaces the contents of cqi with the values carried by pb.
+// Both fields are overwritten in a single assignment.
+func (cqi *ContinuousQueryInfo) unmarshal(pb *internal.ContinuousQueryInfo) {
+	*cqi = ContinuousQueryInfo{Name: pb.GetName(), Query: pb.GetQuery()}
+}
+
+// UserInfo represents metadata about a user in the system.
+type UserInfo struct {
+	Name       string
+	Hash       string                        // password hash, as passed to Data.CreateUser or Data.UpdateUser
+	Admin      bool                          // admins pass every Authorize check
+	Privileges map[string]influxql.Privilege // keyed by database name
+}
+
+// Authorize reports whether the user may use privilege on database: admins always, others only with that exact privilege or AllPrivileges.
+func (ui *UserInfo) Authorize(privilege influxql.Privilege, database string) bool {
+	if ui.Admin {
+		return true
+	}
+	granted, found := ui.Privileges[database]
+	return found && (granted == influxql.AllPrivileges || granted == privilege)
+}
+
+// clone returns a deep copy of ui, including its own Privileges map; a nil map stays nil.
+func (ui UserInfo) clone() UserInfo {
+	dup := ui
+
+	if src := ui.Privileges; src != nil {
+		dup.Privileges = make(map[string]influxql.Privilege, len(src))
+		for db, priv := range src {
+			dup.Privileges[db] = priv
+		}
+	}
+
+	return dup
+}
+
+// marshal converts ui into its protobuf message form. Privileges are emitted
+// in map iteration order, so their order in the message is not stable.
+func (ui UserInfo) marshal() *internal.UserInfo {
+	pb := new(internal.UserInfo)
+	pb.Name = proto.String(ui.Name)
+	pb.Hash = proto.String(ui.Hash)
+	pb.Admin = proto.Bool(ui.Admin)
+
+	for db, priv := range ui.Privileges {
+		entry := &internal.UserPrivilege{
+			Database:  proto.String(db),
+			Privilege: proto.Int32(int32(priv)),
+		}
+		pb.Privileges = append(pb.Privileges, entry)
+	}
+	return pb
+}
+
+// unmarshal replaces the contents of ui with the values carried by pb; Privileges is always a non-nil map afterwards.
+func (ui *UserInfo) unmarshal(pb *internal.UserInfo) {
+	ui.Name = pb.GetName()
+	ui.Hash = pb.GetHash()
+	ui.Admin = pb.GetAdmin()
+	grants := pb.GetPrivileges()
+	ui.Privileges = make(map[string]influxql.Privilege, len(grants))
+	for _, grant := range grants {
+		ui.Privileges[grant.GetDatabase()] = influxql.Privilege(grant.GetPrivilege())
+	}
+}
+
+// MarshalTime converts t to nanoseconds since the Unix epoch, mapping the zero time to 0.
+func MarshalTime(t time.Time) int64 {
+	if !t.IsZero() {
+		return t.UnixNano()
+	}
+	return 0
+}
+
+// UnmarshalTime converts nanoseconds since the Unix epoch to a UTC time.
+// The value 0 maps to the zero time.Time rather than to the epoch.
+func UnmarshalTime(v int64) time.Time {
+	if v != 0 {
+		return time.Unix(0, v).UTC()
+	}
+	return time.Time{}
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/errors.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/errors.go
new file mode 100644
index 00000000000..dfc69aa360f
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/errors.go
@@ -0,0 +1,116 @@
+package meta
+
+import (
+	"errors"
+	"fmt"
+)
+
+var (
+	// ErrStoreOpen is returned when opening an already open store.
+	ErrStoreOpen = errors.New("store already open")
+
+	// ErrStoreClosed is returned when closing an already closed store.
+	ErrStoreClosed = errors.New("raft store already closed")
+
+	// ErrTooManyPeers is returned when more than 3 peers are used.
+	ErrTooManyPeers = errors.New("too many peers; influxdb v0.9.0 is limited to 3 nodes in a cluster")
+)
+
+var (
+	// ErrNodeExists is returned when creating an already existing node.
+	ErrNodeExists = errors.New("node already exists")
+
+	// ErrNodeNotFound is returned when mutating a node that doesn't exist.
+	ErrNodeNotFound = errors.New("node not found")
+
+	// ErrNodesRequired is returned when at least one node is required for an operation.
+	// This occurs when creating a shard group.
+	ErrNodesRequired = errors.New("at least one node required")
+)
+
+var (
+	// ErrDatabaseExists is returned when creating an already existing database.
+	ErrDatabaseExists = errors.New("database already exists")
+
+	// ErrDatabaseNotFound is returned when mutating a database that doesn't exist.
+	ErrDatabaseNotFound = errors.New("database not found")
+
+	// ErrDatabaseNameRequired is returned when creating a database without a name.
+	ErrDatabaseNameRequired = errors.New("database name required")
+)
+
+var (
+	// ErrRetentionPolicyExists is returned when creating an already existing policy.
+	ErrRetentionPolicyExists = errors.New("retention policy already exists")
+
+	// ErrRetentionPolicyNotFound is returned when mutating a policy that doesn't exist.
+	ErrRetentionPolicyNotFound = errors.New("retention policy not found")
+
+	// ErrRetentionPolicyNameRequired is returned when creating a policy without a name.
+	ErrRetentionPolicyNameRequired = errors.New("retention policy name required")
+
+	// ErrRetentionPolicyNameExists is returned when renaming a policy to
+	// the same name as another existing policy.
+	ErrRetentionPolicyNameExists = errors.New("retention policy name already exists")
+
+	// ErrRetentionPolicyDurationTooLow is returned when updating a retention
+	// policy that has a duration lower than RetentionPolicyMinDuration.
+	ErrRetentionPolicyDurationTooLow = fmt.Errorf("retention policy duration must be at least %s",
+		RetentionPolicyMinDuration)
+
+	// ErrReplicationFactorMismatch is returned when the replication factor
+	// does not match the number of nodes in the cluster. This is a temporary
+	// restriction until v0.9.1 is released.
+	ErrReplicationFactorMismatch = errors.New("replication factor must match cluster size; this limitation will be lifted in v0.9.1")
+)
+
+var (
+	// ErrShardGroupExists is returned when creating an already existing shard group.
+	ErrShardGroupExists = errors.New("shard group already exists")
+
+	// ErrShardGroupNotFound is returned when mutating a shard group that doesn't exist.
+	ErrShardGroupNotFound = errors.New("shard group not found")
+)
+
+var (
+	// ErrContinuousQueryExists is returned when creating an already existing continuous query.
+	ErrContinuousQueryExists = errors.New("continuous query already exists")
+
+	// ErrContinuousQueryNotFound is returned when removing a continuous query that doesn't exist.
+	ErrContinuousQueryNotFound = errors.New("continuous query not found")
+)
+
+var (
+	// ErrUserExists is returned when creating an already existing user.
+	ErrUserExists = errors.New("user already exists")
+
+	// ErrUserNotFound is returned when mutating a user that doesn't exist.
+	ErrUserNotFound = errors.New("user not found")
+
+	// ErrUsernameRequired is returned when creating a user without a username.
+	ErrUsernameRequired = errors.New("username required")
+)
+
+// errs is the set of errors lookupError resolves; only store, node and database errors are listed (NOTE(review): confirm).
+var errs = [...]error{
+	ErrStoreOpen, ErrStoreClosed, ErrNodeExists, ErrNodeNotFound,
+	ErrDatabaseExists, ErrDatabaseNotFound, ErrDatabaseNameRequired,
+}
+
+// errLookup maps an error's message to the canonical error value in errs.
+var errLookup = make(map[string]error, len(errs))
+
+func init() {
+	for i := range errs {
+		errLookup[errs[i].Error()] = errs[i]
+	}
+}
+
+// lookupError returns the known error whose message equals err's, or err itself if none.
+func lookupError(err error) error {
+	known, found := errLookup[err.Error()]
+	if !found {
+		return err
+	}
+	return known
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.pb.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.pb.go
new file mode 100644
index 00000000000..979c842b9ed
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.pb.go
@@ -0,0 +1,1167 @@
+// Code generated by protoc-gen-gogo.
+// source: internal/meta.proto
+// DO NOT EDIT!
+
+/*
+Package internal is a generated protocol buffer package.
+
+It is generated from these files:
+	internal/meta.proto
+
+It has these top-level messages:
+	Data
+	NodeInfo
+	DatabaseInfo
+	RetentionPolicyInfo
+	ShardGroupInfo
+	ShardInfo
+	ContinuousQueryInfo
+	UserInfo
+	UserPrivilege
+	Command
+	CreateNodeCommand
+	DeleteNodeCommand
+	CreateDatabaseCommand
+	DropDatabaseCommand
+	CreateRetentionPolicyCommand
+	DropRetentionPolicyCommand
+	SetDefaultRetentionPolicyCommand
+	UpdateRetentionPolicyCommand
+	CreateShardGroupCommand
+	DeleteShardGroupCommand
+	CreateContinuousQueryCommand
+	DropContinuousQueryCommand
+	CreateUserCommand
+	DropUserCommand
+	UpdateUserCommand
+	SetPrivilegeCommand
+	SetDataCommand
+	SetAdminPrivilegeCommand
+	Response
+*/
+package internal
+
+import proto "github.com/gogo/protobuf/proto"
+import math "math"
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ = proto.Marshal
+var _ = math.Inf
+
+type Command_Type int32
+
+const (
+	Command_CreateNodeCommand                Command_Type = 1
+	Command_DeleteNodeCommand                Command_Type = 2
+	Command_CreateDatabaseCommand            Command_Type = 3
+	Command_DropDatabaseCommand              Command_Type = 4
+	Command_CreateRetentionPolicyCommand     Command_Type = 5
+	Command_DropRetentionPolicyCommand       Command_Type = 6
+	Command_SetDefaultRetentionPolicyCommand Command_Type = 7
+	Command_UpdateRetentionPolicyCommand     Command_Type = 8
+	Command_CreateShardGroupCommand          Command_Type = 9
+	Command_DeleteShardGroupCommand          Command_Type = 10
+	Command_CreateContinuousQueryCommand     Command_Type = 11
+	Command_DropContinuousQueryCommand       Command_Type = 12
+	Command_CreateUserCommand                Command_Type = 13
+	Command_DropUserCommand                  Command_Type = 14
+	Command_UpdateUserCommand                Command_Type = 15
+	Command_SetPrivilegeCommand              Command_Type = 16
+	Command_SetDataCommand                   Command_Type = 17
+	Command_SetAdminPrivilegeCommand         Command_Type = 18
+)
+
+var Command_Type_name = map[int32]string{
+	1:  "CreateNodeCommand",
+	2:  "DeleteNodeCommand",
+	3:  "CreateDatabaseCommand",
+	4:  "DropDatabaseCommand",
+	5:  "CreateRetentionPolicyCommand",
+	6:  "DropRetentionPolicyCommand",
+	7:  "SetDefaultRetentionPolicyCommand",
+	8:  "UpdateRetentionPolicyCommand",
+	9:  "CreateShardGroupCommand",
+	10: "DeleteShardGroupCommand",
+	11: "CreateContinuousQueryCommand",
+	12: "DropContinuousQueryCommand",
+	13: "CreateUserCommand",
+	14: "DropUserCommand",
+	15: "UpdateUserCommand",
+	16: "SetPrivilegeCommand",
+	17: "SetDataCommand",
+	18: "SetAdminPrivilegeCommand",
+}
+var Command_Type_value = map[string]int32{
+	"CreateNodeCommand":                1,
+	"DeleteNodeCommand":                2,
+	"CreateDatabaseCommand":            3,
+	"DropDatabaseCommand":              4,
+	"CreateRetentionPolicyCommand":     5,
+	"DropRetentionPolicyCommand":       6,
+	"SetDefaultRetentionPolicyCommand": 7,
+	"UpdateRetentionPolicyCommand":     8,
+	"CreateShardGroupCommand":          9,
+	"DeleteShardGroupCommand":          10,
+	"CreateContinuousQueryCommand":     11,
+	"DropContinuousQueryCommand":       12,
+	"CreateUserCommand":                13,
+	"DropUserCommand":                  14,
+	"UpdateUserCommand":                15,
+	"SetPrivilegeCommand":              16,
+	"SetDataCommand":                   17,
+	"SetAdminPrivilegeCommand":         18,
+}
+
+func (x Command_Type) Enum() *Command_Type {
+	p := new(Command_Type)
+	*p = x
+	return p
+}
+func (x Command_Type) String() string {
+	return proto.EnumName(Command_Type_name, int32(x))
+}
+func (x *Command_Type) UnmarshalJSON(data []byte) error {
+	value, err := proto.UnmarshalJSONEnum(Command_Type_value, data, "Command_Type")
+	if err != nil {
+		return err
+	}
+	*x = Command_Type(value)
+	return nil
+}
+
+type Data struct {
+	Term             *uint64         `protobuf:"varint,1,req" json:"Term,omitempty"`
+	Index            *uint64         `protobuf:"varint,2,req" json:"Index,omitempty"`
+	ClusterID        *uint64         `protobuf:"varint,3,req" json:"ClusterID,omitempty"`
+	Nodes            []*NodeInfo     `protobuf:"bytes,4,rep" json:"Nodes,omitempty"`
+	Databases        []*DatabaseInfo `protobuf:"bytes,5,rep" json:"Databases,omitempty"`
+	Users            []*UserInfo     `protobuf:"bytes,6,rep" json:"Users,omitempty"`
+	MaxNodeID        *uint64         `protobuf:"varint,7,req" json:"MaxNodeID,omitempty"`
+	MaxShardGroupID  *uint64         `protobuf:"varint,8,req" json:"MaxShardGroupID,omitempty"`
+	MaxShardID       *uint64         `protobuf:"varint,9,req" json:"MaxShardID,omitempty"`
+	XXX_unrecognized []byte          `json:"-"`
+}
+
+func (m *Data) Reset()         { *m = Data{} }
+func (m *Data) String() string { return proto.CompactTextString(m) }
+func (*Data) ProtoMessage()    {}
+
+func (m *Data) GetTerm() uint64 {
+	if m != nil && m.Term != nil {
+		return *m.Term
+	}
+	return 0
+}
+
+func (m *Data) GetIndex() uint64 {
+	if m != nil && m.Index != nil {
+		return *m.Index
+	}
+	return 0
+}
+
+func (m *Data) GetClusterID() uint64 {
+	if m != nil && m.ClusterID != nil {
+		return *m.ClusterID
+	}
+	return 0
+}
+
+func (m *Data) GetNodes() []*NodeInfo {
+	if m != nil {
+		return m.Nodes
+	}
+	return nil
+}
+
+func (m *Data) GetDatabases() []*DatabaseInfo {
+	if m != nil {
+		return m.Databases
+	}
+	return nil
+}
+
+func (m *Data) GetUsers() []*UserInfo {
+	if m != nil {
+		return m.Users
+	}
+	return nil
+}
+
+func (m *Data) GetMaxNodeID() uint64 {
+	if m != nil && m.MaxNodeID != nil {
+		return *m.MaxNodeID
+	}
+	return 0
+}
+
+func (m *Data) GetMaxShardGroupID() uint64 {
+	if m != nil && m.MaxShardGroupID != nil {
+		return *m.MaxShardGroupID
+	}
+	return 0
+}
+
+func (m *Data) GetMaxShardID() uint64 {
+	if m != nil && m.MaxShardID != nil {
+		return *m.MaxShardID
+	}
+	return 0
+}
+
+type NodeInfo struct {
+	ID               *uint64 `protobuf:"varint,1,req" json:"ID,omitempty"`
+	Host             *string `protobuf:"bytes,2,req" json:"Host,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *NodeInfo) Reset()         { *m = NodeInfo{} }
+func (m *NodeInfo) String() string { return proto.CompactTextString(m) }
+func (*NodeInfo) ProtoMessage()    {}
+
+func (m *NodeInfo) GetID() uint64 {
+	if m != nil && m.ID != nil {
+		return *m.ID
+	}
+	return 0
+}
+
+func (m *NodeInfo) GetHost() string {
+	if m != nil && m.Host != nil {
+		return *m.Host
+	}
+	return ""
+}
+
+type DatabaseInfo struct {
+	Name                   *string                `protobuf:"bytes,1,req" json:"Name,omitempty"`
+	DefaultRetentionPolicy *string                `protobuf:"bytes,2,req" json:"DefaultRetentionPolicy,omitempty"`
+	RetentionPolicies      []*RetentionPolicyInfo `protobuf:"bytes,3,rep" json:"RetentionPolicies,omitempty"`
+	ContinuousQueries      []*ContinuousQueryInfo `protobuf:"bytes,4,rep" json:"ContinuousQueries,omitempty"`
+	XXX_unrecognized       []byte                 `json:"-"`
+}
+
+func (m *DatabaseInfo) Reset()         { *m = DatabaseInfo{} }
+func (m *DatabaseInfo) String() string { return proto.CompactTextString(m) }
+func (*DatabaseInfo) ProtoMessage()    {}
+
+func (m *DatabaseInfo) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+func (m *DatabaseInfo) GetDefaultRetentionPolicy() string {
+	if m != nil && m.DefaultRetentionPolicy != nil {
+		return *m.DefaultRetentionPolicy
+	}
+	return ""
+}
+
+func (m *DatabaseInfo) GetRetentionPolicies() []*RetentionPolicyInfo {
+	if m != nil {
+		return m.RetentionPolicies
+	}
+	return nil
+}
+
+func (m *DatabaseInfo) GetContinuousQueries() []*ContinuousQueryInfo {
+	if m != nil {
+		return m.ContinuousQueries
+	}
+	return nil
+}
+
+type RetentionPolicyInfo struct {
+	Name               *string           `protobuf:"bytes,1,req" json:"Name,omitempty"`
+	Duration           *int64            `protobuf:"varint,2,req" json:"Duration,omitempty"`
+	ShardGroupDuration *int64            `protobuf:"varint,3,req" json:"ShardGroupDuration,omitempty"`
+	ReplicaN           *uint32           `protobuf:"varint,4,req" json:"ReplicaN,omitempty"`
+	ShardGroups        []*ShardGroupInfo `protobuf:"bytes,5,rep" json:"ShardGroups,omitempty"`
+	XXX_unrecognized   []byte            `json:"-"`
+}
+
+func (m *RetentionPolicyInfo) Reset()         { *m = RetentionPolicyInfo{} }
+func (m *RetentionPolicyInfo) String() string { return proto.CompactTextString(m) }
+func (*RetentionPolicyInfo) ProtoMessage()    {}
+
+func (m *RetentionPolicyInfo) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+func (m *RetentionPolicyInfo) GetDuration() int64 {
+	if m != nil && m.Duration != nil {
+		return *m.Duration
+	}
+	return 0
+}
+
+func (m *RetentionPolicyInfo) GetShardGroupDuration() int64 {
+	if m != nil && m.ShardGroupDuration != nil {
+		return *m.ShardGroupDuration
+	}
+	return 0
+}
+
+func (m *RetentionPolicyInfo) GetReplicaN() uint32 {
+	if m != nil && m.ReplicaN != nil {
+		return *m.ReplicaN
+	}
+	return 0
+}
+
+func (m *RetentionPolicyInfo) GetShardGroups() []*ShardGroupInfo {
+	if m != nil {
+		return m.ShardGroups
+	}
+	return nil
+}
+
+type ShardGroupInfo struct {
+	ID               *uint64      `protobuf:"varint,1,req" json:"ID,omitempty"`
+	StartTime        *int64       `protobuf:"varint,2,req" json:"StartTime,omitempty"`
+	EndTime          *int64       `protobuf:"varint,3,req" json:"EndTime,omitempty"`
+	DeletedAt        *int64       `protobuf:"varint,4,req" json:"DeletedAt,omitempty"`
+	Shards           []*ShardInfo `protobuf:"bytes,5,rep" json:"Shards,omitempty"`
+	XXX_unrecognized []byte       `json:"-"`
+}
+
+func (m *ShardGroupInfo) Reset()         { *m = ShardGroupInfo{} }
+func (m *ShardGroupInfo) String() string { return proto.CompactTextString(m) }
+func (*ShardGroupInfo) ProtoMessage()    {}
+
+func (m *ShardGroupInfo) GetID() uint64 {
+	if m != nil && m.ID != nil {
+		return *m.ID
+	}
+	return 0
+}
+
+func (m *ShardGroupInfo) GetStartTime() int64 {
+	if m != nil && m.StartTime != nil {
+		return *m.StartTime
+	}
+	return 0
+}
+
+func (m *ShardGroupInfo) GetEndTime() int64 {
+	if m != nil && m.EndTime != nil {
+		return *m.EndTime
+	}
+	return 0
+}
+
+func (m *ShardGroupInfo) GetDeletedAt() int64 {
+	if m != nil && m.DeletedAt != nil {
+		return *m.DeletedAt
+	}
+	return 0
+}
+
+func (m *ShardGroupInfo) GetShards() []*ShardInfo {
+	if m != nil {
+		return m.Shards
+	}
+	return nil
+}
+
+type ShardInfo struct {
+	ID               *uint64  `protobuf:"varint,1,req" json:"ID,omitempty"`
+	OwnerIDs         []uint64 `protobuf:"varint,2,rep" json:"OwnerIDs,omitempty"`
+	XXX_unrecognized []byte   `json:"-"`
+}
+
+func (m *ShardInfo) Reset()         { *m = ShardInfo{} }
+func (m *ShardInfo) String() string { return proto.CompactTextString(m) }
+func (*ShardInfo) ProtoMessage()    {}
+
+func (m *ShardInfo) GetID() uint64 {
+	if m != nil && m.ID != nil {
+		return *m.ID
+	}
+	return 0
+}
+
+func (m *ShardInfo) GetOwnerIDs() []uint64 {
+	if m != nil {
+		return m.OwnerIDs
+	}
+	return nil
+}
+
+type ContinuousQueryInfo struct {
+	Name             *string `protobuf:"bytes,1,req" json:"Name,omitempty"`
+	Query            *string `protobuf:"bytes,2,req" json:"Query,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *ContinuousQueryInfo) Reset()         { *m = ContinuousQueryInfo{} }
+func (m *ContinuousQueryInfo) String() string { return proto.CompactTextString(m) }
+func (*ContinuousQueryInfo) ProtoMessage()    {}
+
+func (m *ContinuousQueryInfo) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+func (m *ContinuousQueryInfo) GetQuery() string {
+	if m != nil && m.Query != nil {
+		return *m.Query
+	}
+	return ""
+}
+
+type UserInfo struct {
+	Name             *string          `protobuf:"bytes,1,req" json:"Name,omitempty"`
+	Hash             *string          `protobuf:"bytes,2,req" json:"Hash,omitempty"`
+	Admin            *bool            `protobuf:"varint,3,req" json:"Admin,omitempty"`
+	Privileges       []*UserPrivilege `protobuf:"bytes,4,rep" json:"Privileges,omitempty"`
+	XXX_unrecognized []byte           `json:"-"`
+}
+
+func (m *UserInfo) Reset()         { *m = UserInfo{} }
+func (m *UserInfo) String() string { return proto.CompactTextString(m) }
+func (*UserInfo) ProtoMessage()    {}
+
+func (m *UserInfo) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+func (m *UserInfo) GetHash() string {
+	if m != nil && m.Hash != nil {
+		return *m.Hash
+	}
+	return ""
+}
+
+func (m *UserInfo) GetAdmin() bool {
+	if m != nil && m.Admin != nil {
+		return *m.Admin
+	}
+	return false
+}
+
+func (m *UserInfo) GetPrivileges() []*UserPrivilege {
+	if m != nil {
+		return m.Privileges
+	}
+	return nil
+}
+
+type UserPrivilege struct {
+	Database         *string `protobuf:"bytes,1,req" json:"Database,omitempty"`
+	Privilege        *int32  `protobuf:"varint,2,req" json:"Privilege,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *UserPrivilege) Reset()         { *m = UserPrivilege{} }
+func (m *UserPrivilege) String() string { return proto.CompactTextString(m) }
+func (*UserPrivilege) ProtoMessage()    {}
+
+func (m *UserPrivilege) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *UserPrivilege) GetPrivilege() int32 {
+	if m != nil && m.Privilege != nil {
+		return *m.Privilege
+	}
+	return 0
+}
+
+type Command struct {
+	Type             *Command_Type             `protobuf:"varint,1,req,name=type,enum=internal.Command_Type" json:"type,omitempty"`
+	XXX_extensions   map[int32]proto.Extension `json:"-"`
+	XXX_unrecognized []byte                    `json:"-"`
+}
+
+func (m *Command) Reset()         { *m = Command{} }
+func (m *Command) String() string { return proto.CompactTextString(m) }
+func (*Command) ProtoMessage()    {}
+
+var extRange_Command = []proto.ExtensionRange{
+	{100, 536870911},
+}
+
+func (*Command) ExtensionRangeArray() []proto.ExtensionRange {
+	return extRange_Command
+}
+func (m *Command) ExtensionMap() map[int32]proto.Extension {
+	if m.XXX_extensions == nil {
+		m.XXX_extensions = make(map[int32]proto.Extension)
+	}
+	return m.XXX_extensions
+}
+
+func (m *Command) GetType() Command_Type {
+	if m != nil && m.Type != nil {
+		return *m.Type
+	}
+	return Command_CreateNodeCommand
+}
+
+type CreateNodeCommand struct {
+	Host             *string `protobuf:"bytes,1,req" json:"Host,omitempty"`
+	Rand             *uint64 `protobuf:"varint,2,req" json:"Rand,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *CreateNodeCommand) Reset()         { *m = CreateNodeCommand{} }
+func (m *CreateNodeCommand) String() string { return proto.CompactTextString(m) }
+func (*CreateNodeCommand) ProtoMessage()    {}
+
+func (m *CreateNodeCommand) GetHost() string {
+	if m != nil && m.Host != nil {
+		return *m.Host
+	}
+	return ""
+}
+
+func (m *CreateNodeCommand) GetRand() uint64 {
+	if m != nil && m.Rand != nil {
+		return *m.Rand
+	}
+	return 0
+}
+
+var E_CreateNodeCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*CreateNodeCommand)(nil),
+	Field:         101,
+	Name:          "internal.CreateNodeCommand.command",
+	Tag:           "bytes,101,opt,name=command",
+}
+
+type DeleteNodeCommand struct {
+	ID               *uint64 `protobuf:"varint,1,req" json:"ID,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *DeleteNodeCommand) Reset()         { *m = DeleteNodeCommand{} }
+func (m *DeleteNodeCommand) String() string { return proto.CompactTextString(m) }
+func (*DeleteNodeCommand) ProtoMessage()    {}
+
+func (m *DeleteNodeCommand) GetID() uint64 {
+	if m != nil && m.ID != nil {
+		return *m.ID
+	}
+	return 0
+}
+
+var E_DeleteNodeCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*DeleteNodeCommand)(nil),
+	Field:         102,
+	Name:          "internal.DeleteNodeCommand.command",
+	Tag:           "bytes,102,opt,name=command",
+}
+
+type CreateDatabaseCommand struct {
+	Name             *string `protobuf:"bytes,1,req" json:"Name,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *CreateDatabaseCommand) Reset()         { *m = CreateDatabaseCommand{} }
+func (m *CreateDatabaseCommand) String() string { return proto.CompactTextString(m) }
+func (*CreateDatabaseCommand) ProtoMessage()    {}
+
+func (m *CreateDatabaseCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+var E_CreateDatabaseCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*CreateDatabaseCommand)(nil),
+	Field:         103,
+	Name:          "internal.CreateDatabaseCommand.command",
+	Tag:           "bytes,103,opt,name=command",
+}
+
+type DropDatabaseCommand struct {
+	Name             *string `protobuf:"bytes,1,req" json:"Name,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *DropDatabaseCommand) Reset()         { *m = DropDatabaseCommand{} }
+func (m *DropDatabaseCommand) String() string { return proto.CompactTextString(m) }
+func (*DropDatabaseCommand) ProtoMessage()    {}
+
+func (m *DropDatabaseCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+var E_DropDatabaseCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*DropDatabaseCommand)(nil),
+	Field:         104,
+	Name:          "internal.DropDatabaseCommand.command",
+	Tag:           "bytes,104,opt,name=command",
+}
+
+type CreateRetentionPolicyCommand struct {
+	Database         *string              `protobuf:"bytes,1,req" json:"Database,omitempty"`
+	RetentionPolicy  *RetentionPolicyInfo `protobuf:"bytes,2,req" json:"RetentionPolicy,omitempty"`
+	XXX_unrecognized []byte               `json:"-"`
+}
+
+func (m *CreateRetentionPolicyCommand) Reset()         { *m = CreateRetentionPolicyCommand{} }
+func (m *CreateRetentionPolicyCommand) String() string { return proto.CompactTextString(m) }
+func (*CreateRetentionPolicyCommand) ProtoMessage()    {}
+
+func (m *CreateRetentionPolicyCommand) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *CreateRetentionPolicyCommand) GetRetentionPolicy() *RetentionPolicyInfo {
+	if m != nil {
+		return m.RetentionPolicy
+	}
+	return nil
+}
+
+var E_CreateRetentionPolicyCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*CreateRetentionPolicyCommand)(nil),
+	Field:         105,
+	Name:          "internal.CreateRetentionPolicyCommand.command",
+	Tag:           "bytes,105,opt,name=command",
+}
+
+type DropRetentionPolicyCommand struct {
+	Database         *string `protobuf:"bytes,1,req" json:"Database,omitempty"`
+	Name             *string `protobuf:"bytes,2,req" json:"Name,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *DropRetentionPolicyCommand) Reset()         { *m = DropRetentionPolicyCommand{} }
+func (m *DropRetentionPolicyCommand) String() string { return proto.CompactTextString(m) }
+func (*DropRetentionPolicyCommand) ProtoMessage()    {}
+
+func (m *DropRetentionPolicyCommand) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *DropRetentionPolicyCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+var E_DropRetentionPolicyCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*DropRetentionPolicyCommand)(nil),
+	Field:         106,
+	Name:          "internal.DropRetentionPolicyCommand.command",
+	Tag:           "bytes,106,opt,name=command",
+}
+
+type SetDefaultRetentionPolicyCommand struct {
+	Database         *string `protobuf:"bytes,1,req" json:"Database,omitempty"`
+	Name             *string `protobuf:"bytes,2,req" json:"Name,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *SetDefaultRetentionPolicyCommand) Reset()         { *m = SetDefaultRetentionPolicyCommand{} }
+func (m *SetDefaultRetentionPolicyCommand) String() string { return proto.CompactTextString(m) }
+func (*SetDefaultRetentionPolicyCommand) ProtoMessage()    {}
+
+func (m *SetDefaultRetentionPolicyCommand) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *SetDefaultRetentionPolicyCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+var E_SetDefaultRetentionPolicyCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*SetDefaultRetentionPolicyCommand)(nil),
+	Field:         107,
+	Name:          "internal.SetDefaultRetentionPolicyCommand.command",
+	Tag:           "bytes,107,opt,name=command",
+}
+
+type UpdateRetentionPolicyCommand struct {
+	Database         *string `protobuf:"bytes,1,req" json:"Database,omitempty"`
+	Name             *string `protobuf:"bytes,2,req" json:"Name,omitempty"`
+	NewName          *string `protobuf:"bytes,3,opt" json:"NewName,omitempty"`
+	Duration         *int64  `protobuf:"varint,4,opt" json:"Duration,omitempty"`
+	ReplicaN         *uint32 `protobuf:"varint,5,opt" json:"ReplicaN,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *UpdateRetentionPolicyCommand) Reset()         { *m = UpdateRetentionPolicyCommand{} }
+func (m *UpdateRetentionPolicyCommand) String() string { return proto.CompactTextString(m) }
+func (*UpdateRetentionPolicyCommand) ProtoMessage()    {}
+
+func (m *UpdateRetentionPolicyCommand) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *UpdateRetentionPolicyCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+func (m *UpdateRetentionPolicyCommand) GetNewName() string {
+	if m != nil && m.NewName != nil {
+		return *m.NewName
+	}
+	return ""
+}
+
+func (m *UpdateRetentionPolicyCommand) GetDuration() int64 {
+	if m != nil && m.Duration != nil {
+		return *m.Duration
+	}
+	return 0
+}
+
+func (m *UpdateRetentionPolicyCommand) GetReplicaN() uint32 {
+	if m != nil && m.ReplicaN != nil {
+		return *m.ReplicaN
+	}
+	return 0
+}
+
+var E_UpdateRetentionPolicyCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*UpdateRetentionPolicyCommand)(nil),
+	Field:         108,
+	Name:          "internal.UpdateRetentionPolicyCommand.command",
+	Tag:           "bytes,108,opt,name=command",
+}
+
+type CreateShardGroupCommand struct {
+	Database         *string `protobuf:"bytes,1,req" json:"Database,omitempty"`
+	Policy           *string `protobuf:"bytes,2,req" json:"Policy,omitempty"`
+	Timestamp        *int64  `protobuf:"varint,3,req" json:"Timestamp,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *CreateShardGroupCommand) Reset()         { *m = CreateShardGroupCommand{} }
+func (m *CreateShardGroupCommand) String() string { return proto.CompactTextString(m) }
+func (*CreateShardGroupCommand) ProtoMessage()    {}
+
+func (m *CreateShardGroupCommand) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *CreateShardGroupCommand) GetPolicy() string {
+	if m != nil && m.Policy != nil {
+		return *m.Policy
+	}
+	return ""
+}
+
+func (m *CreateShardGroupCommand) GetTimestamp() int64 {
+	if m != nil && m.Timestamp != nil {
+		return *m.Timestamp
+	}
+	return 0
+}
+
+var E_CreateShardGroupCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*CreateShardGroupCommand)(nil),
+	Field:         109,
+	Name:          "internal.CreateShardGroupCommand.command",
+	Tag:           "bytes,109,opt,name=command",
+}
+
+type DeleteShardGroupCommand struct {
+	Database         *string `protobuf:"bytes,1,req" json:"Database,omitempty"`
+	Policy           *string `protobuf:"bytes,2,req" json:"Policy,omitempty"`
+	ShardGroupID     *uint64 `protobuf:"varint,3,req" json:"ShardGroupID,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *DeleteShardGroupCommand) Reset()         { *m = DeleteShardGroupCommand{} }
+func (m *DeleteShardGroupCommand) String() string { return proto.CompactTextString(m) }
+func (*DeleteShardGroupCommand) ProtoMessage()    {}
+
+func (m *DeleteShardGroupCommand) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *DeleteShardGroupCommand) GetPolicy() string {
+	if m != nil && m.Policy != nil {
+		return *m.Policy
+	}
+	return ""
+}
+
+func (m *DeleteShardGroupCommand) GetShardGroupID() uint64 {
+	if m != nil && m.ShardGroupID != nil {
+		return *m.ShardGroupID
+	}
+	return 0
+}
+
+var E_DeleteShardGroupCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*DeleteShardGroupCommand)(nil),
+	Field:         110,
+	Name:          "internal.DeleteShardGroupCommand.command",
+	Tag:           "bytes,110,opt,name=command",
+}
+
+type CreateContinuousQueryCommand struct {
+	Database         *string `protobuf:"bytes,1,req" json:"Database,omitempty"`
+	Name             *string `protobuf:"bytes,2,req" json:"Name,omitempty"`
+	Query            *string `protobuf:"bytes,3,req" json:"Query,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *CreateContinuousQueryCommand) Reset()         { *m = CreateContinuousQueryCommand{} }
+func (m *CreateContinuousQueryCommand) String() string { return proto.CompactTextString(m) }
+func (*CreateContinuousQueryCommand) ProtoMessage()    {}
+
+func (m *CreateContinuousQueryCommand) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *CreateContinuousQueryCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+func (m *CreateContinuousQueryCommand) GetQuery() string {
+	if m != nil && m.Query != nil {
+		return *m.Query
+	}
+	return ""
+}
+
+var E_CreateContinuousQueryCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*CreateContinuousQueryCommand)(nil),
+	Field:         111,
+	Name:          "internal.CreateContinuousQueryCommand.command",
+	Tag:           "bytes,111,opt,name=command",
+}
+
+type DropContinuousQueryCommand struct {
+	Database         *string `protobuf:"bytes,1,req" json:"Database,omitempty"`
+	Name             *string `protobuf:"bytes,2,req" json:"Name,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *DropContinuousQueryCommand) Reset()         { *m = DropContinuousQueryCommand{} }
+func (m *DropContinuousQueryCommand) String() string { return proto.CompactTextString(m) }
+func (*DropContinuousQueryCommand) ProtoMessage()    {}
+
+func (m *DropContinuousQueryCommand) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *DropContinuousQueryCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+var E_DropContinuousQueryCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*DropContinuousQueryCommand)(nil),
+	Field:         112,
+	Name:          "internal.DropContinuousQueryCommand.command",
+	Tag:           "bytes,112,opt,name=command",
+}
+
+type CreateUserCommand struct {
+	Name             *string `protobuf:"bytes,1,req" json:"Name,omitempty"`
+	Hash             *string `protobuf:"bytes,2,req" json:"Hash,omitempty"`
+	Admin            *bool   `protobuf:"varint,3,req" json:"Admin,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *CreateUserCommand) Reset()         { *m = CreateUserCommand{} }
+func (m *CreateUserCommand) String() string { return proto.CompactTextString(m) }
+func (*CreateUserCommand) ProtoMessage()    {}
+
+func (m *CreateUserCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+func (m *CreateUserCommand) GetHash() string {
+	if m != nil && m.Hash != nil {
+		return *m.Hash
+	}
+	return ""
+}
+
+func (m *CreateUserCommand) GetAdmin() bool {
+	if m != nil && m.Admin != nil {
+		return *m.Admin
+	}
+	return false
+}
+
+var E_CreateUserCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*CreateUserCommand)(nil),
+	Field:         113,
+	Name:          "internal.CreateUserCommand.command",
+	Tag:           "bytes,113,opt,name=command",
+}
+
+type DropUserCommand struct {
+	Name             *string `protobuf:"bytes,1,req" json:"Name,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *DropUserCommand) Reset()         { *m = DropUserCommand{} }
+func (m *DropUserCommand) String() string { return proto.CompactTextString(m) }
+func (*DropUserCommand) ProtoMessage()    {}
+
+func (m *DropUserCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+var E_DropUserCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*DropUserCommand)(nil),
+	Field:         114,
+	Name:          "internal.DropUserCommand.command",
+	Tag:           "bytes,114,opt,name=command",
+}
+
+type UpdateUserCommand struct {
+	Name             *string `protobuf:"bytes,1,req" json:"Name,omitempty"`
+	Hash             *string `protobuf:"bytes,2,req" json:"Hash,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *UpdateUserCommand) Reset()         { *m = UpdateUserCommand{} }
+func (m *UpdateUserCommand) String() string { return proto.CompactTextString(m) }
+func (*UpdateUserCommand) ProtoMessage()    {}
+
+func (m *UpdateUserCommand) GetName() string {
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+func (m *UpdateUserCommand) GetHash() string {
+	if m != nil && m.Hash != nil {
+		return *m.Hash
+	}
+	return ""
+}
+
+var E_UpdateUserCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*UpdateUserCommand)(nil),
+	Field:         115,
+	Name:          "internal.UpdateUserCommand.command",
+	Tag:           "bytes,115,opt,name=command",
+}
+
+type SetPrivilegeCommand struct {
+	Username         *string `protobuf:"bytes,1,req" json:"Username,omitempty"`
+	Database         *string `protobuf:"bytes,2,req" json:"Database,omitempty"`
+	Privilege        *int32  `protobuf:"varint,3,req" json:"Privilege,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *SetPrivilegeCommand) Reset()         { *m = SetPrivilegeCommand{} }
+func (m *SetPrivilegeCommand) String() string { return proto.CompactTextString(m) }
+func (*SetPrivilegeCommand) ProtoMessage()    {}
+
+func (m *SetPrivilegeCommand) GetUsername() string {
+	if m != nil && m.Username != nil {
+		return *m.Username
+	}
+	return ""
+}
+
+func (m *SetPrivilegeCommand) GetDatabase() string {
+	if m != nil && m.Database != nil {
+		return *m.Database
+	}
+	return ""
+}
+
+func (m *SetPrivilegeCommand) GetPrivilege() int32 {
+	if m != nil && m.Privilege != nil {
+		return *m.Privilege
+	}
+	return 0
+}
+
+var E_SetPrivilegeCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*SetPrivilegeCommand)(nil),
+	Field:         116,
+	Name:          "internal.SetPrivilegeCommand.command",
+	Tag:           "bytes,116,opt,name=command",
+}
+
+type SetDataCommand struct {
+	Data             *Data  `protobuf:"bytes,1,req" json:"Data,omitempty"`
+	XXX_unrecognized []byte `json:"-"`
+}
+
+func (m *SetDataCommand) Reset()         { *m = SetDataCommand{} }
+func (m *SetDataCommand) String() string { return proto.CompactTextString(m) }
+func (*SetDataCommand) ProtoMessage()    {}
+
+func (m *SetDataCommand) GetData() *Data {
+	if m != nil {
+		return m.Data
+	}
+	return nil
+}
+
+var E_SetDataCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*SetDataCommand)(nil),
+	Field:         117,
+	Name:          "internal.SetDataCommand.command",
+	Tag:           "bytes,117,opt,name=command",
+}
+
+type SetAdminPrivilegeCommand struct {
+	Username         *string `protobuf:"bytes,1,req" json:"Username,omitempty"`
+	Admin            *bool   `protobuf:"varint,2,req" json:"Admin,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *SetAdminPrivilegeCommand) Reset()         { *m = SetAdminPrivilegeCommand{} }
+func (m *SetAdminPrivilegeCommand) String() string { return proto.CompactTextString(m) }
+func (*SetAdminPrivilegeCommand) ProtoMessage()    {}
+
+func (m *SetAdminPrivilegeCommand) GetUsername() string {
+	if m != nil && m.Username != nil {
+		return *m.Username
+	}
+	return ""
+}
+
+func (m *SetAdminPrivilegeCommand) GetAdmin() bool {
+	if m != nil && m.Admin != nil {
+		return *m.Admin
+	}
+	return false
+}
+
+var E_SetAdminPrivilegeCommand_Command = &proto.ExtensionDesc{
+	ExtendedType:  (*Command)(nil),
+	ExtensionType: (*SetAdminPrivilegeCommand)(nil),
+	Field:         118,
+	Name:          "internal.SetAdminPrivilegeCommand.command",
+	Tag:           "bytes,118,opt,name=command",
+}
+
+type Response struct {
+	OK               *bool   `protobuf:"varint,1,req" json:"OK,omitempty"`
+	Error            *string `protobuf:"bytes,2,opt" json:"Error,omitempty"`
+	Index            *uint64 `protobuf:"varint,3,opt" json:"Index,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *Response) Reset()         { *m = Response{} }
+func (m *Response) String() string { return proto.CompactTextString(m) }
+func (*Response) ProtoMessage()    {}
+
+func (m *Response) GetOK() bool {
+	if m != nil && m.OK != nil {
+		return *m.OK
+	}
+	return false
+}
+
+func (m *Response) GetError() string {
+	if m != nil && m.Error != nil {
+		return *m.Error
+	}
+	return ""
+}
+
+func (m *Response) GetIndex() uint64 {
+	if m != nil && m.Index != nil {
+		return *m.Index
+	}
+	return 0
+}
+
+func init() {
+	proto.RegisterEnum("internal.Command_Type", Command_Type_name, Command_Type_value)
+	proto.RegisterExtension(E_CreateNodeCommand_Command)
+	proto.RegisterExtension(E_DeleteNodeCommand_Command)
+	proto.RegisterExtension(E_CreateDatabaseCommand_Command)
+	proto.RegisterExtension(E_DropDatabaseCommand_Command)
+	proto.RegisterExtension(E_CreateRetentionPolicyCommand_Command)
+	proto.RegisterExtension(E_DropRetentionPolicyCommand_Command)
+	proto.RegisterExtension(E_SetDefaultRetentionPolicyCommand_Command)
+	proto.RegisterExtension(E_UpdateRetentionPolicyCommand_Command)
+	proto.RegisterExtension(E_CreateShardGroupCommand_Command)
+	proto.RegisterExtension(E_DeleteShardGroupCommand_Command)
+	proto.RegisterExtension(E_CreateContinuousQueryCommand_Command)
+	proto.RegisterExtension(E_DropContinuousQueryCommand_Command)
+	proto.RegisterExtension(E_CreateUserCommand_Command)
+	proto.RegisterExtension(E_DropUserCommand_Command)
+	proto.RegisterExtension(E_UpdateUserCommand_Command)
+	proto.RegisterExtension(E_SetPrivilegeCommand_Command)
+	proto.RegisterExtension(E_SetDataCommand_Command)
+	proto.RegisterExtension(E_SetAdminPrivilegeCommand_Command)
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.proto b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.proto
new file mode 100644
index 00000000000..d5f5bf1fec6
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.proto
@@ -0,0 +1,257 @@
+package internal;
+
+//========================================================================
+//
+// Metadata
+//
+//========================================================================
+
+message Data {
+	required uint64 Term = 1;
+	required uint64 Index = 2;
+	required uint64 ClusterID = 3;
+
+	repeated NodeInfo Nodes = 4;
+	repeated DatabaseInfo Databases = 5;
+	repeated UserInfo Users = 6;
+
+	required uint64 MaxNodeID = 7;
+	required uint64 MaxShardGroupID = 8;
+	required uint64 MaxShardID = 9;
+}
+
+message NodeInfo {
+	required uint64 ID = 1;
+	required string Host = 2;
+}
+
+message DatabaseInfo {
+	required string Name = 1;
+	required string DefaultRetentionPolicy = 2;
+	repeated RetentionPolicyInfo RetentionPolicies = 3;
+	repeated ContinuousQueryInfo ContinuousQueries = 4;
+}
+
+message RetentionPolicyInfo {
+	required string Name = 1;
+	required int64 Duration = 2;
+	required int64 ShardGroupDuration = 3;
+	required uint32 ReplicaN = 4;
+	repeated ShardGroupInfo ShardGroups = 5;
+}
+
+message ShardGroupInfo {
+	required uint64 ID = 1;
+	required int64 StartTime = 2;
+	required int64 EndTime = 3;
+	required int64 DeletedAt = 4;
+	repeated ShardInfo Shards = 5;
+}
+
+message ShardInfo {
+	required uint64 ID = 1;
+	repeated uint64 OwnerIDs = 2;
+}
+
+message ContinuousQueryInfo {
+	required string Name = 1;
+	required string Query = 2;
+}
+
+message UserInfo {
+	required string Name = 1;
+	required string Hash = 2;
+	required bool Admin = 3;
+	repeated UserPrivilege Privileges = 4;
+}
+
+message UserPrivilege {
+	required string Database = 1;
+	required int32 Privilege = 2;
+}
+
+
+//========================================================================
+//
+// COMMANDS
+//
+//========================================================================
+
+message Command {
+    extensions 100 to max;
+
+    enum Type {
+		CreateNodeCommand                = 1;
+		DeleteNodeCommand                = 2;
+		CreateDatabaseCommand            = 3;
+		DropDatabaseCommand              = 4;
+		CreateRetentionPolicyCommand     = 5;
+		DropRetentionPolicyCommand       = 6;
+		SetDefaultRetentionPolicyCommand = 7;
+		UpdateRetentionPolicyCommand     = 8;
+		CreateShardGroupCommand          = 9;
+		DeleteShardGroupCommand          = 10;
+		CreateContinuousQueryCommand     = 11;
+		DropContinuousQueryCommand       = 12;
+		CreateUserCommand                = 13;
+		DropUserCommand                  = 14;
+		UpdateUserCommand                = 15;
+		SetPrivilegeCommand              = 16;
+		SetDataCommand                   = 17;
+    }
+
+    required Type type = 1;
+}
+
+message CreateNodeCommand {
+    extend Command {
+        optional CreateNodeCommand command = 101;
+    }
+	required string Host = 1;
+	required uint64 Rand = 2;
+}
+
+message DeleteNodeCommand {
+    extend Command {
+        optional DeleteNodeCommand command = 102;
+    }
+	required uint64 ID = 1;
+}
+
+message CreateDatabaseCommand {
+    extend Command {
+        optional CreateDatabaseCommand command = 103;
+    }
+	required string Name = 1;
+}
+
+message DropDatabaseCommand {
+    extend Command {
+        optional DropDatabaseCommand command = 104;
+    }
+	required string Name = 1;
+}
+
+message CreateRetentionPolicyCommand {
+    extend Command {
+        optional CreateRetentionPolicyCommand command = 105;
+    }
+	required string Database = 1;
+	required RetentionPolicyInfo RetentionPolicy = 2;
+}
+
+message DropRetentionPolicyCommand {
+    extend Command {
+        optional DropRetentionPolicyCommand command = 106;
+    }
+	required string Database = 1;
+	required string Name = 2;
+}
+
+message SetDefaultRetentionPolicyCommand {
+    extend Command {
+        optional SetDefaultRetentionPolicyCommand command = 107;
+    }
+	required string Database = 1;
+	required string Name = 2;
+}
+
+message UpdateRetentionPolicyCommand {
+    extend Command {
+        optional UpdateRetentionPolicyCommand command = 108;
+    }
+	required string Database = 1;
+	required string Name = 2;
+	optional string NewName = 3;
+	optional int64 Duration = 4;
+	optional uint32 ReplicaN = 5;
+}
+
+message CreateShardGroupCommand {
+    extend Command {
+        optional CreateShardGroupCommand command = 109;
+    }
+    required string Database = 1;
+    required string Policy = 2;
+    required int64 Timestamp = 3;
+}
+
+message DeleteShardGroupCommand {
+    extend Command {
+        optional DeleteShardGroupCommand command = 110;
+    }
+    required string Database = 1;
+    required string Policy = 2;
+    required uint64 ShardGroupID = 3;
+}
+
+message CreateContinuousQueryCommand {
+    extend Command {
+        optional CreateContinuousQueryCommand command = 111;
+    }
+    required string Database = 1;
+    required string Name = 2;
+    required string Query = 3;
+}
+
+message DropContinuousQueryCommand {
+    extend Command {
+        optional DropContinuousQueryCommand command = 112;
+    }
+    required string Database = 1;
+    required string Name = 2;
+}
+
+message CreateUserCommand {
+    extend Command {
+        optional CreateUserCommand command = 113;
+    }
+    required string Name = 1;
+    required string Hash = 2;
+    required bool Admin = 3;
+}
+
+message DropUserCommand {
+    extend Command {
+        optional DropUserCommand command = 114;
+    }
+    required string Name = 1;
+}
+
+message UpdateUserCommand {
+    extend Command {
+        optional UpdateUserCommand command = 115;
+    }
+    required string Name = 1;
+    required string Hash = 2;
+}
+
+message SetPrivilegeCommand {
+    extend Command {
+        optional SetPrivilegeCommand command = 116;
+    }
+    required string Username = 1;
+    required string Database = 2;
+    required int32 Privilege = 3;
+}
+
+message SetDataCommand {
+    extend Command {
+        optional SetDataCommand command = 117;
+    }
+    required Data Data = 1;
+}
+
+message SetAdminPrivilegeCommand {
+    extend Command {
+        optional SetAdminPrivilegeCommand command = 118;
+    }
+    required string Username = 1;
+    required bool Admin = 2;
+}
+
+message Response {
+	required bool OK = 1;
+	optional string Error = 2;
+	optional uint64 Index = 3;
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor.go
new file mode 100644
index 00000000000..58f86393ca4
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor.go
@@ -0,0 +1,280 @@
+package meta
+
+import (
+	"fmt"
+
+	"github.com/influxdb/influxdb/influxql"
+)
+
+// StatementExecutor translates InfluxQL queries to meta store methods.
+type StatementExecutor struct {
+	Store interface {
+		Nodes() ([]NodeInfo, error)
+
+		Database(name string) (*DatabaseInfo, error)
+		Databases() ([]DatabaseInfo, error)
+		CreateDatabase(name string) (*DatabaseInfo, error)
+		DropDatabase(name string) error
+
+		DefaultRetentionPolicy(database string) (*RetentionPolicyInfo, error)
+		CreateRetentionPolicy(database string, rpi *RetentionPolicyInfo) (*RetentionPolicyInfo, error)
+		UpdateRetentionPolicy(database, name string, rpu *RetentionPolicyUpdate) error
+		SetDefaultRetentionPolicy(database, name string) error
+		DropRetentionPolicy(database, name string) error
+
+		Users() ([]UserInfo, error)
+		CreateUser(name, password string, admin bool) (*UserInfo, error)
+		UpdateUser(name, password string) error
+		DropUser(name string) error
+		SetPrivilege(username, database string, p influxql.Privilege) error
+		SetAdminPrivilege(username string, admin bool) error
+		UserPrivileges(username string) (map[string]influxql.Privilege, error)
+		UserPrivilege(username, database string) (*influxql.Privilege, error)
+
+		CreateContinuousQuery(database, name, query string) error
+		DropContinuousQuery(database, name string) error
+	}
+}
+
+// ExecuteStatement executes stmt against the meta store; it panics on an unsupported statement type.
+func (e *StatementExecutor) ExecuteStatement(stmt influxql.Statement) *influxql.Result {
+	switch stmt := stmt.(type) {
+	case *influxql.CreateDatabaseStatement:
+		return e.executeCreateDatabaseStatement(stmt)
+	case *influxql.DropDatabaseStatement:
+		return e.executeDropDatabaseStatement(stmt)
+	case *influxql.ShowDatabasesStatement:
+		return e.executeShowDatabasesStatement(stmt)
+	case *influxql.ShowGrantsForUserStatement:
+		return e.executeShowGrantsForUserStatement(stmt)
+	case *influxql.ShowServersStatement:
+		return e.executeShowServersStatement(stmt)
+	case *influxql.CreateUserStatement:
+		return e.executeCreateUserStatement(stmt)
+	case *influxql.SetPasswordUserStatement:
+		return e.executeSetPasswordUserStatement(stmt)
+	case *influxql.DropUserStatement:
+		return e.executeDropUserStatement(stmt)
+	case *influxql.ShowUsersStatement:
+		return e.executeShowUsersStatement(stmt)
+	case *influxql.GrantStatement:
+		return e.executeGrantStatement(stmt)
+	case *influxql.GrantAdminStatement:
+		return e.executeGrantAdminStatement(stmt)
+	case *influxql.RevokeStatement:
+		return e.executeRevokeStatement(stmt)
+	case *influxql.RevokeAdminStatement:
+		return e.executeRevokeAdminStatement(stmt)
+	case *influxql.CreateRetentionPolicyStatement:
+		return e.executeCreateRetentionPolicyStatement(stmt)
+	case *influxql.AlterRetentionPolicyStatement:
+		return e.executeAlterRetentionPolicyStatement(stmt)
+	case *influxql.DropRetentionPolicyStatement:
+		return e.executeDropRetentionPolicyStatement(stmt)
+	case *influxql.ShowRetentionPoliciesStatement:
+		return e.executeShowRetentionPoliciesStatement(stmt)
+	case *influxql.CreateContinuousQueryStatement:
+		return e.executeCreateContinuousQueryStatement(stmt)
+	case *influxql.DropContinuousQueryStatement:
+		return e.executeDropContinuousQueryStatement(stmt)
+	case *influxql.ShowContinuousQueriesStatement:
+		return e.executeShowContinuousQueriesStatement(stmt)
+	case *influxql.ShowStatsStatement:
+		return e.executeShowStatsStatement(stmt)
+	default:
+		panic(fmt.Sprintf("unsupported statement type: %T", stmt))
+	}
+}
+
+func (e *StatementExecutor) executeCreateDatabaseStatement(q *influxql.CreateDatabaseStatement) *influxql.Result {
+	_, err := e.Store.CreateDatabase(q.Name)
+	return &influxql.Result{Err: err}
+}
+
+func (e *StatementExecutor) executeDropDatabaseStatement(q *influxql.DropDatabaseStatement) *influxql.Result {
+	return &influxql.Result{Err: e.Store.DropDatabase(q.Name)}
+}
+
+func (e *StatementExecutor) executeShowDatabasesStatement(q *influxql.ShowDatabasesStatement) *influxql.Result {
+	dis, err := e.Store.Databases()
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	row := &influxql.Row{Name: "databases", Columns: []string{"name"}}
+	for _, di := range dis {
+		row.Values = append(row.Values, []interface{}{di.Name})
+	}
+	return &influxql.Result{Series: []*influxql.Row{row}}
+}
+
+func (e *StatementExecutor) executeShowGrantsForUserStatement(q *influxql.ShowGrantsForUserStatement) *influxql.Result {
+	priv, err := e.Store.UserPrivileges(q.Name)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	row := &influxql.Row{Columns: []string{"database", "privilege"}}
+	for d, p := range priv {
+		row.Values = append(row.Values, []interface{}{d, p.String()})
+	}
+	return &influxql.Result{Series: []*influxql.Row{row}}
+}
+
+func (e *StatementExecutor) executeShowServersStatement(q *influxql.ShowServersStatement) *influxql.Result {
+	nis, err := e.Store.Nodes()
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	row := &influxql.Row{Columns: []string{"id", "url"}}
+	for _, ni := range nis {
+		row.Values = append(row.Values, []interface{}{ni.ID, "http://" + ni.Host})
+	}
+	return &influxql.Result{Series: []*influxql.Row{row}}
+}
+
+func (e *StatementExecutor) executeCreateUserStatement(q *influxql.CreateUserStatement) *influxql.Result {
+	_, err := e.Store.CreateUser(q.Name, q.Password, q.Admin)
+	return &influxql.Result{Err: err}
+}
+
+func (e *StatementExecutor) executeSetPasswordUserStatement(q *influxql.SetPasswordUserStatement) *influxql.Result {
+	return &influxql.Result{Err: e.Store.UpdateUser(q.Name, q.Password)}
+}
+
+func (e *StatementExecutor) executeDropUserStatement(q *influxql.DropUserStatement) *influxql.Result {
+	return &influxql.Result{Err: e.Store.DropUser(q.Name)}
+}
+
+func (e *StatementExecutor) executeShowUsersStatement(q *influxql.ShowUsersStatement) *influxql.Result {
+	uis, err := e.Store.Users()
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	row := &influxql.Row{Columns: []string{"user", "admin"}}
+	for _, ui := range uis {
+		row.Values = append(row.Values, []interface{}{ui.Name, ui.Admin})
+	}
+	return &influxql.Result{Series: []*influxql.Row{row}}
+}
+
+func (e *StatementExecutor) executeGrantStatement(stmt *influxql.GrantStatement) *influxql.Result {
+	return &influxql.Result{Err: e.Store.SetPrivilege(stmt.User, stmt.On, stmt.Privilege)}
+}
+
+func (e *StatementExecutor) executeGrantAdminStatement(stmt *influxql.GrantAdminStatement) *influxql.Result {
+	return &influxql.Result{Err: e.Store.SetAdminPrivilege(stmt.User, true)}
+}
+
+func (e *StatementExecutor) executeRevokeStatement(stmt *influxql.RevokeStatement) *influxql.Result {
+	priv := influxql.NoPrivileges
+
+	// Revoking all privileges means there's no need to look at existing user privileges.
+	if stmt.Privilege != influxql.AllPrivileges {
+		p, err := e.Store.UserPrivilege(stmt.User, stmt.On)
+		if err != nil {
+			return &influxql.Result{Err: err}
+		}
+		// Bit clear (AND NOT) the user's privilege with the revoked privilege.
+		priv = *p &^ stmt.Privilege
+	}
+
+	return &influxql.Result{Err: e.Store.SetPrivilege(stmt.User, stmt.On, priv)}
+}
+
+func (e *StatementExecutor) executeRevokeAdminStatement(stmt *influxql.RevokeAdminStatement) *influxql.Result {
+	return &influxql.Result{Err: e.Store.SetAdminPrivilege(stmt.User, false)}
+}
+
+func (e *StatementExecutor) executeCreateRetentionPolicyStatement(stmt *influxql.CreateRetentionPolicyStatement) *influxql.Result {
+	rpi := NewRetentionPolicyInfo(stmt.Name)
+	rpi.Duration = stmt.Duration
+	rpi.ReplicaN = stmt.Replication
+
+	// Create new retention policy.
+	_, err := e.Store.CreateRetentionPolicy(stmt.Database, rpi)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	// If requested, set new policy as the default.
+	if stmt.Default {
+		err = e.Store.SetDefaultRetentionPolicy(stmt.Database, stmt.Name)
+	}
+
+	return &influxql.Result{Err: err}
+}
+
+func (e *StatementExecutor) executeAlterRetentionPolicyStatement(stmt *influxql.AlterRetentionPolicyStatement) *influxql.Result {
+	rpu := &RetentionPolicyUpdate{
+		Duration: stmt.Duration,
+		ReplicaN: stmt.Replication,
+	}
+
+	// Update the retention policy.
+	err := e.Store.UpdateRetentionPolicy(stmt.Database, stmt.Name, rpu)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	// If requested, set as default retention policy.
+	if stmt.Default {
+		err = e.Store.SetDefaultRetentionPolicy(stmt.Database, stmt.Name)
+	}
+
+	return &influxql.Result{Err: err}
+}
+
+func (e *StatementExecutor) executeDropRetentionPolicyStatement(q *influxql.DropRetentionPolicyStatement) *influxql.Result {
+	return &influxql.Result{Err: e.Store.DropRetentionPolicy(q.Database, q.Name)}
+}
+
+func (e *StatementExecutor) executeShowRetentionPoliciesStatement(q *influxql.ShowRetentionPoliciesStatement) *influxql.Result {
+	di, err := e.Store.Database(q.Database)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	} else if di == nil {
+		return &influxql.Result{Err: ErrDatabaseNotFound}
+	}
+
+	row := &influxql.Row{Columns: []string{"name", "duration", "replicaN", "default"}}
+	for _, rpi := range di.RetentionPolicies {
+		row.Values = append(row.Values, []interface{}{rpi.Name, rpi.Duration.String(), rpi.ReplicaN, di.DefaultRetentionPolicy == rpi.Name})
+	}
+	return &influxql.Result{Series: []*influxql.Row{row}}
+}
+
+func (e *StatementExecutor) executeCreateContinuousQueryStatement(q *influxql.CreateContinuousQueryStatement) *influxql.Result {
+	return &influxql.Result{
+		Err: e.Store.CreateContinuousQuery(q.Database, q.Name, q.String()),
+	}
+}
+
+func (e *StatementExecutor) executeDropContinuousQueryStatement(q *influxql.DropContinuousQueryStatement) *influxql.Result {
+	return &influxql.Result{
+		Err: e.Store.DropContinuousQuery(q.Database, q.Name),
+	}
+}
+
+func (e *StatementExecutor) executeShowContinuousQueriesStatement(stmt *influxql.ShowContinuousQueriesStatement) *influxql.Result {
+	dis, err := e.Store.Databases()
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	rows := []*influxql.Row{}
+	for _, di := range dis {
+		row := &influxql.Row{Columns: []string{"name", "query"}, Name: di.Name}
+		for _, cqi := range di.ContinuousQueries {
+			row.Values = append(row.Values, []interface{}{cqi.Name, cqi.Query})
+		}
+		rows = append(rows, row)
+	}
+	return &influxql.Result{Series: rows}
+}
+
+func (e *StatementExecutor) executeShowStatsStatement(stmt *influxql.ShowStatsStatement) *influxql.Result {
+	return &influxql.Result{Err: fmt.Errorf("SHOW STATS is not implemented yet")}
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store.go
new file mode 100644
index 00000000000..2a0c6fc7368
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store.go
@@ -0,0 +1,1876 @@
+package meta
+
+import (
+	"bytes"
+	crand "crypto/rand"
+	"crypto/sha256"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"math/rand"
+	"net"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/gogo/protobuf/proto"
+	"github.com/hashicorp/raft"
+	"github.com/hashicorp/raft-boltdb"
+	"github.com/influxdb/influxdb/influxql"
+	"github.com/influxdb/influxdb/meta/internal"
+	"golang.org/x/crypto/bcrypt"
+)
+
+// tcp.Mux header bytes.
+const (
+	MuxRaftHeader = 0
+	MuxExecHeader = 1
+
+	// SaltBytes is the number of bytes used for salts
+	SaltBytes = 32
+)
+
+// ExecMagic is the first 4 bytes sent to a remote exec connection to verify
+// that it is coming from a remote exec client connection.
+const ExecMagic = "EXEC"
+
+// Retention policy settings.
+const (
+	AutoCreateRetentionPolicyName   = "default"
+	AutoCreateRetentionPolicyPeriod = 0
+	RetentionPolicyMinDuration      = time.Hour
+)
+
+// Raft configuration.
+const (
+	raftLogCacheSize      = 512
+	raftSnapshotsRetained = 2
+	raftTransportMaxPool  = 3
+	raftTransportTimeout  = 10 * time.Second
+)
+
+// Store represents a raft-backed metastore.
+type Store struct {
+	mu     sync.RWMutex
+	path   string
+	opened bool
+
+	id uint64 // local node id
+
+	// All peers in cluster. Used during bootstrapping.
+	peers []string
+
+	data *Data
+
+	remoteAddr net.Addr
+	raft       *raft.Raft
+	raftLayer  *raftLayer
+	peerStore  raft.PeerStore
+	transport  *raft.NetworkTransport
+	store      *raftboltdb.BoltStore
+
+	ready   chan struct{}
+	err     chan error
+	closing chan struct{}
+	wg      sync.WaitGroup
+
+	retentionAutoCreate bool
+
+	// The listeners to accept raft and remote exec connections from.
+	RaftListener net.Listener
+	ExecListener net.Listener
+
+	// The advertised hostname of the store.
+	Addr net.Addr
+
+	// The amount of time before a follower starts a new election.
+	HeartbeatTimeout time.Duration
+
+	// The amount of time before a candidate starts a new election.
+	ElectionTimeout time.Duration
+
+	// The amount of time without communication to the cluster before a
+	// leader steps down to a follower state.
+	LeaderLeaseTimeout time.Duration
+
+	// The amount of time without an apply before sending a heartbeat.
+	CommitTimeout time.Duration
+
+	// Authentication cache.
+	authCache map[string]authUser
+
+	// hashPassword generates a cryptographically secure hash for password.
+	// Returns an error if the password is invalid or a hash cannot be generated.
+	hashPassword HashPasswordFn
+
+	Logger *log.Logger
+}
+
+type authUser struct {
+	salt []byte
+	hash []byte
+}
+
+// NewStore returns a new instance of Store.
+func NewStore(c Config) *Store {
+	return &Store{
+		path:  c.Dir,
+		peers: c.Peers,
+		data:  &Data{},
+
+		ready:   make(chan struct{}),
+		err:     make(chan error),
+		closing: make(chan struct{}),
+
+		retentionAutoCreate: c.RetentionAutoCreate,
+
+		HeartbeatTimeout:   time.Duration(c.HeartbeatTimeout),
+		ElectionTimeout:    time.Duration(c.ElectionTimeout),
+		LeaderLeaseTimeout: time.Duration(c.LeaderLeaseTimeout),
+		CommitTimeout:      time.Duration(c.CommitTimeout),
+		authCache:          make(map[string]authUser, 0),
+		hashPassword: func(password string) ([]byte, error) {
+			return bcrypt.GenerateFromPassword([]byte(password), BcryptCost)
+		},
+		Logger: log.New(os.Stderr, "[metastore] ", log.LstdFlags),
+	}
+}
+
+// Path returns the root directory of the store, as configured via Config.Dir in NewStore.
+// NOTE(review): close() does not reset the path, so it is also returned after the store is closed.
+func (s *Store) Path() string { return s.path }
+
+// IDPath returns the path to the local node ID file.
+func (s *Store) IDPath() string { return filepath.Join(s.path, "id") }
+
+// Open opens and initializes the raft store. It returns ErrStoreOpen, leaving the store running, if already open.
+func (s *Store) Open() error {
+	// Verify that there are no more than 3 peers (https://github.com/influxdb/influxdb/issues/2750).
+	if len(s.peers) > 3 {
+		return ErrTooManyPeers
+	}
+
+	// Verify listeners are set.
+	if s.RaftListener == nil {
+		panic("Store.RaftListener not set")
+	} else if s.ExecListener == nil {
+		panic("Store.ExecListener not set")
+	}
+
+	if err := func() error {
+		s.mu.Lock()
+		defer s.mu.Unlock()
+
+		// Check if store has already been opened.
+		if s.opened {
+			return ErrStoreOpen
+		}
+		s.opened = true
+
+		// Create the root directory if it doesn't already exist.
+		if err := os.MkdirAll(s.path, 0777); err != nil {
+			return fmt.Errorf("mkdir all: %s", err)
+		}
+
+		// Open the raft store.
+		if err := s.openRaft(); err != nil {
+			return fmt.Errorf("raft: %s", err)
+		}
+
+		// Initialize the store, if necessary.
+		if err := s.initialize(); err != nil {
+			return fmt.Errorf("initialize raft: %s", err)
+		}
+
+		// Load existing ID, if exists.
+		if err := s.readID(); err != nil {
+			return fmt.Errorf("read id: %s", err)
+		}
+		return nil
+	}(); err == ErrStoreOpen {
+		return err // already open: closing here would shut down the running store
+	} else if err != nil {
+		s.close()
+		return err
+	}
+
+	// Begin serving listener.
+	s.wg.Add(1)
+	go s.serveExecListener()
+
+	// If the ID doesn't exist then create a new node.
+	if s.id == 0 {
+		go s.init()
+	} else {
+		close(s.ready)
+	}
+
+	return nil
+}
+
+// openRaft configures raft from the Store's timeouts and creates the transport, peer, log, and snapshot stores.
+func (s *Store) openRaft() error {
+	// Setup raft configuration.
+	config := raft.DefaultConfig()
+	config.Logger = s.Logger
+	config.HeartbeatTimeout = s.HeartbeatTimeout
+	config.ElectionTimeout = s.ElectionTimeout
+	config.LeaderLeaseTimeout = s.LeaderLeaseTimeout
+	config.CommitTimeout = s.CommitTimeout
+
+	// If no peers are set in the config then start as a single server.
+	config.EnableSingleNode = (len(s.peers) == 0)
+
+	// Build raft layer to multiplex listener.
+	s.raftLayer = newRaftLayer(s.RaftListener, s.Addr)
+
+	// Create a transport layer using the package-level pool size and timeout constants.
+	s.transport = raft.NewNetworkTransport(s.raftLayer, raftTransportMaxPool, raftTransportTimeout, os.Stderr)
+
+	// Create peer storage.
+	s.peerStore = raft.NewJSONPeers(s.path, s.transport)
+
+	// Create the log store and stable store.
+	store, err := raftboltdb.NewBoltStore(filepath.Join(s.path, "raft.db"))
+	if err != nil {
+		return fmt.Errorf("new bolt store: %s", err)
+	}
+	s.store = store
+
+	// Create the snapshot store.
+	snapshots, err := raft.NewFileSnapshotStore(s.path, raftSnapshotsRetained, os.Stderr)
+	if err != nil {
+		return fmt.Errorf("file snapshot store: %s", err)
+	}
+
+	// Create raft log.
+	r, err := raft.NewRaft(config, (*storeFSM)(s), store, store, snapshots, s.peerStore, s.transport)
+	if err != nil {
+		return fmt.Errorf("new raft: %s", err)
+	}
+	s.raft = r
+
+	return nil
+}
+
+// initialize force-sets the raft peers to s.peers.
+// The guard that skipped this step when the log already held entries is
+// currently disabled (see the commented-out block below), so the peers are
+// overwritten on every call.
+func (s *Store) initialize() error {
+	// If we have committed entries then the store is already in the cluster.
+	/*
+		if index, err := s.store.LastIndex(); err != nil {
+			return fmt.Errorf("last index: %s", err)
+		} else if index > 0 {
+			return nil
+		}
+	*/
+
+	// Overwrite the peer list unconditionally.
+	err := s.SetPeers(s.peers)
+	if err != nil {
+		return fmt.Errorf("set raft peers: %s", err)
+	}
+	return nil
+}
+
+// Close shuts the store down while holding the store's write lock.
+// It returns ErrStoreClosed if the store is not open.
+func (s *Store) Close() error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	err := s.close()
+	return err
+}
+
+// close marks the store as closed, signals s.closing and tears down raft,
+// the transport and the log store, in that order. It does not take s.mu
+// itself. Returns ErrStoreClosed if the store is not open.
+func (s *Store) close() error {
+	// Closing twice is an error.
+	if !s.opened {
+		return ErrStoreClosed
+	}
+	s.opened = false
+
+	// Wake every goroutine waiting on the closing channel.
+	close(s.closing)
+	// FIXME(benbjohnson): s.wg.Wait()
+
+	// Release each component that was actually created and clear its field.
+	if r := s.raft; r != nil {
+		r.Shutdown()
+		s.raft = nil
+	}
+	if t := s.transport; t != nil {
+		t.Close()
+		s.transport = nil
+	}
+	if st := s.store; st != nil {
+		st.Close()
+		s.store = nil
+	}
+
+	return nil
+}
+
+// readID loads the local node ID from the file at s.IDPath() into s.id.
+// A missing file is not an error: s.id is set to 0 in that case.
+// The file content must be a base-10 unsigned integer.
+func (s *Store) readID() error {
+	raw, err := ioutil.ReadFile(s.IDPath())
+	switch {
+	case os.IsNotExist(err):
+		// No ID file yet.
+		s.id = 0
+		return nil
+	case err != nil:
+		return fmt.Errorf("read file: %s", err)
+	}
+
+	parsed, err := strconv.ParseUint(string(raw), 10, 64)
+	if err != nil {
+		return fmt.Errorf("parse id: %s", err)
+	}
+
+	s.id = parsed
+	s.Logger.Printf("read local node id: %d", s.id)
+	return nil
+}
+
+// init creates the local node and then closes the ready channel.
+// It is meant to run in its own goroutine. If node creation fails the error
+// is sent on s.err and the ready channel is left open.
+func (s *Store) init() {
+	err := s.createLocalNode()
+	if err != nil {
+		s.err <- fmt.Errorf("create local node: %s", err)
+		return
+	}
+
+	// Signal readiness to anyone waiting on Ready().
+	close(s.ready)
+}
+
+// createLocalNode registers this instance as a node in the cluster.
+// It waits (without timeout) for a leader, creates a node keyed by the
+// store's address, persists the new node ID to s.IDPath() and records it
+// in s.id.
+func (s *Store) createLocalNode() error {
+	// A zero timeout means wait forever.
+	if err := s.WaitForLeader(0); err != nil {
+		return fmt.Errorf("wait for leader: %s", err)
+	}
+
+	host := s.Addr.String()
+	node, err := s.CreateNode(host)
+	if err != nil {
+		return fmt.Errorf("create node: %s", err)
+	}
+
+	// Persist the ID as decimal text so readID can parse it back.
+	idText := strconv.FormatUint(node.ID, 10)
+	if err := ioutil.WriteFile(s.IDPath(), []byte(idText), 0666); err != nil {
+		return fmt.Errorf("write file: %s", err)
+	}
+
+	s.id = node.ID
+	s.Logger.Printf("created local node: id=%d, host=%s", s.id, host)
+	return nil
+}
+
+// Snapshot asks raft to take a snapshot and waits for the result.
+// NOTE(review): s.raft is used without a nil check or lock; callers
+// presumably only invoke this while the store is open.
+func (s *Store) Snapshot() error {
+	return s.raft.Snapshot().Error()
+}
+
+// WaitForLeader blocks until raft reports a leader, the store starts
+// closing, or the timeout elapses. A timeout of 0 disables the timeout.
+// The leader is polled every 100ms.
+func (s *Store) WaitForLeader(timeout time.Duration) error {
+	hasLeader := func() bool { return s.raft.Leader() != "" }
+
+	// Fast path: a leader is already known.
+	if hasLeader() {
+		return nil
+	}
+
+	deadline := time.NewTimer(timeout)
+	defer deadline.Stop()
+
+	poll := time.NewTicker(100 * time.Millisecond)
+	defer poll.Stop()
+
+	for {
+		select {
+		case <-s.closing:
+			return errors.New("closing")
+		case <-deadline.C:
+			// With timeout == 0 the timer fires at once and is ignored.
+			if timeout == 0 {
+				continue
+			}
+			return errors.New("timeout")
+		case <-poll.C:
+			if hasLeader() {
+				return nil
+			}
+		}
+	}
+}
+
+// Ready exposes the channel that is closed when the store has finished
+// initializing.
+func (s *Store) Ready() <-chan struct{} {
+	return s.ready
+}
+
+// Err exposes the channel on which out-of-band errors are delivered.
+func (s *Store) Err() <-chan error {
+	return s.err
+}
+
+// IsLeader reports whether this store's raft node is currently in the
+// leader state. It returns false when raft is not running.
+func (s *Store) IsLeader() bool {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return s.raft != nil && s.raft.State() == raft.Leader
+}
+
+// Leader returns the address raft currently believes is the leader.
+// The result is empty when there is no leader or raft is not running.
+func (s *Store) Leader() string {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	if r := s.raft; r != nil {
+		return r.Leader()
+	}
+	return ""
+}
+
+// LeaderCh returns raft's leadership-change notification channel.
+// The assertion fails when raft is not running (store not open).
+func (s *Store) LeaderCh() <-chan bool {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	running := s.raft != nil
+	assert(running, "cannot retrieve leadership channel when closed")
+	return s.raft.LeaderCh()
+}
+
+// SetPeers resolves each address as a TCP address and installs the
+// resulting list as the raft peer set. It fails on the first address that
+// cannot be resolved, before raft is touched.
+func (s *Store) SetPeers(addrs []string) error {
+	resolved := make([]string, 0, len(addrs))
+	for _, raw := range addrs {
+		tcpAddr, err := net.ResolveTCPAddr("tcp", raw)
+		if err != nil {
+			return fmt.Errorf("cannot resolve addr: %s, err=%s", raw, err)
+		}
+		resolved = append(resolved, tcpAddr.String())
+	}
+	return s.raft.SetPeers(resolved).Error()
+}
+
+// serveExecListener accepts remote exec connections until the listener
+// reports "connection closed", handing each connection to its own
+// goroutine. It runs in a separate goroutine tracked by s.wg.
+func (s *Store) serveExecListener() {
+	defer s.wg.Done()
+
+	for {
+		conn, err := s.ExecListener.Accept()
+		if err == nil {
+			// Each connection is served concurrently and tracked by the wait group.
+			s.wg.Add(1)
+			go s.handleExecConn(conn)
+			continue
+		}
+
+		// A closed listener ends the loop; anything else is logged and retried.
+		if strings.Contains(err.Error(), "connection closed") {
+			return
+		}
+		s.Logger.Printf("temporary accept error: %s", err)
+	}
+}
+
+// handleExecConn serves one remote exec request.
+//
+// Request format: the 4-byte ExecMagic marker, a big-endian uint64 length,
+// then that many bytes of a serialized internal.Command. The command is
+// applied to the raft log and an internal.Response (OK flag, last raft index,
+// optional error text) is written back, prefixed by its big-endian uint64
+// length. The connection is closed before returning.
+//
+// NOTE(review): the length is taken from the peer and used to size the
+// buffer with no upper bound.
+func (s *Store) handleExecConn(conn net.Conn) {
+	defer s.wg.Done()
+
+	// Read, validate and apply the command; keep the outcome for the response.
+	execErr := func() error {
+		magic := make([]byte, 4)
+		if _, err := io.ReadFull(conn, magic); err != nil {
+			return fmt.Errorf("read magic: %s", err)
+		}
+		if string(magic) != ExecMagic {
+			return fmt.Errorf("invalid exec magic: %q", string(magic))
+		}
+
+		var size uint64
+		if err := binary.Read(conn, binary.BigEndian, &size); err != nil {
+			return fmt.Errorf("read size: %s", err)
+		}
+
+		payload := make([]byte, size)
+		if _, err := io.ReadFull(conn, payload); err != nil {
+			return fmt.Errorf("read command: %s", err)
+		}
+
+		// Reject payloads that do not decode as a command before they reach raft.
+		if err := proto.Unmarshal(payload, &internal.Command{}); err != nil {
+			return fmt.Errorf("unable to unmarshal command: %s", err)
+		}
+
+		if err := s.apply(payload); err != nil {
+			return fmt.Errorf("apply: %s", err)
+		}
+		return nil
+	}()
+
+	// The response always carries the current last index, even on failure.
+	var resp internal.Response
+	resp.OK = proto.Bool(execErr == nil)
+	resp.Index = proto.Uint64(s.raft.LastIndex())
+	if execErr != nil {
+		resp.Error = proto.String(execErr.Error())
+	}
+
+	// Failing to encode our own response is treated as a programming error.
+	encoded, err := proto.Marshal(&resp)
+	if err != nil {
+		panic(err)
+	}
+
+	// Write the length prefix, then the body; write failures are only logged.
+	if err := binary.Write(conn, binary.BigEndian, uint64(len(encoded))); err != nil {
+		s.Logger.Printf("unable to write exec response size: %s", err)
+	} else if _, err := conn.Write(encoded); err != nil {
+		s.Logger.Printf("unable to write exec response: %s", err)
+	}
+	conn.Close()
+}
+
+// MarshalBinary serializes the store's current metadata by delegating to
+// the data's own MarshalBinary, under the store's read lock.
+func (s *Store) MarshalBinary() ([]byte, error) {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	data := s.data
+	return data.MarshalBinary()
+}
+
+// ClusterID returns the ClusterID recorded in the store's metadata.
+func (s *Store) ClusterID() (id uint64, err error) {
+	load := func(d *Data) error {
+		id = d.ClusterID
+		return nil
+	}
+	err = s.read(load)
+	return id, err
+}
+
+// NodeID returns the identifier of the local node as currently held in
+// memory. It is 0 until an ID has been read from disk or assigned by
+// createLocalNode.
+func (s *Store) NodeID() uint64 {
+	return s.id
+}
+
+// Node looks up a node by id. A miss triggers one cache invalidation and
+// retry inside read; if the node is still missing the result is (nil, nil).
+func (s *Store) Node(id uint64) (ni *NodeInfo, err error) {
+	find := func(d *Data) error {
+		if ni = d.Node(id); ni == nil {
+			return errInvalidate
+		}
+		return nil
+	}
+	err = s.read(find)
+	return ni, err
+}
+
+// NodeByHost looks up a node by its host string. A miss triggers one cache
+// invalidation and retry inside read; if the node is still missing the
+// result is (nil, nil).
+func (s *Store) NodeByHost(host string) (ni *NodeInfo, err error) {
+	find := func(d *Data) error {
+		if ni = d.NodeByHost(host); ni == nil {
+			return errInvalidate
+		}
+		return nil
+	}
+	err = s.read(find)
+	return ni, err
+}
+
+// Nodes returns the node slice held in the metadata. The slice is returned
+// as-is, not copied.
+func (s *Store) Nodes() (a []NodeInfo, err error) {
+	load := func(d *Data) error {
+		a = d.Nodes
+		return nil
+	}
+	err = s.read(load)
+	return a, err
+}
+
+// CreateNode submits a create-node command for host (with a random value
+// attached) and then returns the node looked up by that host.
+func (s *Store) CreateNode(host string) (*NodeInfo, error) {
+	cmd := &internal.CreateNodeCommand{
+		Host: proto.String(host),
+		Rand: proto.Uint64(uint64(rand.Int63())),
+	}
+
+	err := s.exec(internal.Command_CreateNodeCommand, internal.E_CreateNodeCommand_Command, cmd)
+	if err != nil {
+		return nil, err
+	}
+	return s.NodeByHost(host)
+}
+
+// DeleteNode submits a command that removes the node with the given id
+// from the metastore.
+func (s *Store) DeleteNode(id uint64) error {
+	cmd := &internal.DeleteNodeCommand{
+		ID: proto.Uint64(id),
+	}
+	return s.exec(internal.Command_DeleteNodeCommand, internal.E_DeleteNodeCommand_Command, cmd)
+}
+
+// Database looks up a database by name. A miss triggers one cache
+// invalidation and retry inside read; if the database is still missing the
+// result is (nil, nil).
+func (s *Store) Database(name string) (di *DatabaseInfo, err error) {
+	find := func(d *Data) error {
+		if di = d.Database(name); di == nil {
+			return errInvalidate
+		}
+		return nil
+	}
+	err = s.read(find)
+	return di, err
+}
+
+// Databases returns the database slice held in the metadata. The slice is
+// returned as-is, not copied.
+func (s *Store) Databases() (dis []DatabaseInfo, err error) {
+	load := func(d *Data) error {
+		dis = d.Databases
+		return nil
+	}
+	err = s.read(load)
+	return dis, err
+}
+
+// CreateDatabase submits a create-database command and returns the new
+// database. When s.retentionAutoCreate is set it also creates the
+// auto-create retention policy, replicated across every known node, and
+// makes it the database's default.
+func (s *Store) CreateDatabase(name string) (*DatabaseInfo, error) {
+	cmd := &internal.CreateDatabaseCommand{
+		Name: proto.String(name),
+	}
+	if err := s.exec(internal.Command_CreateDatabaseCommand, internal.E_CreateDatabaseCommand_Command, cmd); err != nil {
+		return nil, err
+	}
+
+	// Without auto-creation there is nothing more to do.
+	if !s.retentionAutoCreate {
+		return s.Database(name)
+	}
+
+	// The policy's replica count is the current number of nodes.
+	var nodeN int
+	countNodes := func(d *Data) error {
+		nodeN = len(d.Nodes)
+		return nil
+	}
+	if err := s.read(countNodes); err != nil {
+		return nil, fmt.Errorf("read: %s", err)
+	}
+
+	rpi := NewRetentionPolicyInfo(AutoCreateRetentionPolicyName)
+	rpi.ReplicaN = nodeN
+	rpi.Duration = AutoCreateRetentionPolicyPeriod
+	if _, err := s.CreateRetentionPolicy(name, rpi); err != nil {
+		return nil, err
+	}
+
+	// Make the freshly created policy the default one.
+	if err := s.SetDefaultRetentionPolicy(name, AutoCreateRetentionPolicyName); err != nil {
+		return nil, err
+	}
+
+	return s.Database(name)
+}
+
+// CreateDatabaseIfNotExists returns the named database, creating it first
+// when a local lookup does not find it. If creation reports
+// ErrDatabaseExists the existing database is looked up again and returned.
+func (s *Store) CreateDatabaseIfNotExists(name string) (*DatabaseInfo, error) {
+	// Local lookup first.
+	existing, err := s.Database(name)
+	if err != nil {
+		return nil, err
+	}
+	if existing != nil {
+		return existing, nil
+	}
+
+	// Not found: create it, tolerating a concurrent creation.
+	created, err := s.CreateDatabase(name)
+	if err == ErrDatabaseExists {
+		return s.Database(name)
+	}
+	return created, err
+}
+
+// DropDatabase submits a command that removes the named database from the
+// metastore.
+func (s *Store) DropDatabase(name string) error {
+	cmd := &internal.DropDatabaseCommand{
+		Name: proto.String(name),
+	}
+	return s.exec(internal.Command_DropDatabaseCommand, internal.E_DropDatabaseCommand_Command, cmd)
+}
+
+// RetentionPolicy looks up a retention policy by database and policy name.
+// Lookup errors are returned to the caller; a plain miss yields (nil, nil)
+// after read has invalidated the cache and retried once.
+func (s *Store) RetentionPolicy(database, name string) (rpi *RetentionPolicyInfo, err error) {
+	find := func(d *Data) error {
+		found, lookupErr := d.RetentionPolicy(database, name)
+		rpi = found
+		if lookupErr != nil {
+			return lookupErr
+		}
+		if found == nil {
+			return errInvalidate
+		}
+		return nil
+	}
+	err = s.read(find)
+	return rpi, err
+}
+
+// DefaultRetentionPolicy returns the policy whose name matches the
+// database's DefaultRetentionPolicy field. It returns ErrDatabaseNotFound
+// when the database is unknown, and (nil, nil) when no policy matches
+// after one cache invalidation and retry.
+func (s *Store) DefaultRetentionPolicy(database string) (rpi *RetentionPolicyInfo, err error) {
+	find := func(d *Data) error {
+		di := d.Database(database)
+		if di == nil {
+			return ErrDatabaseNotFound
+		}
+
+		// Return a pointer to the slice element itself, not to a copy.
+		policies := di.RetentionPolicies
+		for i := range policies {
+			candidate := &policies[i]
+			if candidate.Name == di.DefaultRetentionPolicy {
+				rpi = candidate
+				return nil
+			}
+		}
+		return errInvalidate
+	}
+	err = s.read(find)
+	return rpi, err
+}
+
+// RetentionPolicies returns all retention policies of the named database.
+// It returns ErrDatabaseNotFound when the database does not exist. The
+// returned slice is the one held in the metadata, not a copy.
+func (s *Store) RetentionPolicies(database string) (a []RetentionPolicyInfo, err error) {
+	err = s.read(func(data *Data) error {
+		di := data.Database(database)
+		// The database must exist; the previous check was inverted, which
+		// rejected every existing database and dereferenced nil otherwise.
+		if di == nil {
+			return ErrDatabaseNotFound
+		}
+		a = di.RetentionPolicies
+		return nil
+	})
+	return
+}
+
+// CreateRetentionPolicy submits a command that adds rpi to the database and
+// returns the stored policy. A non-zero duration shorter than
+// RetentionPolicyMinDuration is rejected with
+// ErrRetentionPolicyDurationTooLow; a zero duration is allowed.
+func (s *Store) CreateRetentionPolicy(database string, rpi *RetentionPolicyInfo) (*RetentionPolicyInfo, error) {
+	tooShort := rpi.Duration != 0 && rpi.Duration < RetentionPolicyMinDuration
+	if tooShort {
+		return nil, ErrRetentionPolicyDurationTooLow
+	}
+
+	cmd := &internal.CreateRetentionPolicyCommand{
+		Database:        proto.String(database),
+		RetentionPolicy: rpi.marshal(),
+	}
+	err := s.exec(internal.Command_CreateRetentionPolicyCommand, internal.E_CreateRetentionPolicyCommand_Command, cmd)
+	if err != nil {
+		return nil, err
+	}
+
+	return s.RetentionPolicy(database, rpi.Name)
+}
+
+// CreateRetentionPolicyIfNotExists returns the policy named rpi.Name in the
+// database, creating it from rpi when a local lookup does not find it. If
+// creation reports ErrRetentionPolicyExists the existing policy is looked up
+// again and returned.
+func (s *Store) CreateRetentionPolicyIfNotExists(database string, rpi *RetentionPolicyInfo) (*RetentionPolicyInfo, error) {
+	// Local lookup first.
+	existing, err := s.RetentionPolicy(database, rpi.Name)
+	if err != nil {
+		return nil, err
+	}
+	if existing != nil {
+		return existing, nil
+	}
+
+	// Not found: create it, tolerating a concurrent creation.
+	created, err := s.CreateRetentionPolicy(database, rpi)
+	if err == ErrRetentionPolicyExists {
+		return s.RetentionPolicy(database, rpi.Name)
+	}
+	return created, err
+}
+
+// SetDefaultRetentionPolicy submits a command that makes the named policy
+// the default for the database.
+func (s *Store) SetDefaultRetentionPolicy(database, name string) error {
+	cmd := &internal.SetDefaultRetentionPolicyCommand{
+		Database: proto.String(database),
+		Name:     proto.String(name),
+	}
+	return s.exec(internal.Command_SetDefaultRetentionPolicyCommand, internal.E_SetDefaultRetentionPolicyCommand_Command, cmd)
+}
+
+// UpdateRetentionPolicy updates an existing retention policy.
+func (s *Store) UpdateRetentionPolicy(database, name string, rpu *RetentionPolicyUpdate) error {
+	var newName *string
+	if rpu.Name != nil {
+		newName = rpu.Name
+	}
+
+	var duration *int64
+	if rpu.Duration != nil {
+		value := int64(*rpu.Duration)
+		duration = &value
+	}
+
+	var replicaN *uint32
+	if rpu.ReplicaN != nil {
+		value := uint32(*rpu.ReplicaN)
+		replicaN = &value
+	}
+
+	return s.exec(internal.Command_UpdateRetentionPolicyCommand, internal.E_UpdateRetentionPolicyCommand_Command,
+		&internal.UpdateRetentionPolicyCommand{
+			Database: proto.String(database),
+			Name:     proto.String(name),
+			NewName:  newName,
+			Duration: duration,
+			ReplicaN: replicaN,
+		},
+	)
+}
+
+// DropRetentionPolicy submits a command that removes the named policy from
+// the database.
+func (s *Store) DropRetentionPolicy(database, name string) error {
+	cmd := &internal.DropRetentionPolicyCommand{
+		Database: proto.String(database),
+		Name:     proto.String(name),
+	}
+	return s.exec(internal.Command_DropRetentionPolicyCommand, internal.E_DropRetentionPolicyCommand_Command, cmd)
+}
+
+// FIX: CreateRetentionPolicyIfNotExists(database string, rp *RetentionPolicyInfo) (*RetentionPolicyInfo, error)
+
+// CreateShardGroup submits a command that creates a shard group in the
+// policy for the given time (sent as Unix nanoseconds), then returns the
+// shard group found for that timestamp.
+func (s *Store) CreateShardGroup(database, policy string, timestamp time.Time) (*ShardGroupInfo, error) {
+	cmd := &internal.CreateShardGroupCommand{
+		Database:  proto.String(database),
+		Policy:    proto.String(policy),
+		Timestamp: proto.Int64(timestamp.UnixNano()),
+	}
+	err := s.exec(internal.Command_CreateShardGroupCommand, internal.E_CreateShardGroupCommand_Command, cmd)
+	if err != nil {
+		return nil, err
+	}
+
+	return s.ShardGroupByTimestamp(database, policy, timestamp)
+}
+
+// CreateShardGroupIfNotExists returns the live (non-deleted) shard group
+// covering timestamp, creating one when none is found locally. If creation
+// reports ErrShardGroupExists the existing group is looked up again and
+// returned.
+func (s *Store) CreateShardGroupIfNotExists(database, policy string, timestamp time.Time) (*ShardGroupInfo, error) {
+	// Local lookup first; a deleted group does not count.
+	existing, err := s.ShardGroupByTimestamp(database, policy, timestamp)
+	if err != nil {
+		return nil, err
+	}
+	if existing != nil && !existing.Deleted() {
+		return existing, nil
+	}
+
+	// Not found: create it, tolerating a concurrent creation.
+	created, err := s.CreateShardGroup(database, policy, timestamp)
+	if err == ErrShardGroupExists {
+		return s.ShardGroupByTimestamp(database, policy, timestamp)
+	}
+	return created, err
+}
+
+// DeleteShardGroup submits a command that removes the shard group with the
+// given ID from the policy.
+func (s *Store) DeleteShardGroup(database, policy string, id uint64) error {
+	cmd := &internal.DeleteShardGroupCommand{
+		Database:     proto.String(database),
+		Policy:       proto.String(policy),
+		ShardGroupID: proto.Uint64(id),
+	}
+	return s.exec(internal.Command_DeleteShardGroupCommand, internal.E_DeleteShardGroupCommand_Command, cmd)
+}
+
+// ShardGroups returns the shard groups of a retention policy, as reported
+// by the metadata. A lookup error causes one cache invalidation and retry
+// before it is returned.
+func (s *Store) ShardGroups(database, policy string) (a []ShardGroupInfo, err error) {
+	load := func(d *Data) error {
+		groups, lookupErr := d.ShardGroups(database, policy)
+		a = groups
+		return lookupErr
+	}
+	err = s.read(load)
+	return a, err
+}
+
+// ShardGroupsByTimeRange returns the shard groups of a policy that the
+// metadata reports for the range [tmin, tmax]. A nil result triggers one
+// cache invalidation and retry; if it is still nil the call returns
+// (nil, nil).
+// NOTE(review): the original comment says the groups are sorted by start
+// time; that ordering comes from Data.ShardGroupsByTimeRange, not from here.
+func (s *Store) ShardGroupsByTimeRange(database, policy string, tmin, tmax time.Time) (a []ShardGroupInfo, err error) {
+	load := func(d *Data) error {
+		groups, lookupErr := d.ShardGroupsByTimeRange(database, policy, tmin, tmax)
+		a = groups
+		if lookupErr != nil {
+			return lookupErr
+		}
+		if groups == nil {
+			return errInvalidate
+		}
+		return nil
+	}
+	err = s.read(load)
+	return a, err
+}
+
+// VisitRetentionPolicies invokes f once for every (database, retention
+// policy) pair in the metadata. f receives copies of both values. Any
+// error from read is discarded.
+func (s *Store) VisitRetentionPolicies(f func(d DatabaseInfo, r RetentionPolicyInfo)) {
+	visit := func(data *Data) error {
+		for _, db := range data.Databases {
+			for _, policy := range db.RetentionPolicies {
+				f(db, policy)
+			}
+		}
+		return nil
+	}
+	s.read(visit)
+}
+
+// ShardGroupByTimestamp looks up the shard group of a policy for the given
+// timestamp. Lookup errors are returned to the caller; a plain miss yields
+// (nil, nil) after read has invalidated the cache and retried once.
+func (s *Store) ShardGroupByTimestamp(database, policy string, timestamp time.Time) (sgi *ShardGroupInfo, err error) {
+	find := func(d *Data) error {
+		found, lookupErr := d.ShardGroupByTimestamp(database, policy, timestamp)
+		sgi = found
+		if lookupErr != nil {
+			return lookupErr
+		}
+		if found == nil {
+			return errInvalidate
+		}
+		return nil
+	}
+	err = s.read(find)
+	return sgi, err
+}
+
+// ShardOwner finds the non-deleted shard group that contains shardID and
+// returns the owning database name, retention policy name and a pointer to
+// a copy of that shard group. All results are zero values when the shard is
+// not found (after one cache invalidation and retry). Any error from read
+// is discarded.
+func (s *Store) ShardOwner(shardID uint64) (database, policy string, sgi *ShardGroupInfo) {
+	find := func(data *Data) error {
+		for _, db := range data.Databases {
+			for _, rp := range db.RetentionPolicies {
+				for i := range rp.ShardGroups {
+					// Work on a copy so the returned pointer does not alias the metadata.
+					group := rp.ShardGroups[i]
+					if group.Deleted() {
+						continue
+					}
+
+					for j := range group.Shards {
+						if group.Shards[j].ID != shardID {
+							continue
+						}
+						database, policy, sgi = db.Name, rp.Name, &group
+						return nil
+					}
+				}
+			}
+		}
+		return errInvalidate
+	}
+	s.read(find)
+	return database, policy, sgi
+}
+
+// CreateContinuousQuery submits a command that registers a continuous query
+// under the given name on the database.
+func (s *Store) CreateContinuousQuery(database, name, query string) error {
+	cmd := &internal.CreateContinuousQueryCommand{
+		Database: proto.String(database),
+		Name:     proto.String(name),
+		Query:    proto.String(query),
+	}
+	return s.exec(internal.Command_CreateContinuousQueryCommand, internal.E_CreateContinuousQueryCommand_Command, cmd)
+}
+
+// DropContinuousQuery submits a command that removes the named continuous
+// query from the database.
+func (s *Store) DropContinuousQuery(database, name string) error {
+	cmd := &internal.DropContinuousQueryCommand{
+		Database: proto.String(database),
+		Name:     proto.String(name),
+	}
+	return s.exec(internal.Command_DropContinuousQueryCommand, internal.E_DropContinuousQueryCommand_Command, cmd)
+}
+
+// User looks up a user by name. A miss triggers one cache invalidation and
+// retry inside read; if the user is still missing the result is (nil, nil).
+func (s *Store) User(name string) (ui *UserInfo, err error) {
+	find := func(d *Data) error {
+		if ui = d.User(name); ui == nil {
+			return errInvalidate
+		}
+		return nil
+	}
+	err = s.read(find)
+	return ui, err
+}
+
+// Users returns the user slice held in the metadata. The slice is returned
+// as-is, not copied.
+func (s *Store) Users() (a []UserInfo, err error) {
+	load := func(d *Data) error {
+		a = d.Users
+		return nil
+	}
+	err = s.read(load)
+	return a, err
+}
+
+// AdminUserExists reports whether at least one user in the metadata has the
+// Admin flag set.
+func (s *Store) AdminUserExists() (exists bool, err error) {
+	scan := func(d *Data) error {
+		for _, u := range d.Users {
+			if u.Admin {
+				exists = true
+				break
+			}
+		}
+		return nil
+	}
+	err = s.read(scan)
+	return exists, err
+}
+
+// ErrAuthenticate is returned by Authenticate when the supplied password
+// does not match the user's cached or stored hash.
+var ErrAuthenticate = errors.New("authentication failed")
+
+// Authenticate returns the user matching username if password is correct.
+//
+// The password is first checked against an in-memory cache of salted
+// SHA-256 hashes; on a cache miss it is verified with bcrypt against the
+// stored hash and, when valid, added to the cache. It returns
+// ErrUserNotFound for an unknown user and ErrAuthenticate for a wrong
+// password. Because the check runs inside read, a failure is attempted a
+// second time after the cache invalidation.
+func (s *Store) Authenticate(username, password string) (ui *UserInfo, err error) {
+	check := func(data *Data) error {
+		// The auth cache is guarded by the store's write lock.
+		s.mu.Lock()
+		defer s.mu.Unlock()
+
+		user := data.User(username)
+		if user == nil {
+			return ErrUserNotFound
+		}
+
+		// Fast path: compare against the cached salted hash.
+		if cached, hit := s.authCache[username]; hit {
+			digest, err := s.hashWithSalt(cached.salt, password)
+			if err != nil {
+				return err
+			}
+			if !bytes.Equal(digest, cached.hash) {
+				return ErrAuthenticate
+			}
+			ui = user
+			return nil
+		}
+
+		// Slow path: verify against the stored bcrypt hash.
+		if err := bcrypt.CompareHashAndPassword([]byte(user.Hash), []byte(password)); err != nil {
+			return ErrAuthenticate
+		}
+
+		// Remember a salted hash so later checks can skip bcrypt.
+		salt, digest, err := s.saltedHash(password)
+		if err != nil {
+			return err
+		}
+		s.authCache[username] = authUser{salt: salt, hash: digest}
+
+		ui = user
+		return nil
+	}
+	err = s.read(check)
+	return ui, err
+}
+
+// hashWithSalt returns the SHA-256 digest of salt followed by password.
+// The returned error is always nil.
+//
+// The salt and password are fed to the hasher separately rather than via
+// append(salt, ...): append could write the password bytes into the
+// caller's backing array when salt has spare capacity.
+func (s *Store) hashWithSalt(salt []byte, password string) ([]byte, error) {
+	hasher := sha256.New()
+	hasher.Write(salt)
+	hasher.Write([]byte(password))
+	return hasher.Sum(nil), nil
+}
+
+// saltedHash generates SaltBytes of random salt from the crypto random
+// source and returns it together with the salted hash of password. If the
+// random read fails, the (possibly partially filled) salt and the error
+// are returned with a nil hash.
+func (s *Store) saltedHash(password string) (salt, hash []byte, err error) {
+	salt = make([]byte, SaltBytes)
+	if _, err = io.ReadFull(crand.Reader, salt); err != nil {
+		return salt, nil, err
+	}
+
+	hash, err = s.hashWithSalt(salt, password)
+	return salt, hash, err
+}
+
+// CreateUser hashes the password with the store's hashing function,
+// submits a create-user command carrying the hash and admin flag, and
+// returns the stored user. The plain password is never put in the command.
+func (s *Store) CreateUser(name, password string, admin bool) (*UserInfo, error) {
+	hashed, err := s.hashPassword(password)
+	if err != nil {
+		return nil, err
+	}
+
+	cmd := &internal.CreateUserCommand{
+		Name:  proto.String(name),
+		Hash:  proto.String(string(hashed)),
+		Admin: proto.Bool(admin),
+	}
+	if err := s.exec(internal.Command_CreateUserCommand, internal.E_CreateUserCommand_Command, cmd); err != nil {
+		return nil, err
+	}
+	return s.User(name)
+}
+
+// DropUser submits a command that removes the named user from the
+// metastore.
+func (s *Store) DropUser(name string) error {
+	cmd := &internal.DropUserCommand{
+		Name: proto.String(name),
+	}
+	return s.exec(internal.Command_DropUserCommand, internal.E_DropUserCommand_Command, cmd)
+}
+
+// UpdateUser hashes the new password with the store's hashing function and
+// submits an update-user command carrying the hash for the named user.
+func (s *Store) UpdateUser(name, password string) error {
+	hashed, err := s.hashPassword(password)
+	if err != nil {
+		return err
+	}
+
+	cmd := &internal.UpdateUserCommand{
+		Name: proto.String(name),
+		Hash: proto.String(string(hashed)),
+	}
+	return s.exec(internal.Command_UpdateUserCommand, internal.E_UpdateUserCommand_Command, cmd)
+}
+
+// SetPrivilege submits a command that sets privilege p for the user on the
+// database. The privilege is sent as an int32.
+func (s *Store) SetPrivilege(username, database string, p influxql.Privilege) error {
+	cmd := &internal.SetPrivilegeCommand{
+		Username:  proto.String(username),
+		Database:  proto.String(database),
+		Privilege: proto.Int32(int32(p)),
+	}
+	return s.exec(internal.Command_SetPrivilegeCommand, internal.E_SetPrivilegeCommand_Command, cmd)
+}
+
+// SetAdminPrivilege submits a command that sets or clears the admin flag
+// of the named user.
+func (s *Store) SetAdminPrivilege(username string, admin bool) error {
+	cmd := &internal.SetAdminPrivilegeCommand{
+		Username: proto.String(username),
+		Admin:    proto.Bool(admin),
+	}
+	return s.exec(internal.Command_SetAdminPrivilegeCommand, internal.E_SetAdminPrivilegeCommand_Command, cmd)
+}
+
+// UserPrivileges returns the privileges of the named user as a map holding
+// one influxql.Privilege per key, as reported by the metadata.
+// NOTE(review): keys are presumably database names; confirm against
+// Data.UserPrivileges.
+func (s *Store) UserPrivileges(username string) (p map[string]influxql.Privilege, err error) {
+	load := func(d *Data) error {
+		privs, lookupErr := d.UserPrivileges(username)
+		p = privs
+		return lookupErr
+	}
+	err = s.read(load)
+	return p, err
+}
+
+// UserPrivilege returns the privilege the named user holds on the given
+// database, as reported by the metadata.
+func (s *Store) UserPrivilege(username, database string) (p *influxql.Privilege, err error) {
+	load := func(d *Data) error {
+		priv, lookupErr := d.UserPrivilege(username, database)
+		p = priv
+		return lookupErr
+	}
+	err = s.read(load)
+	return p, err
+}
+
+// UserCount returns how many users the metadata currently holds.
+func (s *Store) UserCount() (count int, err error) {
+	load := func(d *Data) error {
+		count = len(d.Users)
+		return nil
+	}
+	err = s.read(load)
+	return count, err
+}
+
+// PrecreateShardGroups creates the successor of every live shard group
+// whose end time is before cutoff, unless a live successor already exists.
+// This avoids the need for these shards to be created when data for the
+// corresponding time range arrives: shard creation involves Raft consensus,
+// and precreation avoids taking that hit at write time.
+//
+// Failures for individual groups are logged and skipped; the returned
+// error is always nil.
+func (s *Store) PrecreateShardGroups(cutoff time.Time) error {
+	s.read(func(data *Data) error {
+		for _, di := range data.Databases {
+			for _, rp := range di.RetentionPolicies {
+				for _, g := range rp.ShardGroups {
+					// Only live groups that end before the cutoff need a successor.
+					if g.Deleted() || !g.EndTime.Before(cutoff) {
+						continue
+					}
+					nextShardGroupTime := g.EndTime.Add(1 * time.Nanosecond)
+
+					// Check if successive shard group exists.
+					sgi, err := s.ShardGroupByTimestamp(di.Name, rp.Name, nextShardGroupTime)
+					if err != nil {
+						s.Logger.Printf("failed to check if successive shard group for group exists %d: %s",
+							g.ID, err.Error())
+						continue
+					}
+					if sgi != nil && !sgi.Deleted() {
+						continue
+					}
+
+					// It doesn't. Create it.
+					newGroup, err := s.CreateShardGroupIfNotExists(di.Name, rp.Name, nextShardGroupTime)
+					switch {
+					case err != nil:
+						s.Logger.Printf("failed to create successive shard group for group %d: %s",
+							g.ID, err.Error())
+					case newGroup == nil:
+						// The lookup after creation can come back as (nil, nil)
+						// because read swallows errInvalidate; do not dereference.
+						s.Logger.Printf("successive shard group for group %d not found after creation",
+							g.ID)
+					default:
+						s.Logger.Printf("new shard group %d successfully created for database %s, retention policy %s",
+							newGroup.ID, di.Name, rp.Name)
+					}
+				}
+			}
+		}
+		return nil
+	})
+	return nil
+}
+
+// SetData submits a command that replaces the root metadata with data.
+// This should only be used when restoring a snapshot.
+func (s *Store) SetData(data *Data) error {
+	cmd := &internal.SetDataCommand{
+		Data: data.marshal(),
+	}
+	return s.exec(internal.Command_SetDataCommand, internal.E_SetDataCommand_Command, cmd)
+}
+
+// read runs fn against the current metadata.
+//
+// If fn succeeds its result stands. If fn returns any error, the cache is
+// invalidated and fn is run once more against the re-read metadata. The
+// error from that second run is returned to the caller, except that
+// errInvalidate is reported as nil. An error from invalidate itself is
+// returned directly. fn may therefore be called twice.
+func (s *Store) read(fn func(*Data) error) error {
+	// current returns the metadata pointer under the read lock.
+	current := func() *Data {
+		s.mu.RLock()
+		defer s.mu.RUnlock()
+		return s.data
+	}
+
+	// First attempt against the cached metadata.
+	if fn(current()) == nil {
+		return nil
+	}
+
+	// Any failure invalidates the cache before the retry.
+	if err := s.invalidate(); err != nil {
+		return err
+	}
+
+	// Second attempt: errInvalidate is not surfaced to the caller.
+	err := fn(current())
+	if err == nil || err == errInvalidate {
+		return nil
+	}
+	return err
+}
+
+// errInvalidate is returned by callbacks passed to read() to request a
+// cache invalidation and retry without surfacing an error: if the retry
+// returns errInvalidate again, read() reports nil to its caller.
+var errInvalidate = errors.New("invalidate cache")
+
+// invalidate is a placeholder for reloading the cache: it currently just
+// sleeps for one second and reports success.
+func (s *Store) invalidate() error {
+	time.Sleep(time.Second)
+	// FIXME(benbjohnson): Reload cache from the leader.
+	return nil
+}
+
+// exec wraps value in an internal.Command of the given type (attached via
+// the extension descriptor), serializes it and runs it: locally through the
+// raft log when this node is the leader, otherwise by forwarding it to the
+// current leader. Encoding failures trip assertions.
+func (s *Store) exec(typ internal.Command_Type, desc *proto.ExtensionDesc, value interface{}) error {
+	// Build the command envelope.
+	cmd := &internal.Command{Type: &typ}
+	err := proto.SetExtension(cmd, desc, value)
+	assert(err == nil, "proto.SetExtension: %s", err)
+
+	// Serialize it.
+	encoded, err := proto.Marshal(cmd)
+	assert(err == nil, "proto.Marshal: %s", err)
+
+	// Followers forward to the leader; the leader applies directly.
+	if s.raft.State() != raft.Leader {
+		return s.remoteExec(encoded)
+	}
+	return s.apply(encoded)
+}
+
+// apply applies a serialized command to the raft log.
+func (s *Store) apply(b []byte) error {
+	// Apply to raft log.
+	f := s.raft.Apply(b, 0)
+	if err := f.Error(); err != nil {
+		return err
+	}
+
+	// Return response if it's an error.
+	// No other non-nil objects should be returned.
+	resp := f.Response()
+	if err, ok := resp.(error); ok {
+		return lookupError(err)
+	}
+	assert(resp == nil, "unexpected response: %#v", resp)
+
+	return nil
+}
+
+// remoteExec forwards an encoded command to the current raft leader over
+// TCP and waits for the local state machine to catch up.
+//
+// Request: the MuxExecHeader byte, the ExecMagic marker, a big-endian
+// uint64 length and the command bytes. Response: a big-endian uint64 length
+// followed by a serialized internal.Response. On success the call blocks
+// (up to 5s) until the local metadata reaches the index in the response.
+func (s *Store) remoteExec(b []byte) error {
+	// A leader must be known to forward anything.
+	leader := s.raft.Leader()
+	if leader == "" {
+		return errors.New("no leader")
+	}
+
+	conn, err := net.DialTimeout("tcp", leader, 10*time.Second)
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	// Mux header byte first, then the exec marker.
+	if _, err := conn.Write([]byte{MuxExecHeader}); err != nil {
+		return err
+	}
+	if _, err := conn.Write([]byte(ExecMagic)); err != nil {
+		return err
+	}
+
+	// Length-prefixed command.
+	if err := binary.Write(conn, binary.BigEndian, uint64(len(b))); err != nil {
+		return fmt.Errorf("write command size: %s", err)
+	}
+	if _, err := conn.Write(b); err != nil {
+		return fmt.Errorf("write command: %s", err)
+	}
+
+	// Length-prefixed response.
+	var respLen uint64
+	if err := binary.Read(conn, binary.BigEndian, &respLen); err != nil {
+		return fmt.Errorf("read response size: %s", err)
+	}
+	respBytes := make([]byte, respLen)
+	if _, err := io.ReadFull(conn, respBytes); err != nil {
+		return fmt.Errorf("read response: %s", err)
+	}
+
+	var resp internal.Response
+	if err := proto.Unmarshal(respBytes, &resp); err != nil {
+		return fmt.Errorf("unmarshal response: %s", err)
+	}
+	if !resp.GetOK() {
+		return fmt.Errorf("exec failed: %s", resp.GetError())
+	}
+
+	// Block until the local FSM has caught up with the leader's index.
+	if err := s.sync(resp.GetIndex(), 5*time.Second); err != nil {
+		return fmt.Errorf("sync: %s", err)
+	}
+
+	return nil
+}
+
+// sync waits until the local metadata index is at least index, checking
+// every 100ms. It returns a "timeout" error once timeout has elapsed. The
+// first check happens only after the first tick.
+func (s *Store) sync(index uint64, timeout time.Duration) error {
+	poll := time.NewTicker(100 * time.Millisecond)
+	defer poll.Stop()
+
+	deadline := time.NewTimer(timeout)
+	defer deadline.Stop()
+
+	// caughtUp compares the metadata index under the store lock.
+	caughtUp := func() bool {
+		s.mu.Lock()
+		defer s.mu.Unlock()
+		return s.data.Index >= index
+	}
+
+	for {
+		select {
+		case <-poll.C:
+		case <-deadline.C:
+			return errors.New("timeout")
+		}
+
+		if caughtUp() {
+			return nil
+		}
+	}
+}
+
+// BcryptCost is the cost factor used when generating password hashes with bcrypt.
+// This setting is lowered during testing to improve test suite performance.
+var BcryptCost = 10
+
+// HashPasswordFn represents a password hashing function: it takes a password and returns its hash or an error.
+type HashPasswordFn func(password string) ([]byte, error)
+
+// GetHashPasswordFn returns the current password hashing function, read under the store's read lock.
+func (s *Store) GetHashPasswordFn() HashPasswordFn {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return s.hashPassword
+}
+
+// SetHashPasswordFn replaces the password hashing function while holding the store's write lock.
+func (s *Store) SetHashPasswordFn(fn HashPasswordFn) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.hashPassword = fn
+}
+
+// storeFSM is the finite state machine Raft drives; it shares Store's underlying type, so a *storeFSM converts directly to a *Store.
+type storeFSM Store
+
+// Apply decodes a committed Raft log entry, applies it to the store's metadata and returns the command's error, if any.
+func (fsm *storeFSM) Apply(l *raft.Log) interface{} {
+	var cmd internal.Command
+	if err := proto.Unmarshal(l.Data, &cmd); err != nil {
+		panic(fmt.Errorf("cannot unmarshal command: %x: %s", l.Data, err))
+	}
+
+	// Lock the store for the duration of the apply.
+	s := (*Store)(fsm)
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	err := func() interface{} {
+		switch cmd.GetType() {
+		case internal.Command_CreateNodeCommand:
+			return fsm.applyCreateNodeCommand(&cmd)
+		case internal.Command_DeleteNodeCommand:
+			return fsm.applyDeleteNodeCommand(&cmd)
+		case internal.Command_CreateDatabaseCommand:
+			return fsm.applyCreateDatabaseCommand(&cmd)
+		case internal.Command_DropDatabaseCommand:
+			return fsm.applyDropDatabaseCommand(&cmd)
+		case internal.Command_CreateRetentionPolicyCommand:
+			return fsm.applyCreateRetentionPolicyCommand(&cmd)
+		case internal.Command_DropRetentionPolicyCommand:
+			return fsm.applyDropRetentionPolicyCommand(&cmd)
+		case internal.Command_SetDefaultRetentionPolicyCommand:
+			return fsm.applySetDefaultRetentionPolicyCommand(&cmd)
+		case internal.Command_UpdateRetentionPolicyCommand:
+			return fsm.applyUpdateRetentionPolicyCommand(&cmd)
+		case internal.Command_CreateShardGroupCommand:
+			return fsm.applyCreateShardGroupCommand(&cmd)
+		case internal.Command_DeleteShardGroupCommand:
+			return fsm.applyDeleteShardGroupCommand(&cmd)
+		case internal.Command_CreateContinuousQueryCommand:
+			return fsm.applyCreateContinuousQueryCommand(&cmd)
+		case internal.Command_DropContinuousQueryCommand:
+			return fsm.applyDropContinuousQueryCommand(&cmd)
+		case internal.Command_CreateUserCommand:
+			return fsm.applyCreateUserCommand(&cmd)
+		case internal.Command_DropUserCommand:
+			return fsm.applyDropUserCommand(&cmd)
+		case internal.Command_UpdateUserCommand:
+			return fsm.applyUpdateUserCommand(&cmd)
+		case internal.Command_SetPrivilegeCommand:
+			return fsm.applySetPrivilegeCommand(&cmd)
+		case internal.Command_SetAdminPrivilegeCommand:
+			return fsm.applySetAdminPrivilegeCommand(&cmd)
+		case internal.Command_SetDataCommand:
+			return fsm.applySetDataCommand(&cmd)
+		default:
+			panic(fmt.Errorf("cannot apply command: %x", l.Data))
+		}
+	}()
+
+	// Record the entry's term and index on the metadata, whether or not the command succeeded.
+	fsm.data.Term = l.Term
+	fsm.data.Index = l.Index
+	return err
+}
+
+// applyCreateNodeCommand adds the command's host as a node and, if no cluster ID is set yet, seeds it from the command.
+func (fsm *storeFSM) applyCreateNodeCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_CreateNodeCommand_Command)
+	req := ext.(*internal.CreateNodeCommand)
+
+	next := fsm.data.Clone()
+	if err := next.CreateNode(req.GetHost()); err != nil {
+		return err
+	}
+
+	// An unset (zero) cluster ID takes the command's random number.
+	if next.ClusterID == 0 {
+		next.ClusterID = uint64(req.GetRand())
+	}
+
+	fsm.data = next
+	return nil
+}
+
+// applyDeleteNodeCommand deletes the node with the command's ID, returning any error from Data.DeleteNode.
+func (fsm *storeFSM) applyDeleteNodeCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_DeleteNodeCommand_Command)
+	req := ext.(*internal.DeleteNodeCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.DeleteNode(req.GetID()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyCreateDatabaseCommand creates the database named in the command, returning any error from Data.CreateDatabase.
+func (fsm *storeFSM) applyCreateDatabaseCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_CreateDatabaseCommand_Command)
+	req := ext.(*internal.CreateDatabaseCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.CreateDatabase(req.GetName()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyDropDatabaseCommand drops the database named in the command, returning any error from Data.DropDatabase.
+func (fsm *storeFSM) applyDropDatabaseCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_DropDatabaseCommand_Command)
+	req := ext.(*internal.DropDatabaseCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.DropDatabase(req.GetName()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyCreateRetentionPolicyCommand creates the command's retention policy on the command's database.
+func (fsm *storeFSM) applyCreateRetentionPolicyCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_CreateRetentionPolicyCommand_Command)
+	req := ext.(*internal.CreateRetentionPolicyCommand)
+	pb := req.GetRetentionPolicy()
+
+	// Build the policy from the protobuf message and add it to a clone of the metadata.
+	next := fsm.data.Clone()
+	rpi := &RetentionPolicyInfo{
+		Name:               pb.GetName(),
+		ReplicaN:           int(pb.GetReplicaN()),
+		Duration:           time.Duration(pb.GetDuration()),
+		ShardGroupDuration: time.Duration(pb.GetShardGroupDuration()),
+	}
+	if err := next.CreateRetentionPolicy(req.GetDatabase(), rpi); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyDropRetentionPolicyCommand drops the named retention policy from the command's database.
+func (fsm *storeFSM) applyDropRetentionPolicyCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_DropRetentionPolicyCommand_Command)
+	req := ext.(*internal.DropRetentionPolicyCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.DropRetentionPolicy(req.GetDatabase(), req.GetName()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applySetDefaultRetentionPolicyCommand makes the named policy the default retention policy of the command's database.
+func (fsm *storeFSM) applySetDefaultRetentionPolicyCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_SetDefaultRetentionPolicyCommand_Command)
+	req := ext.(*internal.SetDefaultRetentionPolicyCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.SetDefaultRetentionPolicy(req.GetDatabase(), req.GetName()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyUpdateRetentionPolicyCommand updates the named retention policy of the
+// command's database with the new name, duration and ReplicaN the command carries.
+func (fsm *storeFSM) applyUpdateRetentionPolicyCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_UpdateRetentionPolicyCommand_Command)
+	req := ext.(*internal.UpdateRetentionPolicyCommand)
+
+	// Duration and ReplicaN are copied into the update only when the command sets them.
+	update := RetentionPolicyUpdate{Name: req.NewName}
+	if req.Duration != nil {
+		update.SetDuration(time.Duration(req.GetDuration()))
+	}
+	if req.ReplicaN != nil {
+		update.SetReplicaN(int(req.GetReplicaN()))
+	}
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	err := next.UpdateRetentionPolicy(req.GetDatabase(), req.GetName(), &update)
+	if err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyCreateShardGroupCommand creates a shard group for the command's database, policy and timestamp (Unix nanoseconds).
+func (fsm *storeFSM) applyCreateShardGroupCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_CreateShardGroupCommand_Command)
+	req := ext.(*internal.CreateShardGroupCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.CreateShardGroup(req.GetDatabase(), req.GetPolicy(), time.Unix(0, req.GetTimestamp())); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyDeleteShardGroupCommand deletes the shard group identified by the command's database, policy and shard group ID.
+func (fsm *storeFSM) applyDeleteShardGroupCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_DeleteShardGroupCommand_Command)
+	req := ext.(*internal.DeleteShardGroupCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.DeleteShardGroup(req.GetDatabase(), req.GetPolicy(), req.GetShardGroupID()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyCreateContinuousQueryCommand creates the named continuous query with the command's query text on its database.
+func (fsm *storeFSM) applyCreateContinuousQueryCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_CreateContinuousQueryCommand_Command)
+	req := ext.(*internal.CreateContinuousQueryCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.CreateContinuousQuery(req.GetDatabase(), req.GetName(), req.GetQuery()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyDropContinuousQueryCommand drops the named continuous query from the command's database.
+func (fsm *storeFSM) applyDropContinuousQueryCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_DropContinuousQueryCommand_Command)
+	req := ext.(*internal.DropContinuousQueryCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.DropContinuousQuery(req.GetDatabase(), req.GetName()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyCreateUserCommand creates a user from the command's name, password hash and admin flag.
+func (fsm *storeFSM) applyCreateUserCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_CreateUserCommand_Command)
+	req := ext.(*internal.CreateUserCommand)
+
+	// Mutate a clone so the live metadata is replaced only on success.
+	next := fsm.data.Clone()
+	if err := next.CreateUser(req.GetName(), req.GetHash(), req.GetAdmin()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applyDropUserCommand drops the named user and, on success, evicts that user's entry from the auth cache.
+func (fsm *storeFSM) applyDropUserCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_DropUserCommand_Command)
+	req := ext.(*internal.DropUserCommand)
+
+	next := fsm.data.Clone()
+	if err := next.DropUser(req.GetName()); err != nil {
+		return err
+	}
+	fsm.data = next
+	delete(fsm.authCache, req.GetName())
+	return nil
+}
+
+// applyUpdateUserCommand stores a new password hash for the named user and, on success, evicts the user's auth cache entry.
+func (fsm *storeFSM) applyUpdateUserCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_UpdateUserCommand_Command)
+	req := ext.(*internal.UpdateUserCommand)
+
+	next := fsm.data.Clone()
+	if err := next.UpdateUser(req.GetName(), req.GetHash()); err != nil {
+		return err
+	}
+	fsm.data = next
+	delete(fsm.authCache, req.GetName())
+	return nil
+}
+
+// applySetPrivilegeCommand sets the command's privilege for the given user on the given database.
+func (fsm *storeFSM) applySetPrivilegeCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_SetPrivilegeCommand_Command)
+	req := ext.(*internal.SetPrivilegeCommand)
+	next := fsm.data.Clone()
+	priv := influxql.Privilege(req.GetPrivilege())
+	if err := next.SetPrivilege(req.GetUsername(), req.GetDatabase(), priv); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applySetAdminPrivilegeCommand sets the admin flag carried by the command on the named user.
+func (fsm *storeFSM) applySetAdminPrivilegeCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_SetAdminPrivilegeCommand_Command)
+	req := ext.(*internal.SetAdminPrivilegeCommand)
+
+	next := fsm.data.Clone()
+	if err := next.SetAdminPrivilege(req.GetUsername(), req.GetAdmin()); err != nil {
+		return err
+	}
+	fsm.data = next
+	return nil
+}
+
+// applySetDataCommand replaces the store's metadata wholesale with the data carried by the command.
+func (fsm *storeFSM) applySetDataCommand(cmd *internal.Command) interface{} {
+	ext, _ := proto.GetExtension(cmd, internal.E_SetDataCommand_Command)
+	req := ext.(*internal.SetDataCommand)
+
+	// Install an empty Data first, then populate it in place from the command.
+	fsm.data = new(Data)
+	fsm.data.unmarshal(req.GetData())
+	return nil
+}
+
+// Snapshot returns a snapshot holding the store's current metadata pointer, read under the store lock.
+func (fsm *storeFSM) Snapshot() (raft.FSMSnapshot, error) {
+	store := (*Store)(fsm)
+	store.mu.Lock()
+	defer store.mu.Unlock()
+	return &storeFSMSnapshot{Data: store.data}, nil
+}
+
+// Restore replaces the store's metadata with the snapshot read from r.
+// The reader is consumed fully but is not closed here.
+func (fsm *storeFSM) Restore(r io.ReadCloser) error {
+	// Read the entire snapshot into memory.
+	raw, err := ioutil.ReadAll(r)
+	if err != nil {
+		return err
+	}
+
+	// Decode it into a fresh Data value.
+	restored := new(Data)
+	if err = restored.UnmarshalBinary(raw); err != nil {
+		return err
+	}
+
+	// NOTE: No lock is taken because Hashicorp Raft doesn't call Restore
+	// concurrently with any other function.
+	fsm.data = restored
+	return nil
+}
+
+type storeFSMSnapshot struct {
+	Data *Data // metadata captured by storeFSM.Snapshot; encoded by Persist
+}
+
+// Persist encodes the snapshot's metadata, writes it to sink and closes the sink.
+// If any of those steps fails the sink is cancelled and the error is returned.
+func (s *storeFSMSnapshot) Persist(sink raft.SnapshotSink) error {
+	// write runs the encode/write/close sequence, stopping at the first error.
+	write := func() error {
+		// Encode data.
+		buf, err := s.Data.MarshalBinary()
+		if err != nil {
+			return err
+		}
+
+		// Write data to sink.
+		if _, err = sink.Write(buf); err != nil {
+			return err
+		}
+
+		// Closing the sink is the final step; its error is the result.
+		return sink.Close()
+	}
+
+	// Abandon the snapshot on any failure.
+	if err := write(); err != nil {
+		sink.Cancel()
+		return err
+	}
+
+	return nil
+}
+
+// Release is invoked when we are finished with the snapshot. It is a no-op.
+func (s *storeFSMSnapshot) Release() {}
+
+// raftLayer wraps the connection so it can be re-used for forwarding.
+type raftLayer struct {
+	ln     net.Listener  // listener that Accept and Close delegate to
+	addr   net.Addr      // address reported by Addr
+	conn   chan net.Conn // NOTE(review): created in newRaftLayer but not used by the methods visible here
+	closed chan struct{} // NOTE(review): created in newRaftLayer but not used by the methods visible here
+}
+
+// newRaftLayer builds a raftLayer around ln that reports addr, with fresh unbuffered channels.
+func newRaftLayer(ln net.Listener, addr net.Addr) *raftLayer {
+	layer := new(raftLayer)
+	layer.ln = ln
+	layer.addr = addr
+	layer.conn = make(chan net.Conn)
+	layer.closed = make(chan struct{})
+	return layer
+}
+
+// Addr returns the local address for the layer, as given to newRaftLayer.
+func (l *raftLayer) Addr() net.Addr { return l.addr }
+
+// Dial opens a TCP connection to addr (bounded by timeout) and marks it as
+// Raft traffic by sending the MuxRaftHeader byte before handing it back.
+func (l *raftLayer) Dial(addr string, timeout time.Duration) (net.Conn, error) {
+	conn, err := net.DialTimeout("tcp", addr, timeout)
+	if err != nil {
+		return nil, err
+	}
+
+	// If the marker byte cannot be sent the connection is closed and discarded.
+	if _, err := conn.Write([]byte{MuxRaftHeader}); err != nil {
+		conn.Close()
+		return nil, err
+	}
+	return conn, nil
+}
+
+// Accept waits for the next connection on the underlying listener.
+func (l *raftLayer) Accept() (net.Conn, error) { return l.ln.Accept() }
+
+// Close closes the layer by closing the underlying listener.
+func (l *raftLayer) Close() error { return l.ln.Close() }
+
+// RetentionPolicyUpdate represents retention policy fields to be updated; each field is a pointer so it can be left nil when not supplied.
+type RetentionPolicyUpdate struct {
+	Name     *string        // replacement name, if any
+	Duration *time.Duration // replacement duration, if any
+	ReplicaN *int           // replacement ReplicaN, if any
+}
+
+func (rpu *RetentionPolicyUpdate) SetName(v string)            { rpu.Name = &v }
+func (rpu *RetentionPolicyUpdate) SetDuration(v time.Duration) { rpu.Duration = &v }
+func (rpu *RetentionPolicyUpdate) SetReplicaN(v int)           { rpu.ReplicaN = &v }
+
+// assert panics with "assert failed: " followed by msg formatted with v if the given condition is false.
+func assert(condition bool, msg string, v ...interface{}) {
+	if !condition {
+		panic(fmt.Sprintf("assert failed: "+msg, v...))
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/snapshot/snapshot.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/snapshot/snapshot.go
new file mode 100644
index 00000000000..b08e62fee93
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/snapshot/snapshot.go
@@ -0,0 +1,529 @@
+package snapshot
+
+import (
+	"archive/tar"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"sort"
+	"time"
+)
+
+// manifestName is the tar entry name of the manifest, which readers expect as the first entry of a snapshot.
+const manifestName = "manifest"
+
+// Manifest represents the list of files in a snapshot; it is stored in the archive as JSON.
+type Manifest struct {
+	Files []File `json:"files"`
+}
+
+// Diff returns a Manifest of the files that are newer in m than in other:
+// files that other does not list at all, and files whose ModTime in m is
+// strictly after that of the same-named file in other. The result is sorted by name.
+func (m *Manifest) Diff(other *Manifest) *Manifest {
+	result := &Manifest{}
+
+	for _, mine := range m.Files {
+		// A file unknown to other counts as newer; otherwise the first
+		// same-named entry in other decides.
+		newer := true
+		for _, theirs := range other.Files {
+			if theirs.Name == mine.Name {
+				newer = mine.ModTime.After(theirs.ModTime)
+				break
+			}
+		}
+
+		// Keep only the files for which m holds the newest version.
+		if newer {
+			result.Files = append(result.Files, mine)
+		}
+	}
+
+	// Sort files.
+	sort.Sort(Files(result.Files))
+
+	return result
+}
+
+// Merge returns a new, name-sorted Manifest combining m with other.
+// When both list a file of the same name, the entry with the later ModTime
+// wins (m's entry is kept on a tie); m and other are left unmodified.
+func (m *Manifest) Merge(other *Manifest) *Manifest {
+	merged := &Manifest{Files: make([]File, len(m.Files))}
+	copy(merged.Files, m.Files)
+
+	for _, incoming := range other.Files {
+		// Locate the first entry already merged under the same name.
+		pos := -1
+		for i := range merged.Files {
+			if merged.Files[i].Name == incoming.Name {
+				pos = i
+				break
+			}
+		}
+
+		switch {
+		case pos < 0:
+			// Unknown file: add it.
+			merged.Files = append(merged.Files, incoming)
+		case incoming.ModTime.After(merged.Files[pos].ModTime):
+			// Known file with a newer version in other: replace it.
+			merged.Files[pos] = incoming
+		}
+	}
+
+	// Sort files.
+	sort.Sort(Files(merged.Files))
+
+	return merged
+}
+
+// File represents a single file in a manifest.
+type File struct {
+	Name    string    `json:"name"`         // filename; also used as the tar entry name
+	Size    int64     `json:"size"`         // file size in bytes
+	ModTime time.Time `json:"lastModified"` // last modified time; decides which version of a file is newer
+}
+
+// Files represents a sortable list of files; it implements sort.Interface, ordering by ascending Name.
+type Files []File
+
+func (p Files) Len() int           { return len(p) }
+func (p Files) Less(i, j int) bool { return p[i].Name < p[j].Name }
+func (p Files) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
+
+// Reader reads a snapshot from a tar archive stream.
+// This type is not safe for concurrent use.
+type Reader struct {
+	tr       *tar.Reader // underlying tar stream
+	manifest *Manifest   // cached manifest; nil until readManifest succeeds
+}
+
+// NewReader returns a new Reader that decodes the tar-formatted snapshot in r.
+// Nothing is read from r until the manifest or an entry is first requested.
+func NewReader(r io.Reader) *Reader {
+	tr := tar.NewReader(r)
+	return &Reader{tr: tr}
+}
+
+// Manifest returns the snapshot manifest, reading it from the archive if that has not happened yet.
+func (sr *Reader) Manifest() (*Manifest, error) {
+	if err := sr.readManifest(); err != nil {
+		return nil, err
+	}
+	return sr.manifest, nil
+}
+
+// readManifest reads the first entry from the snapshot and materializes the snapshot.
+// This is skipped if the snapshot manifest has already been read.
+func (sr *Reader) readManifest() error {
+	// Already read, ignore.
+	if sr.manifest != nil {
+		return nil
+	}
+
+	// Read manifest header.
+	hdr, err := sr.tr.Next()
+	if err != nil {
+		return fmt.Errorf("snapshot header: %s", err)
+	} else if hdr.Name != manifestName {
+		return fmt.Errorf("invalid snapshot header: expected manifest")
+	}
+
+	// Materialize manifest.
+	var manifest Manifest
+	if err := json.NewDecoder(sr.tr).Decode(&manifest); err != nil {
+		return fmt.Errorf("decode manifest: %s", err)
+	}
+	sr.manifest = &manifest
+
+	return nil
+}
+
+// Next advances to the next tar entry and returns its manifest record.
+// Errors from the tar reader (including io.EOF at the end of the archive) are
+// returned unchanged; an entry missing from the manifest is reported as an error.
+func (sr *Reader) Next() (File, error) {
+	// The manifest must be loaded before entries can be matched against it.
+	if err := sr.readManifest(); err != nil {
+		return File{}, err
+	}
+
+	hdr, err := sr.tr.Next()
+	if err != nil {
+		return File{}, err
+	}
+
+	// Look the entry up by name; the first manifest match wins.
+	for _, f := range sr.manifest.Files {
+		if f.Name == hdr.Name {
+			return f, nil
+		}
+	}
+
+	return File{}, fmt.Errorf("snapshot entry not found in manifest: %s", hdr.Name)
+}
+
+// Read reads from the current tar entry into b, loading the manifest first if
+// that has not happened yet. Errors from the tar reader are returned unchanged.
+func (sr *Reader) Read(b []byte) (n int, err error) {
+	if err = sr.readManifest(); err != nil {
+		return 0, err
+	}
+
+	// Pass read through to the tar reader.
+	n, err = sr.tr.Read(b)
+	return n, err
+}
+
+// MultiReader reads from a collection of snapshots.
+// For each file, only the version recorded in the merged manifest (the most recently modified one) is served.
+// This type is not safe for concurrent use.
+type MultiReader struct {
+	readers []*Reader // underlying snapshot readers
+	files   []*File   // pending (not yet consumed) file for each reader; nil if none
+
+	manifest *Manifest // combined manifest from all readers; nil until first computed
+	index    int       // position in the combined manifest of the current file; -1 before the first Next
+	curr     *Reader   // reader serving the current file; nil when there is none
+}
+
+// NewMultiReader returns a new MultiReader that wraps each given reader, in order, in a snapshot Reader.
+func NewMultiReader(readers ...io.Reader) *MultiReader {
+	wrapped := make([]*Reader, 0, len(readers))
+	for _, rd := range readers {
+		wrapped = append(wrapped, NewReader(rd))
+	}
+	return &MultiReader{
+		readers: wrapped,
+		files:   make([]*File, len(readers)),
+		index:   -1,
+	}
+}
+
+// Manifest returns the merged manifest of all underlying readers. It is built
+// on the first successful call and served from the cache afterwards.
+func (ssr *MultiReader) Manifest() (*Manifest, error) {
+	if cached := ssr.manifest; cached != nil {
+		return cached, nil
+	}
+
+	// Fold each reader's manifest into the result, in reader order.
+	combined := &Manifest{}
+	for idx := range ssr.readers {
+		part, err := ssr.readers[idx].Manifest()
+		if err != nil {
+			return nil, fmt.Errorf("manifest: idx=%d, err=%s", idx, err)
+		}
+		combined = combined.Merge(part)
+	}
+
+	// Cache manifest and return.
+	ssr.manifest = combined
+	return combined, nil
+}
+
+// Next advances to the next file of the combined manifest and selects the
+// reader that will serve subsequent Read calls for it. It returns io.EOF once
+// every file in the combined manifest has been visited.
+func (ssr *MultiReader) Next() (File, error) {
+	combined, err := ssr.Manifest()
+	if err != nil {
+		return File{}, fmt.Errorf("manifest: %s", err)
+	}
+
+	// All files consumed: drop the current reader so Read reports EOF too.
+	if last := len(combined.Files) - 1; ssr.index == last {
+		ssr.curr = nil
+		return File{}, io.EOF
+	}
+
+	// Queue up a pending file for every reader that has none.
+	if err := ssr.nextFiles(); err != nil {
+		return File{}, fmt.Errorf("next files: %s", err)
+	}
+
+	// Step to the next manifest entry.
+	ssr.index++
+	want := combined.Files[ssr.index]
+
+	// Among the readers whose pending file carries the wanted name, the first
+	// one whose file equals the manifest record exactly becomes the source.
+	// Every same-named pending file is cleared, whether it was chosen or not.
+	var source *Reader
+	for i, pending := range ssr.files {
+		if pending == nil || pending.Name != want.Name {
+			continue
+		}
+		if source == nil && *pending == want {
+			source = ssr.readers[i]
+		}
+		ssr.files[i] = nil
+	}
+
+	// Return an error if no reader offers the file.
+	// This shouldn't happen unless the underlying snapshot is altered.
+	if source == nil {
+		return File{}, fmt.Errorf("snaphot file not found in readers: %s", want.Name)
+	}
+
+	// Set current reader.
+	ssr.curr = source
+	return want, nil
+}
+
+// nextFiles advances each reader that has no pending file to its next entry and
+// records it. Readers at io.EOF stay without one; any other error is returned.
+func (ssr *MultiReader) nextFiles() error {
+	for i := range ssr.readers {
+		// Readers that still hold an unconsumed file are left alone.
+		if ssr.files[i] != nil {
+			continue
+		}
+		next, err := ssr.readers[i].Next()
+		switch {
+		case err == io.EOF:
+			ssr.files[i] = nil
+		case err != nil:
+			return fmt.Errorf("next: reader=%d, err=%s", i, err)
+		default:
+			ssr.files[i] = &next
+		}
+	}
+	return nil
+}
+
+// nextIndex returns the position in ssr.files of the pending file with the
+// smallest name, preferring the latest ModTime among equal names and the
+// lowest position on a full tie. It returns -1 if no reader has a pending file.
+func (ssr *MultiReader) nextIndex() int {
+	best := -1
+	for i, f := range ssr.files {
+		switch {
+		case f == nil: // no pending file for this reader
+		case best == -1:
+			best = i
+		case f.Name < ssr.files[best].Name:
+			best = i
+		case f.Name == ssr.files[best].Name && f.ModTime.After(ssr.files[best].ModTime):
+			best = i
+		}
+	}
+	return best
+}
+
+// Read reads the current entry from the reader selected by Next; it returns io.EOF when no reader is selected.
+func (ssr *MultiReader) Read(b []byte) (n int, err error) {
+	if ssr.curr == nil {
+		return 0, io.EOF
+	}
+	return ssr.curr.Read(b)
+}
+
+// OpenFileMultiReader opens the base snapshot at path together with its
+// incremental snapshots (path.0, path.1, ... up to the first missing file) and
+// returns a MultiReader over them plus the opened files, which the caller must
+// close. On error every file opened so far is closed and no closers are returned.
+// A missing base snapshot is reported with the unwrapped os.Open error.
+func OpenFileMultiReader(path string) (*MultiReader, []io.Closer, error) {
+	var readers []io.Reader
+	var closers []io.Closer
+
+	// Open original snapshot file.
+	f, err := os.Open(path)
+	if os.IsNotExist(err) {
+		return nil, nil, err
+	} else if err != nil {
+		return nil, nil, fmt.Errorf("open snapshot: %s", err)
+	}
+	readers = append(readers, f)
+	closers = append(closers, f)
+
+	// Open all incremental snapshots, stopping at the first gap in the numbering.
+	for i := 0; ; i++ {
+		filename := path + fmt.Sprintf(".%d", i)
+		f, err := os.Open(filename)
+		if os.IsNotExist(err) {
+			break
+		} else if err != nil {
+			closeAll(closers)
+			return nil, nil, fmt.Errorf("open incremental snapshot: file=%s, err=%s", filename, err)
+		}
+		readers = append(readers, f)
+		closers = append(closers, f)
+	}
+
+	// Hand the open files back so the caller (e.g. ReadFileManifest) can close
+	// them; returning nil here would leak every file descriptor.
+	return NewMultiReader(readers...), closers, nil
+}
+
+// ReadFileManifest returns the merged Manifest of the base snapshot at path
+// and all of its incremental snapshots. A missing base file is reported with
+// the original error so callers can test it with os.IsNotExist.
+func ReadFileManifest(path string) (*Manifest, error) {
+	mr, files, err := OpenFileMultiReader(path)
+	switch {
+	case os.IsNotExist(err):
+		return nil, err
+	case err != nil:
+		return nil, fmt.Errorf("open file multi reader: %s", err)
+	}
+	defer closeAll(files)
+
+	// Merge the manifests of every opened snapshot.
+	manifest, err := mr.Manifest()
+	if err != nil {
+		return nil, fmt.Errorf("manifest: %s", err)
+	}
+	return manifest, nil
+}
+
+func closeAll(a []io.Closer) {
+	for _, c := range a {
+		_ = c.Close() // best effort: close errors are deliberately ignored
+	}
+}
+
+// Writer writes a snapshot manifest and the underlying files to an io.Writer as a tar archive.
+type Writer struct {
+	// The manifest to write from.
+	// Removing files from the manifest after creation will cause those files to be ignored.
+	Manifest *Manifest
+
+	// Writers for each file by filename.
+	// Writers are closed as they're processed; all are closed by the end of a successful WriteTo().
+	FileWriters map[string]FileWriter
+}
+
+// NewWriter returns a new Writer with an empty manifest and no file writers registered.
+func NewWriter() *Writer {
+	sw := new(Writer)
+	sw.Manifest = new(Manifest)
+	sw.FileWriters = map[string]FileWriter{}
+	return sw
+}
+
+// Close closes all file writers on the snapshot. Close errors are ignored and nil is always returned.
+func (sw *Writer) Close() error {
+	for _, fw := range sw.FileWriters {
+		_ = fw.Close()
+	}
+	return nil
+}
+
+// closeUnusedWriters closes every registered file writer whose name does not
+// appear in the manifest. This allows transactions on these files to be short lived.
+func (sw *Writer) closeUnusedWriters() {
+	// Collect the names the manifest still refers to.
+	wanted := make(map[string]bool, len(sw.Manifest.Files))
+	for _, f := range sw.Manifest.Files {
+		wanted[f.Name] = true
+	}
+
+	// Close the rest; close errors are ignored.
+	for name, fw := range sw.FileWriters {
+		if !wanted[name] {
+			_ = fw.Close()
+		}
+	}
+}
+
+// WriteTo writes the snapshot to w as a tar archive: the manifest first,
+// followed by every file listed in it, ordered by name. Writers for files that
+// are not in the manifest are closed up front; the others are closed as they
+// are written. This function will always return n == 0.
+func (sw *Writer) WriteTo(w io.Writer) (n int64, err error) {
+	sw.closeUnusedWriters()
+
+	// Name order is required for combining multiple snapshots together.
+	sort.Sort(Files(sw.Manifest.Files))
+
+	// The deferred Close covers the early error returns below.
+	tw := tar.NewWriter(w)
+	defer tw.Close()
+
+	// The manifest goes in as the first archive entry.
+	if err := sw.writeManifestTo(tw); err != nil {
+		return 0, fmt.Errorf("write manifest: %s", err)
+	}
+
+	// Then each backup file, in manifest order.
+	files := sw.Manifest.Files
+	for i := range files {
+		if err := sw.writeFileTo(tw, &files[i]); err != nil {
+			return 0, fmt.Errorf("write file: %s", err)
+		}
+	}
+
+	// Close explicitly so that a failure to finish the archive is reported.
+	if err := tw.Close(); err != nil {
+		return 0, fmt.Errorf("tar close: %s", err)
+	}
+
+	return 0, nil
+}
+
+// writeManifestTo adds the JSON-encoded manifest to the archive as an entry
+// named manifestName, with mode 0666 and the current time as ModTime.
+func (sw *Writer) writeManifestTo(tw *tar.Writer) error {
+	payload, err := json.Marshal(sw.Manifest)
+	if err != nil {
+		return fmt.Errorf("marshal json: %s", err)
+	}
+
+	// The header declares the exact size of the body that follows.
+	hdr := &tar.Header{
+		Name:    manifestName,
+		Size:    int64(len(payload)),
+		Mode:    0666,
+		ModTime: time.Now(),
+	}
+	if err := tw.WriteHeader(hdr); err != nil {
+		return fmt.Errorf("write header: %s", err)
+	}
+	if _, err := tw.Write(payload); err != nil {
+		return fmt.Errorf("write: %s", err)
+	}
+	return nil
+}
+
+// writeFileTo copies one manifest file into the archive using the FileWriter registered under
+// its name, then closes that writer. Writing a byte count other than f.Size is an error.
+func (sw *Writer) writeFileTo(tw *tar.Writer, f *File) error {
+	fw := sw.FileWriters[f.Name]
+	if fw == nil {
+		return fmt.Errorf("file writer not found: name=%s", f.Name)
+	}
+
+	// Announce the entry with the size recorded in the manifest.
+	hdr := &tar.Header{
+		Name:    f.Name,
+		Size:    f.Size,
+		Mode:    0666,
+		ModTime: time.Now(),
+	}
+	if err := tw.WriteHeader(hdr); err != nil {
+		return fmt.Errorf("write header: file=%s, err=%s", f.Name, err)
+	}
+
+	// Stream the file contents and verify the byte count.
+	written, err := fw.WriteTo(tw)
+	if err != nil {
+		return fmt.Errorf("write: file=%s, err=%s", f.Name, err)
+	}
+	if written != f.Size {
+		return fmt.Errorf("short write: file=%s", f.Name)
+	}
+	if err := fw.Close(); err != nil {
+		return fmt.Errorf("close: file=%s, err=%s", f.Name, err)
+	}
+	return nil
+}
+
+// FileWriter is the interface used for writing a file to a snapshot.
+type FileWriter interface {
+	io.WriterTo // streams the file's contents; the reported byte count is checked against File.Size
+	io.Closer   // releases the writer
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/toml/toml.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/toml/toml.go
new file mode 100644
index 00000000000..105c908c70a
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/toml/toml.go
@@ -0,0 +1,72 @@
+package toml
+
+import (
+	"fmt"
+	"strconv"
+	"time"
+)
+
+// maxInt is the largest value representable by the platform's int type (architecture dependent), held as an int64.
+const maxInt = int64(^uint(0) >> 1)
+
+// Duration is a TOML wrapper type for time.Duration; String formats it exactly as time.Duration does.
+type Duration time.Duration
+
+func (d Duration) String() string {
+	return time.Duration(d).String()
+}
+
+// UnmarshalText parses a TOML value into a duration value. The text must be
+// in the format accepted by time.ParseDuration. Empty text is accepted and
+// leaves d unchanged.
+func (d *Duration) UnmarshalText(text []byte) error {
+	// No value set: keep whatever d currently holds.
+	if len(text) == 0 {
+		return nil
+	}
+
+	// Anything else must parse as a duration formatted string.
+	parsed, err := time.ParseDuration(string(text))
+	if err == nil {
+		// Store the value only on a successful parse.
+		*d = Duration(parsed)
+	}
+	return err
+}
+
+// MarshalText converts a duration to its string form for encoding as TOML; it never returns an error.
+func (d Duration) MarshalText() (text []byte, err error) {
+	return []byte(d.String()), nil
+}
+
+// Size represents a TOML-parseable file size in bytes.
+// Users specify it as an integer with a unit suffix: "m" for megabytes (1<<20) or "g" for gigabytes (1<<30).
+type Size int
+
+// UnmarshalText parses a byte size such as "64m" (1<<20 bytes each) or "2g" (1<<30 bytes each) from text.
+// Empty input, a malformed number, an unknown unit or a result larger than maxInt is an error.
+func (s *Size) UnmarshalText(text []byte) error {
+	// Reject empty input up front: slicing off the unit suffix below would panic.
+	if len(text) == 0 {
+		return fmt.Errorf("size was empty")
+	}
+
+	size, err := strconv.ParseInt(string(text[:len(text)-1]), 10, 64) // numeric portion
+	if err != nil {
+		return err
+	}
+
+	switch suffix := text[len(text)-1]; suffix { // unit of measure
+	case 'm':
+		size *= 1 << 20 // MB
+	case 'g':
+		size *= 1 << 30 // GB
+	default:
+		return fmt.Errorf("unknown size suffix: %c", suffix)
+	}
+	if size > maxInt { // check for overflow of the platform int
+		return fmt.Errorf("size %d cannot be represented by an int", size)
+	}
+	*s = Size(size)
+	return nil
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/README.md b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/README.md
new file mode 100644
index 00000000000..a2229ee3bc6
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/README.md
@@ -0,0 +1,85 @@
+# Line Protocol
+
+The line protocol is a text based format for writing points to InfluxDB.  Each line defines a single point. 
+Multiple lines must be separated by the newline character `\n`. The format of the line consists of three parts:
+
+```
+[key] [fields] [timestamp]
+```
+
+Each section is separated by spaces.  The minimum required point consists of a measurement name and at least one field. Points without a specified timestamp will be written using the server's local timestamp. Timestamps are assumed to be in nanoseconds unless a `precision` value is passed in the query string.
+
+## Key
+
+The key is the measurement name and any optional tags separated by commas.  Measurement names, tag keys, and tag values must escape any spaces or commas using a backslash (`\`). For example: `\ ` and `\,`.  All tag values are stored as strings and should not be surrounded in quotes. 
+
+Tags should be sorted by key before being sent for best performance. The sort should match that from the Go `bytes.Compare` function (http://golang.org/pkg/bytes/#Compare).
+
+### Examples
+
+```
+# measurement only
+cpu
+
+# measurement and tags
+cpu,host=serverA,region=us-west
+
+# measurement with commas
+cpu\,01,host=serverA,region=us-west
+
+# tag value with spaces
+cpu,host=server\ A,region=us\ west
+```
+
+## Fields
+
+Fields are key-value metrics associated with the measurement.  Every line must have at least one field.  Multiple fields must be separated with commas and not spaces.
+
+Field keys are always strings and follow the same syntactical rules as described above for tag keys and values. Field values can be one of four types.  The first value written for a given field on a given measurement defines the type of that field for all series under that measurement.
+
+* _integer_ - Numeric values that do not include a decimal.  (e.g. 1, 345, 2015, -10)
+* _float_ - Numeric values that include a decimal.  (e.g. 1.0, -3.14, 6.0e+5).  Note that all values _must_ have a decimal even if the decimal value is zero (1 is an _integer_, 1.0 is a _float_).
+* _boolean_ - A value indicating true or false.  Valid boolean strings are (t, T, true, TRUE, f, F, false, and FALSE).
+* _string_ - A text value.  All string values _must_ be surrounded in double-quotes `"`.  If the string contains
+a double-quote, it must be escaped with a backslash, e.g. `\"`.
+
+
+```
+# integer value
+cpu value=1
+
+# float value
+cpu_load value=1.2
+
+# boolean value
+error fatal=true
+
+# string value
+event msg="logged out"
+
+# multiple values
+cpu load=10.0,alert=true,reason="value above maximum threshold"
+```
+
+## Timestamp
+
+The timestamp section is optional but should be specified if possible.  The value is an integer representing nanoseconds since the epoch. If the timestamp is not provided the point will inherit the server's local timestamp.
+
+Some write APIs allow passing a lower precision.  If the API supports a lower precision, the timestamp may also be
+an integer epoch in microseconds, milliseconds, seconds, minutes or hours.
+
+## Full Example
+A full example is shown below.
+```
+cpu,host=server01,region=uswest value=1.0 1434055562000000000
+cpu,host=server02,region=uswest value=3.0 1434055562000010000
+```
+In this example the first line shows a `measurement` of "cpu", there are two tags "host" and "region", the `value` is 1.0, and the `timestamp` is 1434055562000000000. Following this is a second line, also a point in the `measurement` "cpu" but belonging to a different "host".
+```
+cpu,host=server\ 01,region=uswest value=1.0,msg="all systems nominal"
+cpu,host=server\ 01,region=us\,west value_int=1
+```
+In these examples, the "host" is set to `server 01`. The field value associated with field key `msg` is double-quoted, as it is a string. The second example shows a region of `us,west` with the comma properly escaped. In the first example `value` is written as a floating point number. In the second, `value_int` is an integer. 
+
+# Distributed Queries
+
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher.go
new file mode 100644
index 00000000000..d1a4cf2b74d
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher.go
@@ -0,0 +1,142 @@
+package tsdb
+
+import (
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+// PointBatcher accepts Points and will emit a batch of those points when either
+// a) the batch reaches a certain size, or b) a certain time passes.
+type PointBatcher struct {
+	size     int           // a batch is emitted once it holds at least this many points
+	duration time.Duration // when > 0, a timer of this length is armed for each new batch
+
+	stop  chan struct{} // closed by Stop to end the batching goroutine
+	in    chan Point    // points to be batched (see In)
+	out   chan []Point  // emitted batches (see Out)
+	flush chan struct{} // signalled by Flush to emit the pending batch
+
+	stats PointBatcherStats // counters, updated with sync/atomic
+
+	wg *sync.WaitGroup // nil until Start has been called
+}
+
+// NewPointBatcher returns a new, not yet started PointBatcher configured with
+// batch size sz and batch duration d.
+func NewPointBatcher(sz int, d time.Duration) *PointBatcher {
+	b := &PointBatcher{size: sz, duration: d}
+	// All channels are unbuffered.
+	b.stop = make(chan struct{})
+	b.in = make(chan Point)
+	b.out = make(chan []Point)
+	b.flush = make(chan struct{})
+	return b
+}
+
+// PointBatcherStats are the statistics each batcher tracks.
+type PointBatcherStats struct {
+	BatchTotal   uint64 // Total count of batches transmitted.
+	PointTotal   uint64 // Total count of points received on the in channel.
+	SizeTotal    uint64 // Number of batches that reached size threshold.
+	TimeoutTotal uint64 // Number of batches emitted because the batch timer fired.
+}
+
+// Start launches the batching goroutine; it does nothing if already started.
+// Points arrive on In() and finished batches are delivered on Out().
+func (b *PointBatcher) Start() {
+	if b.wg != nil {
+		return
+	}
+
+	var timer *time.Timer
+	var batch []Point
+	var timerCh <-chan time.Time
+
+	emit := func() {
+		b.out <- batch
+		atomic.AddUint64(&b.stats.BatchTotal, 1)
+		batch = nil
+		if timer != nil {
+			timer.Stop() // release the timer now rather than when it expires
+		}
+		timer, timerCh = nil, nil
+	}
+
+	b.wg = &sync.WaitGroup{}
+	b.wg.Add(1)
+
+	go func() {
+		defer b.wg.Done()
+		for {
+			select {
+			case <-b.stop:
+				if len(batch) > 0 {
+					emit()
+				}
+				return
+			case p := <-b.in:
+				atomic.AddUint64(&b.stats.PointTotal, 1)
+				if batch == nil {
+					batch = make([]Point, 0, b.size)
+					if b.duration > 0 {
+						timer = time.NewTimer(b.duration)
+						timerCh = timer.C
+					}
+				}
+
+				batch = append(batch, p)
+				if len(batch) >= b.size { // 0 means send immediately.
+					atomic.AddUint64(&b.stats.SizeTotal, 1)
+					emit()
+				}
+
+			case <-b.flush:
+				if len(batch) > 0 {
+					emit()
+				}
+
+			case <-timerCh:
+				atomic.AddUint64(&b.stats.TimeoutTotal, 1)
+				emit()
+			}
+		}
+	}()
+}
+
+// Stop closes the stop channel and waits for the batching goroutine, which first emits any
+// pending batch on out. It is a no-op before Start; a second call panics on the repeated close.
+func (b *PointBatcher) Stop() {
+	if b.wg == nil {
+		return
+	}
+	close(b.stop)
+	b.wg.Wait()
+}
+
+// In returns the send-only view of the channel to which points should be written.
+func (b *PointBatcher) In() chan<- Point {
+	return b.in
+}
+
+// Out returns the receive-only view of the channel from which batches should be read.
+func (b *PointBatcher) Out() <-chan []Point {
+	return b.out
+}
+
+// Flush instructs the batcher to emit any pending points in a batch, regardless of batch size.
+// If there are no pending points, no batch is emitted. The send is unbuffered, so it blocks until the batching goroutine receives it.
+func (b *PointBatcher) Flush() {
+	b.flush <- struct{}{}
+}
+
+// Stats returns a snapshot of the batcher's counters. Each counter is loaded
+// atomically but separately, so the values are not guaranteed to be mutually consistent.
+func (b *PointBatcher) Stats() *PointBatcherStats {
+	return &PointBatcherStats{
+		BatchTotal:   atomic.LoadUint64(&b.stats.BatchTotal),
+		PointTotal:   atomic.LoadUint64(&b.stats.PointTotal),
+		SizeTotal:    atomic.LoadUint64(&b.stats.SizeTotal),
+		TimeoutTotal: atomic.LoadUint64(&b.stats.TimeoutTotal),
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/config.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/config.go
new file mode 100644
index 00000000000..a74caceec95
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/config.go
@@ -0,0 +1,34 @@
+package tsdb
+
+import (
+	"time"
+
+	"github.com/influxdb/influxdb/toml"
+)
+
+const (
+	// DefaultMaxWALSize is the default size of the WAL, in bytes, before it is flushed.
+	DefaultMaxWALSize = 100 * 1024 * 1024 // 100MB
+
+	// DefaultWALFlushInterval is the default interval at which the WAL is flushed if
+	// it doesn't reach its size threshold.
+	DefaultWALFlushInterval = 10 * time.Minute
+
+	// DefaultWALPartitionFlushDelay is the default sleep time between WAL partition flushes.
+	DefaultWALPartitionFlushDelay = 2 * time.Second
+)
+
+type Config struct { // Config is the TOML-decodable configuration of package tsdb.
+	Dir                    string        `toml:"dir"`                       // no default is set by NewConfig
+	MaxWALSize             int           `toml:"max-wal-size"`              // defaults to DefaultMaxWALSize
+	WALFlushInterval       toml.Duration `toml:"wal-flush-interval"`        // defaults to DefaultWALFlushInterval
+	WALPartitionFlushDelay toml.Duration `toml:"wal-partition-flush-delay"` // defaults to DefaultWALPartitionFlushDelay
+}
+
+func NewConfig() Config {
+	return Config{
+		MaxWALSize:             DefaultMaxWALSize,
+		WALFlushInterval:       toml.Duration(DefaultWALFlushInterval),
+		WALPartitionFlushDelay: toml.Duration(DefaultWALPartitionFlushDelay),
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/doc.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/doc.go
new file mode 100644
index 00000000000..50c110ab24f
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/doc.go
@@ -0,0 +1,5 @@
+/*
+Package tsdb implements a durable time series database.
+
+*/
+package tsdb
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine.go
new file mode 100644
index 00000000000..74fed3298a8
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine.go
@@ -0,0 +1,961 @@
+package tsdb
+
+import (
+	"fmt"
+	"math"
+	"sort"
+	"time"
+
+	"github.com/influxdb/influxdb/influxql"
+)
+
+const (
+	// MaxGroupByPoints is the number of points in a group by statement above which an error is returned
+	// to the user. Most likely they specified a group by interval without time boundaries.
+	MaxGroupByPoints = 100000
+
+	// SelectColumnCountWithOneValue is the column count when selecting a single value: time plus that value.
+	SelectColumnCountWithOneValue = 2
+
+	// IgnoredChunkSize is what gets passed into Mapper.Begin for aggregate queries as they don't chunk points out
+	IgnoredChunkSize = 0
+)
+
+// MapperResponse is the structure responses from mappers take over the network. TagSets
+// is only set with the first response. Data will be nil when the Mapper has no more data.
+type MapperResponse struct {
+	TagSets []string `json:"tagSets,omitempty"` // omitted from the JSON when empty
+	Data    []byte   `json:"data"`
+}
+
+// Mapper is the interface all Mapper types must implement.
+type Mapper interface {
+	Open() error // called by the executors before any chunk is requested
+	TagSets() []string
+	NextChunk() (interface{}, error) // the executors treat a nil chunk as the end of the mapper's data
+	Close()                          // called by the executors' close methods
+}
+
+// StatefulMapper encapsulates a Mapper and some state that the executor needs to
+// track for that mapper.
+type StatefulMapper struct {
+	Mapper
+	bufferedChunk *mapperOutput // Last read chunk that has not been fully consumed yet.
+	drained       bool          // Set by the executors once NextChunk has returned no chunk.
+}
+
+// Executor is the interface all Executor types must implement.
+type Executor interface {
+	Execute() <-chan *influxql.Row // returns the channel on which result rows are delivered
+}
+
+// NextChunk fetches the next chunk from the wrapped Mapper and returns it as a
+// *mapperOutput. A result that is nil, or that is not a *mapperOutput, yields
+// a nil chunk together with a nil error.
+func (srm *StatefulMapper) NextChunk() (*mapperOutput, error) {
+	raw, err := srm.Mapper.NextChunk()
+	if err != nil {
+		return nil, err
+	}
+
+	// The comma-ok form leaves output nil when the assertion fails; no error
+	// is reported for that case.
+	output, _ := raw.(*mapperOutput)
+	return output, nil
+}
+
+// RawExecutor is an executor for RawMappers.
+type RawExecutor struct {
+	stmt           *influxql.SelectStatement // supplies limit, offset, fields and derivative settings
+	mappers        []*StatefulMapper
+	chunkSize      int                 // passed to the limitedRowWriter; see IgnoredChunkSize
+	limitedTagSets map[string]struct{} // Set tagsets for which data has reached the LIMIT.
+}
+
+// NewRawExecutor returns a RawExecutor for stmt that wraps each mapper in a fresh StatefulMapper.
+func NewRawExecutor(stmt *influxql.SelectStatement, mappers []Mapper, chunkSize int) *RawExecutor {
+	re := &RawExecutor{
+		stmt:           stmt,
+		mappers:        make([]*StatefulMapper, 0, len(mappers)),
+		chunkSize:      chunkSize,
+		limitedTagSets: make(map[string]struct{}),
+	}
+	for _, m := range mappers {
+		re.mappers = append(re.mappers, &StatefulMapper{Mapper: m})
+	}
+	return re
+}
+
+// Execute starts running the query in a new goroutine and returns the
+// unbuffered channel on which the resulting rows are delivered.
+func (re *RawExecutor) Execute() <-chan *influxql.Row {
+	rows := make(chan *influxql.Row)
+	go re.execute(rows)
+	return rows
+}
+
+// execute opens the mappers, merges their chunks tagset by tagset and sends the rows on out.
+// out is closed only on normal completion; after an error row has been sent it is left open.
+func (re *RawExecutor) execute(out chan *influxql.Row) {
+	// It's important that all resources are released when execution completes.
+	defer re.close()
+	// Open the mappers.
+	for _, m := range re.mappers {
+		if err := m.Open(); err != nil {
+			out <- &influxql.Row{Err: err}
+			return
+		}
+	}
+	// rowWriter emits the rows of the tagset currently being processed (currTagset).
+	var rowWriter *limitedRowWriter
+	var currTagset string
+
+	// Keep looping until all mappers drained.
+	var err error
+	for {
+		// Get the next chunk from each Mapper.
+		for _, m := range re.mappers {
+			if m.drained {
+				continue
+			}
+
+			// Set the next buffered chunk on the mapper, or mark it drained.
+			for {
+				if m.bufferedChunk == nil {
+					m.bufferedChunk, err = m.NextChunk()
+					if err != nil {
+						out <- &influxql.Row{Err: err}
+						return
+					}
+					if m.bufferedChunk == nil {
+						// Mapper can do no more for us.
+						m.drained = true
+						break
+					}
+				}
+
+				if re.tagSetIsLimited(m.bufferedChunk.Name) { // NOTE(review): checked by Name here but recorded by key() below — confirm they agree
+					// chunk's tagset is limited, so no good. Try again.
+					m.bufferedChunk = nil
+					continue
+				}
+				// This mapper has a chunk available, and it is not limited.
+				break
+			}
+		}
+
+		// All Mappers done?
+		if re.mappersDrained() {
+			rowWriter.Flush() // safe when rowWriter is nil: Flush returns immediately for a nil receiver
+			break
+		}
+
+		// Send out data for the next alphabetically-lowest tagset. All Mappers emit data in this order,
+		// so by always continuing with the lowest tagset until it is finished, we process all data in
+		// the required order, and don't "miss" any.
+		tagset := re.nextMapperTagSet()
+		if tagset != currTagset {
+			currTagset = tagset
+			// Tagset has changed, time for a new rowWriter. Be sure to kick out any residual values.
+			rowWriter.Flush()
+			rowWriter = nil
+		}
+
+		// Process the mapper outputs. We can send out everything up to the min of the last time
+		// of the chunks for the next tagset.
+		minTime := re.nextMapperLowestTime(tagset)
+
+		// Now empty out all the chunks up to the min time. Create new output struct for this data.
+		var chunkedOutput *mapperOutput
+		for _, m := range re.mappers {
+			if m.drained {
+				continue
+			}
+
+			// This mapper's next chunk is not for the next tagset, or the very first value of
+			// the chunk is at a higher acceptable timestamp. Skip it.
+			if m.bufferedChunk.key() != tagset || m.bufferedChunk.Values[0].Time > minTime {
+				continue
+			}
+
+			// Find the index of the first point beyond the min time (all points if there is none).
+			ind := len(m.bufferedChunk.Values)
+			for i, mo := range m.bufferedChunk.Values {
+				if mo.Time > minTime {
+					ind = i
+					break
+				}
+			}
+
+			// Add up to the index to the values
+			if chunkedOutput == nil {
+				chunkedOutput = &mapperOutput{
+					Name: m.bufferedChunk.Name,
+					Tags: m.bufferedChunk.Tags,
+				}
+				chunkedOutput.Values = m.bufferedChunk.Values[:ind]
+			} else {
+				chunkedOutput.Values = append(chunkedOutput.Values, m.bufferedChunk.Values[:ind]...)
+			}
+
+			// Clear out the values being sent out, keep the remainder.
+			m.bufferedChunk.Values = m.bufferedChunk.Values[ind:]
+
+			// If we emptied out all the values, clear the mapper's buffered chunk.
+			if len(m.bufferedChunk.Values) == 0 {
+				m.bufferedChunk = nil
+			}
+		}
+
+		// Sort the values by time first so we can then handle offset and limit
+		sort.Sort(mapperValues(chunkedOutput.Values))
+
+		// Now that we have full name and tag details, initialize the rowWriter.
+		// The Name and Tags will be the same for all mappers.
+		if rowWriter == nil {
+			rowWriter = &limitedRowWriter{
+				limit:       re.stmt.Limit,
+				offset:      re.stmt.Offset,
+				chunkSize:   re.chunkSize,
+				name:        chunkedOutput.Name,
+				tags:        chunkedOutput.Tags,
+				selectNames: re.stmt.NamesInSelect(),
+				fields:      re.stmt.Fields,
+				c:           out,
+			}
+		}
+		if re.stmt.HasDerivative() { // NOTE(review): runs on every pass, so the transformer and its carried-over last value are recreated each time — confirm intended
+			interval, err := derivativeInterval(re.stmt)
+			if err != nil {
+				out <- &influxql.Row{Err: err}
+				return
+			}
+			rowWriter.transformer = &rawQueryDerivativeProcessor{
+				isNonNegative:      re.stmt.FunctionCalls()[0].Name == "non_negative_derivative",
+				derivativeInterval: interval,
+			}
+		}
+
+		// Emit the data via the limiter.
+		if limited := rowWriter.Add(chunkedOutput.Values); limited {
+			// Limit for this tagset was reached, mark it and start draining a new tagset.
+			re.limitTagSet(chunkedOutput.key())
+			continue
+		}
+	}
+
+	close(out)
+}
+
+// mappersDrained reports whether every Mapper of the executor has been
+// drained of data. It is true when the executor has no mappers at all.
+func (re *RawExecutor) mappersDrained() bool {
+	allDrained := true
+	for i := 0; allDrained && i < len(re.mappers); i++ {
+		allDrained = re.mappers[i].drained
+	}
+	return allDrained
+}
+
+// nextMapperTagSet returns the alphabetically lowest key among the chunks
+// currently buffered by the Mappers, or "" when no chunk is buffered.
+func (re *RawExecutor) nextMapperTagSet() string {
+	lowest := ""
+	for _, m := range re.mappers {
+		if m.bufferedChunk == nil {
+			continue
+		}
+		if key := m.bufferedChunk.key(); lowest == "" || key < lowest {
+			lowest = key
+		}
+	}
+	return lowest
+}
+
+// nextMapperLowestTime returns, for the given tagset, the smallest last-value
+// time among the buffered chunks, or math.MaxInt64 if no chunk matches.
+func (re *RawExecutor) nextMapperLowestTime(tagset string) int64 {
+	lowest := int64(math.MaxInt64)
+	for _, m := range re.mappers {
+		// Only undrained mappers holding a chunk for this tagset take part.
+		if m.drained || m.bufferedChunk == nil || m.bufferedChunk.key() != tagset {
+			continue
+		}
+		vals := m.bufferedChunk.Values
+		if last := vals[len(vals)-1].Time; last < lowest {
+			lowest = last
+		}
+	}
+	return lowest
+}
+
+// tagSetIsLimited reports whether the given tagset has been marked as LIMITed.
+func (re *RawExecutor) tagSetIsLimited(tagset string) bool {
+	_, limited := re.limitedTagSets[tagset]
+	return limited
+}
+
+// limitTagSet marks the given tagset as LIMITed.
+func (re *RawExecutor) limitTagSet(tagset string) {
+	re.limitedTagSets[tagset] = struct{}{}
+}
+
+// close closes every mapper of the executor such that all resources are released. Once closed,
+// an executor may not be re-used. Calling it on a nil receiver does nothing.
+func (re *RawExecutor) close() {
+	if re != nil {
+		for _, m := range re.mappers {
+			m.Close()
+		}
+	}
+}
+
+// AggregateExecutor is an executor for AggregateMappers.
+type AggregateExecutor struct {
+	stmt      *influxql.SelectStatement // supplies the function calls, fields and fill options
+	queryTMin int64                     // Needed? (not set by NewAggregateExecutor)
+	queryTMax int64                     // Needed? (not set by NewAggregateExecutor)
+	mappers   []*StatefulMapper
+}
+
+// NewAggregateExecutor returns an AggregateExecutor for stmt that wraps each mapper in a fresh StatefulMapper.
+func NewAggregateExecutor(stmt *influxql.SelectStatement, mappers []Mapper) *AggregateExecutor {
+	ae := &AggregateExecutor{
+		stmt:    stmt,
+		mappers: make([]*StatefulMapper, 0, len(mappers)),
+	}
+	for _, m := range mappers {
+		ae.mappers = append(ae.mappers, &StatefulMapper{Mapper: m})
+	}
+	return ae
+}
+
+// Execute starts running the query in a new goroutine and returns the
+// unbuffered channel on which the resulting rows are delivered.
+func (ae *AggregateExecutor) Execute() <-chan *influxql.Row {
+	rows := make(chan *influxql.Row)
+	go ae.execute(rows)
+	return rows
+}
+
+// execute reduces the mappers' chunks into one row per tagset and sends the rows on out; out is closed only on normal completion.
+func (ae *AggregateExecutor) execute(out chan *influxql.Row) {
+	// It's important to close all resources when execution completes.
+	defer ae.close()
+	// Create the functions which will reduce values from mappers for
+	// a given interval. The function offsets within this slice match
+	// the offsets within the value slices that are returned by the
+	// mapper.
+	aggregates := ae.stmt.FunctionCalls()
+	reduceFuncs := make([]influxql.ReduceFunc, len(aggregates))
+	for i, c := range aggregates {
+		reduceFunc, err := influxql.InitializeReduceFunc(c)
+		if err != nil {
+			out <- &influxql.Row{Err: err}
+			return
+		}
+		reduceFuncs[i] = reduceFunc
+	}
+
+	// Put together the rows to return, starting with columns.
+	columnNames := make([]string, len(ae.stmt.Fields)+1)
+	columnNames[0] = "time"
+	for i, f := range ae.stmt.Fields {
+		columnNames[i+1] = f.Name()
+	}
+
+	// Open the mappers.
+	for _, m := range ae.mappers {
+		if err := m.Open(); err != nil {
+			out <- &influxql.Row{Err: err}
+			return
+		}
+	}
+
+	// Build the set of available tagsets across all mappers. This is used for
+	// later checks.
+	availTagSets := newStringSet()
+	for _, m := range ae.mappers {
+		for _, t := range m.TagSets() {
+			availTagSets.add(t)
+		}
+	}
+
+	// Prime each mapper's chunk buffer.
+	var err error
+	for _, m := range ae.mappers {
+		m.bufferedChunk, err = m.NextChunk()
+		if err != nil {
+			out <- &influxql.Row{Err: err}
+			return
+		}
+		if m.bufferedChunk == nil {
+			m.drained = true
+		}
+	}
+
+	// Keep looping until all mappers drained.
+	for !ae.mappersDrained() {
+		// Send out data for the next alphabetically-lowest tagset. All Mappers send out in this order
+		// so collect data for this tagset, ignoring all others.
+		tagset := ae.nextMapperTagSet()
+		chunks := []*mapperOutput{}
+
+		// Pull as much as possible from each mapper. Stop when a mapper offers
+		// data for a new tagset, or empties completely.
+		for _, m := range ae.mappers {
+			if m.drained {
+				continue
+			}
+
+			for {
+				if m.bufferedChunk == nil {
+					m.bufferedChunk, err = m.NextChunk()
+					if err != nil {
+						out <- &influxql.Row{Err: err}
+						return
+					}
+					if m.bufferedChunk == nil {
+						m.drained = true
+						break
+					}
+				}
+
+				// Got a chunk. Can we use it?
+				if m.bufferedChunk.key() != tagset {
+					// No, so just leave it in the buffer.
+					break
+				}
+				// We can, take it.
+				chunks = append(chunks, m.bufferedChunk)
+				m.bufferedChunk = nil
+			}
+		}
+
+		// Prep a row, ready for kicking out.
+		var row *influxql.Row
+
+		// Prep for bucketing data by start time of the interval.
+		buckets := map[int64][][]interface{}{}
+
+		for _, chunk := range chunks {
+			if row == nil {
+				row = &influxql.Row{
+					Name:    chunk.Name,
+					Tags:    chunk.Tags,
+					Columns: columnNames,
+				}
+			}
+
+			startTime := chunk.Values[0].Time // only the first value of each chunk is consulted
+			_, ok := buckets[startTime]
+			values := chunk.Values[0].Value.([]interface{})
+			if !ok {
+				buckets[startTime] = make([][]interface{}, len(values))
+			}
+			for i, v := range values {
+				buckets[startTime][i] = append(buckets[startTime][i], v)
+			}
+		}
+
+		// Now, after the loop above, within each time bucket is a slice. Within the element of each
+		// slice is another slice of interface{}, ready for passing to the reducer functions.
+
+		// Work each bucket of time, in time ascending order.
+		tMins := make(int64arr, 0, len(buckets))
+		for k, _ := range buckets {
+			tMins = append(tMins, k)
+		}
+		sort.Sort(tMins)
+
+		values := make([][]interface{}, len(tMins))
+		for i, t := range tMins {
+			values[i] = make([]interface{}, 0, len(columnNames))
+			values[i] = append(values[i], time.Unix(0, t).UTC()) // Time value is always first.
+
+			for j, f := range reduceFuncs {
+				reducedVal := f(buckets[t][j])
+				values[i] = append(values[i], reducedVal)
+			}
+		}
+
+		// Perform any mathematics.
+		values = processForMath(ae.stmt.Fields, values)
+
+		// Handle any fill options
+		values = ae.processFill(values)
+
+		// process derivatives
+		values = ae.processDerivative(values)
+
+		// If we have multiple tag sets we'll want to filter out the empty ones
+		if len(availTagSets.list()) > 1 && resultsEmpty(values) {
+			continue
+		}
+
+		row.Values = values
+		out <- row
+	}
+
+	close(out)
+}
+
+// processFill will take the results and return new results (or the same if no fill modifications are needed)
+// with whatever fill options the query has. For the previous/number fills the rows are modified in place.
+func (ae *AggregateExecutor) processFill(results [][]interface{}) [][]interface{} {
+	// don't do anything if we're supposed to leave the nulls
+	if ae.stmt.Fill == influxql.NullFill {
+		return results
+	}
+
+	if ae.stmt.Fill == influxql.NoFill {
+		// remove any rows that have even one nil value. This one is tricky because they could have multiple
+		// aggregates, but this option means that any row that has even one nil gets purged.
+		newResults := make([][]interface{}, 0, len(results))
+		for _, vals := range results {
+			hasNil := false
+			// start at 1 because the first value is always time
+			for j := 1; j < len(vals); j++ {
+				if vals[j] == nil {
+					hasNil = true
+					break
+				}
+			}
+			if !hasNil {
+				newResults = append(newResults, vals)
+			}
+		}
+		return newResults
+	}
+
+	// They're either filling with previous values or a specific number
+	for i, vals := range results {
+		// start at 1 because the first value is always time
+		for j := 1; j < len(vals); j++ {
+			if vals[j] == nil {
+				switch ae.stmt.Fill {
+				case influxql.PreviousFill:
+					if i != 0 { // the first row has no predecessor and keeps its nil
+						vals[j] = results[i-1][j] // the previous row has already been filled by this loop
+					}
+				case influxql.NumberFill:
+					vals[j] = ae.stmt.FillValue
+				}
+			}
+		}
+	}
+	return results
+}
+
+// processDerivative returns the derivatives of the results when the statement
+// has a derivative call; otherwise, or on a bad interval, results is returned.
+func (ae *AggregateExecutor) processDerivative(results [][]interface{}) [][]interface{} {
+	if !ae.stmt.HasDerivative() {
+		return results
+	}
+	interval, err := derivativeInterval(ae.stmt)
+	if err != nil {
+		return results // XXX need to handle this better.
+	}
+
+	// non_negative_derivative asks for negative differences to be dropped.
+	nonNegative := ae.stmt.FunctionCalls()[0].Name == "non_negative_derivative"
+	return processAggregateDerivative(results, nonNegative, interval)
+}
+
+// mappersDrained reports whether every Mapper of the executor has been
+// drained of data. It is true when the executor has no mappers at all.
+func (ae *AggregateExecutor) mappersDrained() bool {
+	allDrained := true
+	for i := 0; allDrained && i < len(ae.mappers); i++ {
+		allDrained = ae.mappers[i].drained
+	}
+	return allDrained
+}
+
+// nextMapperTagSet returns the alphabetically lowest key among the chunks
+// currently buffered by the Mappers, or "" when no chunk is buffered.
+func (ae *AggregateExecutor) nextMapperTagSet() string {
+	lowest := ""
+	for _, m := range ae.mappers {
+		if m.bufferedChunk == nil {
+			continue
+		}
+		if key := m.bufferedChunk.key(); lowest == "" || key < lowest {
+			lowest = key
+		}
+	}
+	return lowest
+}
+
+// close closes every mapper of the executor such that all resources are released. Once closed,
+// an executor may not be re-used.
+func (ae *AggregateExecutor) close() {
+	for _, m := range ae.mappers {
+		m.Close()
+	}
+}
+
+// limitedRowWriter accepts raw mapper values, and will emit those values as rows in chunks
+// of the given size. If the chunk size is 0, no chunking will be performed. In addition if
+// limit is reached, outstanding values will be emitted. If limit is zero, no limit is enforced.
+type limitedRowWriter struct {
+	chunkSize   int
+	limit       int
+	offset      int // number of leading values to drop before any are emitted
+	name        string
+	tags        map[string]string
+	selectNames []string
+	fields      influxql.Fields
+	c           chan *influxql.Row // rows are sent here
+
+	currValues  []*mapperValue // values accepted but not yet emitted
+	totalOffSet int            // values dropped so far to satisfy offset
+	totalSent   int            // values emitted so far
+
+	transformer interface { // optional; when set, applied to the values of every emitted row
+		process(input []*mapperValue) []*mapperValue
+	}
+}
+
+// Add appends values to the pending set and emits rows according to the
+// offset, limit and chunking settings. The result is true once the limit has
+// been reached and no more values should be added; in that case only values
+// up to the limit are emitted.
+func (r *limitedRowWriter) Add(values []*mapperValue) (limited bool) {
+	// Lazily allocate the pending buffer with room for one chunk.
+	if r.currValues == nil {
+		r.currValues = make([]*mapperValue, 0, r.chunkSize)
+	}
+
+	// Skip leading values until the requested offset has been consumed.
+	if skip := r.offset - r.totalOffSet; skip > 0 {
+		if skip >= len(values) {
+			// The whole slice falls inside the offset.
+			r.totalOffSet += len(values)
+			return false
+		}
+		// Drop leading values and keep going.
+		values = values[skip:]
+		r.totalOffSet += skip
+	}
+	r.currValues = append(r.currValues, values...)
+
+	// Check the limit against what was sent plus what is pending.
+	limited = r.limit > 0 && r.totalSent+len(r.currValues) >= r.limit
+	if limited {
+		// The pending values satisfy the limit; keep only what is still owed.
+		r.currValues = r.currValues[:r.limit-r.totalSent]
+	}
+
+	switch {
+	case r.chunkSize != IgnoredChunkSize:
+		// Chunking is in effect: send out every complete chunk.
+		for len(r.currValues) >= r.chunkSize {
+			// The next chunk is the first chunkSize pending values.
+			chunk := r.currValues[:r.chunkSize]
+			r.c <- r.processValues(chunk)
+			r.currValues = r.currValues[r.chunkSize:]
+		}
+
+		// A remainder smaller than the chunk size normally stays pending,
+		// but once the limit is reached it is kicked out as well.
+		if limited && len(r.currValues) > 0 {
+			r.c <- r.processValues(r.currValues)
+			r.currValues = nil
+		}
+	case limited:
+		// No chunking in effect, but the limit has been reached.
+		r.c <- r.processValues(r.currValues)
+		r.currValues = nil
+	}
+
+	return limited
+}
+
+// Flush instructs the limitedRowWriter to emit any pending values as a single row,
+// adhering to any limits. Chunking is not enforced. It is safe to call on a nil receiver.
+func (r *limitedRowWriter) Flush() {
+	if r == nil {
+		return
+	}
+
+	// If at least some rows were sent, and no values are pending, then don't
+	// emit anything, since at least 1 row was previously emitted. This ensures
+	// that if no rows were ever sent, at least 1 will be emitted, even an empty row.
+	if r.totalSent != 0 && len(r.currValues) == 0 {
+		return
+	}
+
+	if r.limit > 0 && len(r.currValues) > r.limit { // NOTE(review): compares against limit without subtracting totalSent — confirm intended
+		r.currValues = r.currValues[:r.limit]
+	}
+	r.c <- r.processValues(r.currValues)
+	r.currValues = nil
+}
+
+// processValues builds a single row from the given values and adds the number of values in it to totalSent.
+func (r *limitedRowWriter) processValues(values []*mapperValue) *influxql.Row {
+	defer func() {
+		r.totalSent += len(values)
+	}()
+
+	selectNames := r.selectNames
+
+	if r.transformer != nil {
+		values = r.transformer.process(values)
+	}
+
+	// ensure that time is in the select names and in the first position
+	hasTime := false
+	for i, n := range selectNames {
+		if n == "time" {
+			// Swap time to the first argument for names (this reorders r.selectNames in place)
+			if i != 0 {
+				selectNames[0], selectNames[i] = selectNames[i], selectNames[0]
+			}
+			hasTime = true
+			break
+		}
+	}
+
+	// time should always be in the list of names they get back
+	if !hasTime {
+		selectNames = append([]string{"time"}, selectNames...)
+	}
+
+	// since selectNames can contain tags, we need to strip them out
+	selectFields := make([]string, 0, len(selectNames))
+
+	for _, n := range selectNames {
+		if _, found := r.tags[n]; !found {
+			selectFields = append(selectFields, n)
+		}
+	}
+
+	row := &influxql.Row{
+		Name:    r.name,
+		Tags:    r.tags,
+		Columns: selectFields,
+	}
+
+	// Kick out an empty row if no results are available.
+	if len(values) == 0 {
+		return row
+	}
+
+	// if they've selected only a single value we have to handle things a little differently
+	singleValue := len(selectFields) == SelectColumnCountWithOneValue
+
+	// the results will have all of the raw mapper results, convert into the row
+	for _, v := range values {
+		vals := make([]interface{}, len(selectFields))
+
+		if singleValue {
+			vals[0] = time.Unix(0, v.Time).UTC()
+			vals[1] = v.Value // plain copy: asserting to interface{} would panic on a nil Value
+		} else {
+			fields := v.Value.(map[string]interface{})
+
+			// time is always the first value
+			vals[0] = time.Unix(0, v.Time).UTC()
+
+			// populate the other values
+			for i := 1; i < len(selectFields); i++ {
+				vals[i] = fields[selectFields[i]]
+			}
+		}
+
+		row.Values = append(row.Values, vals)
+	}
+
+	// Perform any mathematical post-processing.
+	row.Values = processForMath(r.fields, row.Values)
+
+	return row
+}
+
+type rawQueryDerivativeProcessor struct { // computes derivatives of raw query values
+	lastValueFromPreviousChunk *mapperValue // most recent value seen by process; nil before the first call
+	isNonNegative              bool         // Whether to drop negative differences
+	derivativeInterval         time.Duration // unit to which the elapsed time is normalized
+}
+
+func (rqdp *rawQueryDerivativeProcessor) process(input []*mapperValue) []*mapperValue {
+	if len(input) == 0 {
+		return input
+	}
+
+	// If we only have 1 value, then the value did not change, so return
+	// a single row with 0.0
+	if len(input) == 1 {
+		return []*mapperValue{
+			&mapperValue{
+				Time:  input[0].Time,
+				Value: 0.0,
+			},
+		}
+	}
+
+	if rqdp.lastValueFromPreviousChunk == nil {
+		rqdp.lastValueFromPreviousChunk = input[0]
+	}
+
+	derivativeValues := []*mapperValue{}
+	for i := 1; i < len(input); i++ {
+		v := input[i]
+
+		// Calculate the derivative of successive points by dividing the difference
+		// of each value by the elapsed time normalized to the interval
+		diff := int64toFloat64(v.Value) - int64toFloat64(rqdp.lastValueFromPreviousChunk.Value)
+
+		elapsed := v.Time - rqdp.lastValueFromPreviousChunk.Time
+
+		value := 0.0
+		if elapsed > 0 {
+			value = diff / (float64(elapsed) / float64(rqdp.derivativeInterval))
+		}
+
+		rqdp.lastValueFromPreviousChunk = v
+
+		// Drop negative values for non-negative derivatives
+		if rqdp.isNonNegative && diff < 0 {
+			continue
+		}
+
+		derivativeValues = append(derivativeValues, &mapperValue{
+			Time:  v.Time,
+			Value: value,
+		})
+	}
+
+	return derivativeValues
+}
+
+// processForMath will apply any math that was specified in the select statement
+// against the passed in results
+func processForMath(fields influxql.Fields, results [][]interface{}) [][]interface{} {
+	hasMath := false
+	for _, f := range fields {
+		if _, ok := f.Expr.(*influxql.BinaryExpr); ok {
+			hasMath = true
+		} else if _, ok := f.Expr.(*influxql.ParenExpr); ok {
+			hasMath = true
+		}
+	}
+
+	if !hasMath {
+		return results
+	}
+
+	processors := make([]influxql.Processor, len(fields))
+	startIndex := 1
+	for i, f := range fields {
+		processors[i], startIndex = influxql.GetProcessor(f.Expr, startIndex)
+	}
+
+	mathResults := make([][]interface{}, len(results))
+	for i, _ := range mathResults {
+		mathResults[i] = make([]interface{}, len(fields)+1)
+		// put the time in
+		mathResults[i][0] = results[i][0]
+		for j, p := range processors {
+			mathResults[i][j+1] = p(results[i])
+		}
+	}
+
+	return mathResults
+}
+
+// processAggregateDerivative returns the derivatives of an aggregate result set.
+// Rows are read as [time.Time, int64|float64|nil]; every output row is [time, float64].
+func processAggregateDerivative(results [][]interface{}, isNonNegative bool, interval time.Duration) [][]interface{} {
+	switch len(results) {
+	case 0:
+		// Return early if we can't calculate derivatives
+		return results
+	case 1:
+		// A single value did not change, so return a single row w/ 0.0
+		return [][]interface{}{
+			[]interface{}{results[0][0], 0.0},
+		}
+	}
+
+	// Otherwise calculate the derivatives as the difference between consecutive
+	// points divided by the elapsed time.  Then normalize to the requested
+	// interval.
+	out := [][]interface{}{}
+	for idx := 1; idx < len(results); idx++ {
+		before := results[idx-1]
+		after := results[idx]
+
+		// A pair with a missing value on either side produces no point.
+		if after[1] == nil || before[1] == nil {
+			continue
+		}
+
+		elapsed := after[0].(time.Time).Sub(before[0].(time.Time))
+		delta := int64toFloat64(after[1]) - int64toFloat64(before[1])
+
+		rate := 0.0
+		if elapsed > 0 {
+			rate = delta / (float64(elapsed) / float64(interval))
+		}
+
+		// Drop negative values for non-negative derivatives
+		if isNonNegative && delta < 0 {
+			continue
+		}
+
+		out = append(out, []interface{}{
+			after[0],
+			rate,
+		})
+	}
+
+	return out
+}
+
+// derivativeInterval returns the interval for the one (and only) derivative func: its 2nd argument, else the GROUP BY interval, else 1s.
+func derivativeInterval(stmt *influxql.SelectStatement) (time.Duration, error) {
+	call := stmt.FunctionCalls()[0]
+	if len(call.Args) == 2 {
+		return call.Args[1].(*influxql.DurationLiteral).Val, nil
+	}
+	switch interval, err := stmt.GroupByInterval(); {
+	case err != nil:
+		return 0, err
+	case interval > 0:
+		return interval, nil
+	}
+	return time.Second, nil
+}
+
+// resultsEmpty will return true if all the result values are empty or contain only nulls.
+// The first entry of each row is the time value and is never inspected.
+func resultsEmpty(resultValues [][]interface{}) bool {
+	for _, row := range resultValues {
+		for col := len(row) - 1; col >= 1; col-- {
+			if row[col] != nil {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+func int64toFloat64(v interface{}) float64 {
+	switch v.(type) {
+	case int64:
+		return float64(v.(int64))
+	case float64:
+		return v.(float64)
+	}
+	panic(fmt.Sprintf("expected either int64 or float64, got %v", v))
+}
+
+type int64arr []int64 // sorts in ascending order through Len/Swap/Less
+
+func (s int64arr) Len() int           { return len(s) }
+func (s int64arr) Less(x, y int) bool { return s[x] < s[y] }
+func (s int64arr) Swap(x, y int)      { s[x], s[y] = s[y], s[x] }
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.pb.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.pb.go
new file mode 100644
index 00000000000..43d3eb3bf06
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.pb.go
@@ -0,0 +1,123 @@
+// Code generated by protoc-gen-go.
+// source: meta.proto
+// DO NOT EDIT!
+
+/*
+Package internal is a generated protocol buffer package.
+
+It is generated from these files:
+	meta.proto
+
+It has these top-level messages:
+	Series
+	Tag
+	MeasurementFields
+	Field
+*/
+package internal
+
+import proto "github.com/golang/protobuf/proto"
+import math "math"
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ = proto.Marshal
+var _ = math.Inf
+
+type Series struct { // generated for message Series of meta.proto
+	Key              *string `protobuf:"bytes,1,req" json:"Key,omitempty"`
+	Tags             []*Tag  `protobuf:"bytes,2,rep" json:"Tags,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *Series) Reset()         { *m = Series{} }
+func (m *Series) String() string { return proto.CompactTextString(m) }
+func (*Series) ProtoMessage()    {}
+
+func (m *Series) GetKey() string { // "" when m or m.Key is nil
+	if m != nil && m.Key != nil {
+		return *m.Key
+	}
+	return ""
+}
+
+func (m *Series) GetTags() []*Tag { // nil when m is nil
+	if m != nil {
+		return m.Tags
+	}
+	return nil
+}
+
+type Tag struct { // generated for message Tag of meta.proto
+	Key              *string `protobuf:"bytes,1,req" json:"Key,omitempty"`
+	Value            *string `protobuf:"bytes,2,req" json:"Value,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *Tag) Reset()         { *m = Tag{} }
+func (m *Tag) String() string { return proto.CompactTextString(m) }
+func (*Tag) ProtoMessage()    {}
+
+func (m *Tag) GetKey() string { // "" when m or m.Key is nil
+	if m != nil && m.Key != nil {
+		return *m.Key
+	}
+	return ""
+}
+
+func (m *Tag) GetValue() string { // "" when m or m.Value is nil
+	if m != nil && m.Value != nil {
+		return *m.Value
+	}
+	return ""
+}
+
+type MeasurementFields struct { // generated for message MeasurementFields of meta.proto
+	Fields           []*Field `protobuf:"bytes,1,rep" json:"Fields,omitempty"`
+	XXX_unrecognized []byte   `json:"-"`
+}
+
+func (m *MeasurementFields) Reset()         { *m = MeasurementFields{} }
+func (m *MeasurementFields) String() string { return proto.CompactTextString(m) }
+func (*MeasurementFields) ProtoMessage()    {}
+
+func (m *MeasurementFields) GetFields() []*Field { // nil when m is nil
+	if m != nil {
+		return m.Fields
+	}
+	return nil
+}
+
+type Field struct { // generated for message Field of meta.proto
+	ID               *int32  `protobuf:"varint,1,req" json:"ID,omitempty"`
+	Name             *string `protobuf:"bytes,2,req" json:"Name,omitempty"`
+	Type             *int32  `protobuf:"varint,3,req" json:"Type,omitempty"`
+	XXX_unrecognized []byte  `json:"-"`
+}
+
+func (m *Field) Reset()         { *m = Field{} }
+func (m *Field) String() string { return proto.CompactTextString(m) }
+func (*Field) ProtoMessage()    {}
+
+func (m *Field) GetID() int32 { // 0 when m or m.ID is nil
+	if m != nil && m.ID != nil {
+		return *m.ID
+	}
+	return 0
+}
+
+func (m *Field) GetName() string { // "" when m or m.Name is nil
+	if m != nil && m.Name != nil {
+		return *m.Name
+	}
+	return ""
+}
+
+func (m *Field) GetType() int32 { // 0 when m or m.Type is nil
+	if m != nil && m.Type != nil {
+		return *m.Type
+	}
+	return 0
+}
+
+func init() { // empty body: nothing is done at package initialization
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.proto b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.proto
new file mode 100644
index 00000000000..d1deff6c5a1
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.proto
@@ -0,0 +1,27 @@
+package internal;
+
+//========================================================================
+//
+// Metadata
+//
+//========================================================================
+
+message Series { // a key together with any number of tags
+  required string Key = 1;
+  repeated Tag Tags = 2;
+}
+
+message Tag { // one key/value pair; both parts are required
+  required string Key = 1;
+  required string Value = 2;
+}
+
+message MeasurementFields { // a list of Field entries
+  repeated Field Fields = 1;
+}
+
+message Field { // numeric ID, name and numeric type code; all three are required
+  required int32 ID = 1;
+  required string Name = 2;
+  required int32 Type = 3;
+}
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper.go
new file mode 100644
index 00000000000..b4ae8e8d044
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper.go
@@ -0,0 +1,751 @@
+package tsdb
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"math"
+	"sort"
+	"strings"
+
+	"github.com/boltdb/bolt"
+	"github.com/influxdb/influxdb/influxql"
+)
+
+// mapperValue is a complex type, which can encapsulate data from both raw and aggregate
+// mappers. This currently allows marshalling and network system to remain simpler. For
+// aggregate output Time is ignored, and actual Time-Value pairs are contained solely
+// within the Value field.
+type mapperValue struct {
+	Time  int64       `json:"time,omitempty"`  // Ignored for aggregate output.
+	Value interface{} `json:"value,omitempty"` // For aggregate, contains interval time multiple values.
+}
+
+type mapperValues []*mapperValue // sorts by ascending Time through Len/Less/Swap
+
+func (vs mapperValues) Len() int           { return len(vs) }
+func (vs mapperValues) Swap(x, y int)      { vs[x], vs[y] = vs[y], vs[x] }
+func (vs mapperValues) Less(x, y int) bool { return vs[x].Time < vs[y].Time }
+
+type mapperOutput struct { // one chunk of mapper results for a single measurement/tagset
+	Name   string            `json:"name,omitempty"`   // measurement name
+	Tags   map[string]string `json:"tags,omitempty"`   // tags of the tagset the values belong to
+	Values []*mapperValue    `json:"values,omitempty"` // For aggregates contains a single value at [0]
+}
+
+func (mo *mapperOutput) key() string { // same key format as tagSetCursor.key()
+	return formMeasurementTagSetKey(mo.Name, mo.Tags)
+}
+
+// RawMapper is for retrieving data, for a raw query, for a single shard.
+type RawMapper struct {
+	shard     *Shard                    // shard the data is read from
+	stmt      *influxql.SelectStatement // SELECT statement being executed
+	chunkSize int                       // number of values at which NextChunk returns a chunk
+
+	tx        *bolt.Tx // Read transaction for this shard.
+	queryTMin int64    // lower time bound, taken from the statement's condition
+	queryTMax int64    // upper time bound, taken from the statement's condition
+
+	whereFields  []string               // field names that occur in the where clause
+	selectFields []string               // field names that occur in the select clause
+	selectTags   []string               // tag keys that occur in the select clause
+	fieldName    string                 // the field name being read.
+	decoders     map[string]*FieldCodec // byte decoder per measurement
+
+	cursors         []*tagSetCursor // Cursors per tag sets.
+	currCursorIndex int             // Current tagset cursor being drained.
+}
+
+// NewRawMapper returns a mapper for the given shard, which will return data for the SELECT statement.
+// chunkSize is the number of values at which NextChunk hands back a chunk. Only these
+// three settings and an empty cursor list are stored here; Open creates the cursors.
+func NewRawMapper(shard *Shard, stmt *influxql.SelectStatement, chunkSize int) *RawMapper {
+	rm := new(RawMapper)
+	rm.shard, rm.stmt, rm.chunkSize = shard, stmt, chunkSize
+	rm.cursors = make([]*tagSetCursor, 0)
+	return rm
+}
+
+// Open opens the raw mapper: it starts a read transaction and builds one primed, sorted cursor per tagset.
+func (rm *RawMapper) Open() error {
+	// Get a read-only transaction.
+	tx, err := rm.shard.DB().Begin(false)
+	if err != nil {
+		return err
+	}
+	rm.tx = tx
+
+	// Set all time-related parameters on the mapper.
+	rm.queryTMin, rm.queryTMax = influxql.TimeRangeAsEpochNano(rm.stmt.Condition)
+
+	// Create the TagSet cursors for the Mapper.
+	for _, src := range rm.stmt.Sources {
+		mm, ok := src.(*influxql.Measurement)
+		if !ok {
+			return fmt.Errorf("invalid source type: %#v", src)
+		}
+
+		m := rm.shard.index.Measurement(mm.Name)
+		if m == nil {
+			// This shard has never received data for the measurement. No Mapper
+			// required. Note that any remaining sources are not examined either.
+			return nil
+		}
+
+		// Create tagset cursors and determine various field types within SELECT statement.
+		tsf, err := createTagSetsAndFields(m, rm.stmt)
+		if err != nil {
+			return err
+		}
+		tagSets := tsf.tagSets
+		rm.selectFields = tsf.selectFields // overwritten for every source
+		rm.selectTags = tsf.selectTags
+		rm.whereFields = tsf.whereFields
+
+		if len(rm.selectFields) == 0 {
+			return fmt.Errorf("select statement must include at least one field")
+		}
+
+		// SLIMIT and SOFFSET the unique series. The clamp below writes to rm.stmt.SLimit.
+		if rm.stmt.SLimit > 0 || rm.stmt.SOffset > 0 {
+			if rm.stmt.SOffset > len(tagSets) {
+				tagSets = nil
+			} else {
+				if rm.stmt.SOffset+rm.stmt.SLimit > len(tagSets) {
+					rm.stmt.SLimit = len(tagSets) - rm.stmt.SOffset
+				}
+
+				tagSets = tagSets[rm.stmt.SOffset : rm.stmt.SOffset+rm.stmt.SLimit] // NOTE(review): empty when SLimit is 0 — confirm intended
+			}
+		}
+
+		// Create all cursors for reading the data from this shard.
+		for _, t := range tagSets {
+			cursors := []*seriesCursor{}
+
+			for i, key := range t.SeriesKeys {
+				c := createCursorForSeries(rm.tx, rm.shard, key)
+				if c == nil {
+					// No data exists for this key.
+					continue
+				}
+				cm := newSeriesCursor(c, t.Filters[i])
+				cursors = append(cursors, cm)
+			}
+
+			tsc := newTagSetCursor(m.Name, t.Tags, cursors, rm.shard.FieldCodec(m.Name))
+			// Prime the lookahead buffers by seeking every series cursor to queryTMin.
+			for i := 0; i < len(tsc.cursors); i++ {
+				k, v := tsc.cursors[i].SeekTo(rm.queryTMin)
+				tsc.keyBuffer[i] = k
+				tsc.valueBuffer[i] = v
+			}
+			rm.cursors = append(rm.cursors, tsc)
+		}
+		sort.Sort(tagSetCursors(rm.cursors))
+	}
+
+	return nil
+}
+
+// TagSets returns the sorted keys of the tagsets for which this mapper holds cursors.
+func (rm *RawMapper) TagSets() []string {
+	return tagSetCursors(rm.cursors).Keys()
+}
+
+// NextChunk returns the next chunk of data as a *mapperOutput. Data comes in the same
+// order as the tags returned by TagSets. A chunk never contains data for more than 1
+// tagset: it is returned once it holds chunkSize values or its tagset has no more data.
+// If there is no more data for any tagset, nil will be returned.
+func (rm *RawMapper) NextChunk() (interface{}, error) {
+	var chunk *mapperOutput
+	for rm.currCursorIndex != len(rm.cursors) {
+		tsc := rm.cursors[rm.currCursorIndex]
+
+		ts, val := tsc.Next(rm.queryTMin, rm.queryTMax, rm.selectFields, rm.whereFields)
+		if val == nil {
+			// Tagset cursor is empty, move to next one.
+			rm.currCursorIndex++
+			if chunk != nil {
+				// There is data, so return it and continue when next called.
+				return chunk, nil
+			}
+			// Just go straight to the next cursor.
+			continue
+		}
+
+		// First value of this tagset: start a chunk for it.
+		if chunk == nil {
+			chunk = &mapperOutput{
+				Name: tsc.measurement,
+				Tags: tsc.tags,
+			}
+		}
+		chunk.Values = append(chunk.Values, &mapperValue{Time: ts, Value: val})
+		// The chunk is full.
+		if len(chunk.Values) == rm.chunkSize {
+			return chunk, nil
+		}
+	}
+
+	// All tagset cursors processed. NextChunk'ing complete.
+	return nil, nil
+}
+
+// Close closes the mapper by rolling back its transaction; a nil or never-opened mapper is a no-op.
+func (rm *RawMapper) Close() {
+	if rm != nil && rm.tx != nil {
+		_ = rm.tx.Rollback() // the rollback error is discarded
+	}
+}
+
+// AggMapper is for retrieving data, for an aggregate query, from a given shard.
+type AggMapper struct {
+	shard *Shard                    // shard the data is read from
+	stmt  *influxql.SelectStatement // SELECT statement being executed
+
+	tx              *bolt.Tx // Read transaction for this shard.
+	queryTMin       int64    // Minimum time of the query.
+	queryTMinWindow int64    // Minimum time of the query floored to start of interval.
+	queryTMax       int64    // Maximum time of the query.
+	intervalSize    int64    // Size of each interval.
+
+	mapFuncs   []influxql.MapFunc // The mapping functions.
+	fieldNames []string           // the field name being read for mapping, one per map function.
+
+	whereFields  []string // field names that occur in the where clause
+	selectFields []string // field names that occur in the select clause
+	selectTags   []string // tag keys that occur in the select clause
+
+	numIntervals int // Maximum number of intervals to return.
+	currInterval int // Current interval for which data is being fetched.
+
+	cursors         []*tagSetCursor // Cursors per tag sets.
+	currCursorIndex int             // Current tagset cursor being drained.
+}
+
+// NewAggMapper returns a mapper for the given shard, which will return data for the SELECT statement.
+// Only the shard, the statement and an empty cursor list are set here; Open does the rest.
+func NewAggMapper(shard *Shard, stmt *influxql.SelectStatement) *AggMapper {
+	am := new(AggMapper)
+	am.shard, am.stmt = shard, stmt
+	am.cursors = make([]*tagSetCursor, 0)
+	return am
+}
+
+// Open opens the aggregate mapper: read transaction, map functions, interval bookkeeping, then the tagset cursors.
+func (am *AggMapper) Open() error {
+	var err error
+
+	// Get a read-only transaction.
+	tx, err := am.shard.DB().Begin(false)
+	if err != nil {
+		return err
+	}
+	am.tx = tx
+
+	// Set up each mapping function for this statement.
+	aggregates := am.stmt.FunctionCalls()
+	am.mapFuncs = make([]influxql.MapFunc, len(aggregates))
+	am.fieldNames = make([]string, len(am.mapFuncs))
+	for i, c := range aggregates {
+		am.mapFuncs[i], err = influxql.InitializeMapFunc(c)
+		if err != nil {
+			return err
+		}
+
+		// Check for calls like `derivative(mean(value), 1d)`
+		var nested *influxql.Call = c
+		if fn, ok := c.Args[0].(*influxql.Call); ok { // NOTE(review): Args[0] is indexed without a length check
+			nested = fn
+		}
+		switch lit := nested.Args[0].(type) {
+		case *influxql.VarRef:
+			am.fieldNames[i] = lit.Val
+		case *influxql.Distinct:
+			if c.Name != "count" {
+				return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
+			}
+			am.fieldNames[i] = lit.Val
+		default:
+			return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
+		}
+	}
+
+	// Set all time-related parameters on the mapper.
+	am.queryTMin, am.queryTMax = influxql.TimeRangeAsEpochNano(am.stmt.Condition)
+
+	// For GROUP BY time queries, limit the number of data points returned by the limit and offset
+	d, err := am.stmt.GroupByInterval()
+	if err != nil {
+		return err
+	}
+	am.intervalSize = d.Nanoseconds()
+	if am.queryTMin == 0 || am.intervalSize == 0 {
+		am.numIntervals = 1 // a single interval spanning queryTMin..queryTMax
+		am.intervalSize = am.queryTMax - am.queryTMin
+	} else {
+		intervalTop := am.queryTMax/am.intervalSize*am.intervalSize + am.intervalSize
+		intervalBottom := am.queryTMin / am.intervalSize * am.intervalSize
+		am.numIntervals = int((intervalTop - intervalBottom) / am.intervalSize)
+	}
+
+	if am.stmt.Limit > 0 || am.stmt.Offset > 0 {
+		// ensure that the offset isn't higher than the number of points we'd get
+		if am.stmt.Offset > am.numIntervals {
+			return nil // am.cursors stays empty, so NextChunk has nothing to return
+		}
+
+		// Take the lesser of either the pre computed number of GROUP BY buckets that
+		// will be in the result or the limit passed in by the user
+		if am.stmt.Limit < am.numIntervals {
+			am.numIntervals = am.stmt.Limit
+		}
+	}
+
+	// If we are exceeding our MaxGroupByPoints error out
+	if am.numIntervals > MaxGroupByPoints {
+		return errors.New("too many points in the group by interval. maybe you forgot to specify a where time clause?")
+	}
+
+	// Ensure that the start time for the results is on the start of the window.
+	am.queryTMinWindow = am.queryTMin
+	if am.intervalSize > 0 && am.numIntervals > 1 {
+		am.queryTMinWindow = am.queryTMinWindow / am.intervalSize * am.intervalSize
+	}
+
+	// Create the TagSet cursors for the Mapper.
+	for _, src := range am.stmt.Sources {
+		mm, ok := src.(*influxql.Measurement)
+		if !ok {
+			return fmt.Errorf("invalid source type: %#v", src)
+		}
+
+		m := am.shard.index.Measurement(mm.Name)
+		if m == nil {
+			// This shard has never received data for the measurement. No Mapper
+			// required. Note that any remaining sources are not examined either.
+			return nil
+		}
+
+		// Create tagset cursors and determine various field types within SELECT statement.
+		tsf, err := createTagSetsAndFields(m, am.stmt)
+		if err != nil {
+			return err
+		}
+		tagSets := tsf.tagSets
+		am.selectFields = tsf.selectFields // overwritten for every source
+		am.selectTags = tsf.selectTags
+		am.whereFields = tsf.whereFields
+
+		// Validate that group by is not a field
+		if err := m.ValidateGroupBy(am.stmt); err != nil {
+			return err
+		}
+
+		// SLIMIT and SOFFSET the unique series. The clamp below writes to am.stmt.SLimit.
+		if am.stmt.SLimit > 0 || am.stmt.SOffset > 0 {
+			if am.stmt.SOffset > len(tagSets) {
+				tagSets = nil
+			} else {
+				if am.stmt.SOffset+am.stmt.SLimit > len(tagSets) {
+					am.stmt.SLimit = len(tagSets) - am.stmt.SOffset
+				}
+
+				tagSets = tagSets[am.stmt.SOffset : am.stmt.SOffset+am.stmt.SLimit] // NOTE(review): empty when SLimit is 0 — confirm intended
+			}
+		}
+
+		// Create all cursors for reading the data from this shard.
+		for _, t := range tagSets {
+			cursors := []*seriesCursor{}
+
+			for i, key := range t.SeriesKeys {
+				c := createCursorForSeries(am.tx, am.shard, key)
+				if c == nil {
+					// No data exists for this key.
+					continue
+				}
+				cm := newSeriesCursor(c, t.Filters[i])
+				cursors = append(cursors, cm)
+			}
+			tsc := newTagSetCursor(m.Name, t.Tags, cursors, am.shard.FieldCodec(m.Name)) // buffers are primed later, in NextChunk
+			am.cursors = append(am.cursors, tsc)
+		}
+		sort.Sort(tagSetCursors(am.cursors))
+	}
+
+	return nil
+}
+
+// NextChunk returns the next chunk of data, which is the next interval of data
+// for the current tagset. Tagsets are always processed in the same order as that
+// returned by AvailTagsSets(). When there is no more data for any tagset nil
+// is returned.
+func (am *AggMapper) NextChunk() (interface{}, error) {
+	var output *mapperOutput
+	for {
+		if am.currCursorIndex == len(am.cursors) {
+			// All tagset cursors processed. NextChunk'ing complete.
+			return nil, nil
+		}
+		tsc := am.cursors[am.currCursorIndex]
+		tmin, tmax := am.nextInterval()
+
+		if tmin < 0 {
+			// All intervals complete for this tagset. Move to the next tagset.
+			am.resetIntervals()
+			am.currCursorIndex++
+			continue
+		}
+
+		// Prep the return data for this tagset. This will hold data for a single interval
+		// for a single tagset.
+		if output == nil {
+			output = &mapperOutput{
+				Name:   tsc.measurement,
+				Tags:   tsc.tags,
+				Values: make([]*mapperValue, 1),
+			}
+			// Aggregate values only use the first entry in the Values field. Set the time
+			// to the start of the interval.
+			output.Values[0] = &mapperValue{
+				Time:  tmin,
+				Value: make([]interface{}, 0)}
+		}
+
+		// Always clamp tmin. This can happen as bucket-times are bucketed to the nearest
+		// interval, and this can be less than the times in the query.
+		qmin := tmin
+		if qmin < am.queryTMin {
+			qmin = am.queryTMin
+		}
+
+		for i := range am.mapFuncs {
+			// Prime the tagset cursor for the start of the interval. This is not ideal, as
+			// it should really calculate the values all in 1 pass, but that would require
+			// changes to the mapper functions, which can come later.
+			// Prime the buffers.
+			for i := 0; i < len(tsc.cursors); i++ {
+				k, v := tsc.cursors[i].SeekTo(tmin)
+				tsc.keyBuffer[i] = k
+				tsc.valueBuffer[i] = v
+			}
+
+			// Wrap the tagset cursor so it implements the mapping functions interface.
+			f := func() (time int64, value interface{}) {
+				return tsc.Next(qmin, tmax, []string{am.fieldNames[i]}, am.whereFields)
+			}
+
+			tagSetCursor := &aggTagSetCursor{
+				nextFunc: f,
+			}
+
+			// Execute the map function which walks the entire interval, and aggregates
+			// the result.
+			values := output.Values[0].Value.([]interface{})
+			output.Values[0].Value = append(values, am.mapFuncs[i](tagSetCursor))
+		}
+		return output, nil
+	}
+}
+
+// nextInterval returns the next interval for which to return data. If start is less than 0
+// there are no more intervals.
+func (am *AggMapper) nextInterval() (start, end int64) {
+	begin := am.queryTMinWindow + int64(am.currInterval+am.stmt.Offset)*am.intervalSize
+
+	// Onto next interval. The counter advances even when begin is rejected below.
+	am.currInterval++
+
+	// Still inside the query range and within the number of intervals to return.
+	if begin <= am.queryTMax && am.currInterval <= am.numIntervals {
+		return begin, begin + am.intervalSize
+	}
+	return -1, 1
+}
+
+// resetIntervals starts the Mapper at the first interval again by zeroing currInterval.
+// Subsequent intervals should be retrieved via nextInterval().
+func (am *AggMapper) resetIntervals() {
+	am.currInterval = 0
+}
+
+// TagSets returns the sorted keys of the tagsets for which this mapper holds cursors.
+func (am *AggMapper) TagSets() []string {
+	return tagSetCursors(am.cursors).Keys()
+}
+
+// Close closes the mapper by rolling back its transaction; a nil or never-opened mapper is a no-op.
+func (am *AggMapper) Close() {
+	if am != nil && am.tx != nil {
+		_ = am.tx.Rollback() // the rollback error is discarded
+	}
+}
+
+// aggTagSetCursor wraps a standard tagSetCursor, such that the values it emits are aggregated
+// by intervals.
+type aggTagSetCursor struct {
+	nextFunc func() (time int64, value interface{}) // produces the next value; installed by AggMapper.NextChunk
+}
+
+// Next returns the next value for the aggTagSetCursor by calling nextFunc. It implements
+// the interface expected by the mapping functions.
+func (a *aggTagSetCursor) Next() (time int64, value interface{}) {
+	return a.nextFunc()
+}
+
+// tagSetCursor is a virtual cursor that iterates over multiple series cursors, as though it were
+// a single series.
+type tagSetCursor struct {
+	measurement string            // Measurement name
+	tags        map[string]string // Tag key-value pairs
+	cursors     []*seriesCursor   // Underlying series cursors.
+	decoder     *FieldCodec       // decoder for the raw data bytes
+
+	// Lookahead buffers for the cursors. Performance analysis shows that it is critical
+	// that these buffers are part of the tagSetCursor type and not part of the
+	// cursors type.
+	keyBuffer   []int64  // The current timestamp key for each cursor (-1 once a cursor has nothing left)
+	valueBuffer [][]byte // The current value for each cursor
+}
+
+// tagSetCursors represents a slice of tagSetCursors that sorts by key(); Keys lists those keys in sorted order.
+type tagSetCursors []*tagSetCursor
+
+func (a tagSetCursors) Len() int           { return len(a) }
+func (a tagSetCursors) Swap(x, y int)      { a[x], a[y] = a[y], a[x] }
+func (a tagSetCursors) Less(x, y int) bool { return a[x].key() < a[y].key() }
+
+func (a tagSetCursors) Keys() []string {
+	keys := make([]string, len(a))
+	for i, tsc := range a {
+		keys[i] = tsc.key()
+	}
+	sort.Strings(keys)
+	return keys
+}
+
+// newTagSetCursor returns a tagSetCursor for measurement m and tags t that reads from
+// the series cursors c and decodes with d. The lookahead buffers get one slot per
+// series cursor and start out zeroed.
+func newTagSetCursor(m string, t map[string]string, c []*seriesCursor, d *FieldCodec) *tagSetCursor {
+	n := len(c)
+	tsc := &tagSetCursor{measurement: m, tags: t, cursors: c, decoder: d}
+	tsc.keyBuffer = make([]int64, n)
+	tsc.valueBuffer = make([][]byte, n)
+
+	return tsc
+}
+
+func (tsc *tagSetCursor) key() string {
+	return formMeasurementTagSetKey(tsc.measurement, tsc.tags) // measurement name, plus "|" and the marshalled tags when there are any
+}
+
+// Next returns the timestamp and decoded value of the next point of the tagset that lies within
+// the time bounds and passes its series filter. When nothing is left it returns -1 and nil.
+func (tsc *tagSetCursor) Next(tmin, tmax int64, selectFields, whereFields []string) (int64, interface{}) {
+	for {
+		// Find the next lowest timestamp
+		min := -1
+		minKey := int64(math.MaxInt64)
+		for i, k := range tsc.keyBuffer {
+			// && binds tighter than ||: a key equal to tmin is taken regardless of minKey and tmax.
+			if k != -1 && (k == tmin) || k < minKey && k >= tmin && k < tmax {
+				min = i
+				minKey = k
+			}
+		}
+		// Return if there is no more data for this tagset.
+		if min == -1 {
+			return -1, nil
+		}
+
+		// remember the timestamp now; the buffers are overwritten when the cursor advances
+		timestamp := tsc.keyBuffer[min]
+
+		var value interface{}
+		if len(selectFields) > 1 {
+			if fieldsWithNames, err := tsc.decoder.DecodeFieldsWithNames(tsc.valueBuffer[min]); err == nil { // on error value stays nil
+				value = fieldsWithNames
+
+				// if there's a where clause, make sure we don't need to filter this value
+				if tsc.cursors[min].filter != nil && !matchesWhere(tsc.cursors[min].filter, fieldsWithNames) {
+					value = nil
+				}
+			}
+		} else {
+			// With only 1 field SELECTed, decoding all fields may be avoidable, which is faster.
+			var err error
+			value, err = tsc.decoder.DecodeByName(selectFields[0], tsc.valueBuffer[min]) // NOTE(review): needs at least one select field
+			if err != nil {
+				value = nil
+			} else {
+				// If there's a WHERE clause, see if we need to filter
+				if tsc.cursors[min].filter != nil {
+					// See if the WHERE is only on this field or on one or more other fields.
+					// If the latter, we'll have to decode everything
+					if len(whereFields) == 1 && whereFields[0] == selectFields[0] {
+						if !matchesWhere(tsc.cursors[min].filter, map[string]interface{}{selectFields[0]: value}) {
+							value = nil
+						}
+					} else { // Decode everything
+						fieldsWithNames, err := tsc.decoder.DecodeFieldsWithNames(tsc.valueBuffer[min])
+						if err != nil || !matchesWhere(tsc.cursors[min].filter, fieldsWithNames) {
+							value = nil
+						}
+					}
+				}
+			}
+		}
+
+		// Advance the cursor
+		nextKey, nextVal := tsc.cursors[min].Next()
+		tsc.keyBuffer[min] = nextKey
+		tsc.valueBuffer[min] = nextVal
+
+		// Value didn't match (or failed to decode), look for the next one.
+		if value == nil {
+			continue
+		}
+
+		return timestamp, value
+	}
+}
+
+// seriesCursor is a cursor that walks a single series. It provides lookahead functionality.
+type seriesCursor struct {
+	cursor *shardCursor  // underlying cursor for the data of the series
+	filter influxql.Expr // filter applied to the values of the series; nil when there is none
+}
+
+// newSeriesCursor returns a new instance of a series cursor that reads from b and
+// carries filter, which may be nil.
+func newSeriesCursor(b *shardCursor, filter influxql.Expr) *seriesCursor {
+	sc := new(seriesCursor)
+	sc.cursor, sc.filter = b, filter
+	return sc
+}
+
+// SeekTo seeks the underlying cursor to key and returns the timestamp and value it
+// lands on. When the cursor reports no key the timestamp is -1 and the value nil.
+func (sc *seriesCursor) SeekTo(key int64) (timestamp int64, value []byte) {
+	k, v := sc.cursor.Seek(u64tob(uint64(key)))
+	if k == nil {
+		// The seek found no entry.
+		return -1, nil
+	}
+	return int64(btou64(k)), v
+}
+
+// Next returns the next timestamp and value from the cursor, or -1 and a nil value
+// when the underlying cursor has nothing further to return.
+func (sc *seriesCursor) Next() (key int64, value []byte) {
+	k, v := sc.cursor.Next()
+	if k == nil {
+		// The cursor is exhausted.
+		return -1, nil
+	}
+	return int64(btou64(k)), v
+}
+
+// createCursorForSeries creates a cursor for walking the given series key. The cursor
+// consolidates both the Bolt store and any WAL cache. nil is returned when the key has
+// neither a bucket nor points in the cache.
+func createCursorForSeries(tx *bolt.Tx, shard *Shard, key string) *shardCursor {
+	bucket := tx.Bucket([]byte(key))
+	pending := shard.cache[WALPartition([]byte(key))][key]
+
+	// Ignore if there is no bucket or points in the cache.
+	if bucket == nil && len(pending) == 0 {
+		return nil
+	}
+
+	// Work on a copy of the in-cache points for the key.
+	snapshot := make([][]byte, len(pending))
+	copy(snapshot, pending)
+
+	// The bucket cursor is only attached when the bucket exists.
+	sc := &shardCursor{cache: snapshot}
+	if bucket != nil {
+		sc.cursor = bucket.Cursor()
+	}
+	return sc
+}
+
+type tagSetsAndFields struct { // result of createTagSetsAndFields
+	tagSets      []*influxql.TagSet // tag sets returned by Measurement.TagSets for the statement
+	selectFields []string           // names in the select clause that are fields of the measurement
+	selectTags   []string           // names in the select clause that are tag keys of the measurement
+	whereFields  []string           // names in the where clause that are fields of the measurement
+}
+
+// createTagSetsAndFields returns the tagsets and various fields given a measurement and
+// SELECT statement. It also ensures that the fields and tags exist, returning an error
+// that names the first unknown one.
+func createTagSetsAndFields(m *Measurement, stmt *influxql.SelectStatement) (*tagSetsAndFields, error) {
+	_, groupKeys, err := stmt.Dimensions.Normalize()
+	if err != nil {
+		return nil, err
+	}
+
+	selFields, selTags, condFields := newStringSet(), newStringSet(), newStringSet()
+
+	// Every name in the SELECT clause must be a field or a tag key of m. Selected
+	// tags are also added to the keys the tag sets are built from.
+	for _, name := range stmt.NamesInSelect() {
+		switch {
+		case m.HasField(name):
+			selFields.add(name)
+		case m.HasTagKey(name):
+			selTags.add(name)
+			groupKeys = append(groupKeys, name)
+		default:
+			return nil, fmt.Errorf("unknown field or tag name in select clause: %s", name)
+		}
+	}
+
+	// Names in the WHERE clause are checked the same way, except that "time" is
+	// skipped and only fields are recorded.
+	for _, name := range stmt.NamesInWhere() {
+		switch {
+		case name == "time":
+			// neither looked up nor recorded
+		case m.HasField(name):
+			condFields.add(name)
+		case !m.HasTagKey(name):
+			return nil, fmt.Errorf("unknown field or tag name in where clause: %s", name)
+		}
+	}
+
+	// Get the tag sets for this statement.
+	tagSets, err := m.TagSets(stmt, groupKeys)
+	if err != nil {
+		return nil, err
+	}
+
+	result := &tagSetsAndFields{tagSets: tagSets}
+	result.selectFields = selFields.list()
+	result.selectTags = selTags.list()
+	result.whereFields = condFields.list()
+	return result, nil
+}
+
+// matchesWhere returns true if the value matches the where clause, i.e. if evaluating
+// f against fields yields the boolean true. Any other result, including a non-boolean
+// one, counts as no match.
+func matchesWhere(f influxql.Expr, fields map[string]interface{}) bool {
+	matched, _ := influxql.Eval(f, fields).(bool)
+	return matched
+}
+
+func formMeasurementTagSetKey(name string, tags map[string]string) string {
+	// Without tags the key is the name alone; otherwise the name, "|" and the marshalled tags.
+	if len(tags) != 0 {
+		return strings.Join([]string{name, string(marshalTags(tags))}, "|")
+	}
+	return name
+}
+
+// btou64 converts the first 8 bytes of b, read in big-endian order, into a uint64.
+func btou64(b []byte) uint64 { return binary.BigEndian.Uint64(b) }
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta.go
new file mode 100644
index 00000000000..07c17f9b728
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta.go
@@ -0,0 +1,1279 @@
+package tsdb
+
+import (
+	"fmt"
+	"regexp"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/influxdb/influxdb/influxql"
+	"github.com/influxdb/influxdb/tsdb/internal"
+
+	"github.com/gogo/protobuf/proto"
+)
+
+//go:generate protoc --gogo_out=. internal/meta.proto
+
+const (
+	maxStringLength = 64 * 1024 // 64 KiB; NOTE(review): its use is outside this chunk, confirm what it bounds
+)
+
+// DatabaseIndex is the in-memory index of a collection of measurements, time series, and their tags.
+// Exported functions are meant to be goroutine safe, while un-exported functions assume the caller holds the appropriate locks.
+type DatabaseIndex struct {
+	// in memory metadata index, built on load and updated when new series come in
+	mu           sync.RWMutex
+	measurements map[string]*Measurement // measurement name to object and index
+	series       map[string]*Series      // map series key to the Series object
+	names        []string                // sorted list of the measurement names
+	lastID       uint64                  // last used series ID. They're in memory only for this shard
+}
+
+// NewDatabaseIndex returns an empty index with its maps and its names list initialized.
+func NewDatabaseIndex() *DatabaseIndex {
+	idx := &DatabaseIndex{names: make([]string, 0)}
+	idx.measurements = make(map[string]*Measurement)
+	idx.series = make(map[string]*Series)
+	return idx
+}
+
+// Measurement returns the measurement object with the given name from the index, or nil if there is none.
+func (d *DatabaseIndex) Measurement(name string) *Measurement {
+	d.mu.RLock()
+	defer d.mu.RUnlock()
+	return d.measurements[name]
+}
+
+// MeasurementSeriesCounts returns the number of measurements and the number of
+// series currently held in the index, read together under the index read lock.
+// Useful for reporting and monitoring.
+func (d *DatabaseIndex) MeasurementSeriesCounts() (nMeasurements int, nSeries int) {
+	d.mu.RLock()
+	defer d.mu.RUnlock()
+	return len(d.measurements), len(d.series)
+}
+
+// createSeriesIndexIfNotExists registers series under measurementName and assigns it the next
+// in-memory ID. If a series with the same key is already indexed, that existing object is
+// returned instead and the argument is left untouched.
+func (d *DatabaseIndex) createSeriesIndexIfNotExists(measurementName string, series *Series) *Series {
+	// A series already indexed under this key wins.
+	if existing := d.series[series.Key]; existing != nil {
+		return existing
+	}
+
+	// Get or create the measurement that will own the series.
+	measurement := d.createMeasurementIndexIfNotExists(measurementName)
+
+	// IDs are handed out sequentially and are only used for query processing on this shard.
+	d.lastID++
+	series.id = d.lastID
+
+	// Link the series into both the database-level and the measurement-level index.
+	series.measurement = measurement
+	d.series[series.Key] = series
+	measurement.AddSeries(series)
+	return series
+}
+
+// createMeasurementIndexIfNotExists returns the index object for the unescaped name, creating and registering it on first use.
+func (d *DatabaseIndex) createMeasurementIndexIfNotExists(name string) *Measurement {
+	name = unescapeString(name)
+	if m := d.measurements[name]; m != nil {
+		return m
+	}
+	created := NewMeasurement(name, d)
+	d.measurements[name] = created
+	d.names = append(d.names, name)
+	sort.Strings(d.names) // keep the names list ordered after every insertion
+	return created
+}
+
+// measurementsByExpr takes an expression containing only tags and returns
+// a list of matching *Measurement. AND/OR combine both sides via intersect/union.
+func (db *DatabaseIndex) measurementsByExpr(expr influxql.Expr) (Measurements, error) {
+	switch e := expr.(type) {
+	case *influxql.BinaryExpr:
+		switch e.Op {
+		case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
+			tag, ok := e.LHS.(*influxql.VarRef)
+			if !ok {
+				return nil, fmt.Errorf("left side of '%s' must be a tag name", e.Op.String())
+			}
+
+			tf := &TagFilter{
+				Op:  e.Op,
+				Key: tag.Val,
+			}
+
+			if influxql.IsRegexOp(e.Op) {
+				re, ok := e.RHS.(*influxql.RegexLiteral)
+				if !ok {
+					return nil, fmt.Errorf("right side of '%s' must be a regular expression", e.Op.String())
+				}
+				tf.Regex = re.Val
+			} else {
+				s, ok := e.RHS.(*influxql.StringLiteral)
+				if !ok {
+					return nil, fmt.Errorf("right side of '%s' must be a tag value string", e.Op.String())
+				}
+				tf.Value = s.Val
+			}
+
+			return db.measurementsByTagFilters([]*TagFilter{tf}), nil
+		case influxql.OR, influxql.AND:
+			lhsIDs, err := db.measurementsByExpr(e.LHS)
+			if err != nil {
+				return nil, err
+			}
+
+			rhsIDs, err := db.measurementsByExpr(e.RHS)
+			if err != nil {
+				return nil, err
+			}
+
+			if e.Op == influxql.OR {
+				return lhsIDs.union(rhsIDs), nil
+			}
+
+			return lhsIDs.intersect(rhsIDs), nil
+		default:
+			return nil, fmt.Errorf("invalid operator")
+		}
+	case *influxql.ParenExpr:
+		return db.measurementsByExpr(e.Expr)
+	}
+	return nil, fmt.Errorf("%#v", expr)
+}
+
+// measurementsByTagFilters returns the measurements matching the filters on tag values.
+// With at least one filter the result is sorted by name, as Measurements.union/intersect require.
+func (db *DatabaseIndex) measurementsByTagFilters(filters []*TagFilter) Measurements {
+	// If no filters, then return all measurements.
+	if len(filters) == 0 {
+		measurements := make(Measurements, 0, len(db.measurements))
+		for _, m := range db.measurements {
+			measurements = append(measurements, m)
+		}
+		return measurements
+	}
+
+	// Build a list of measurements matching the filters.
+	var measurements Measurements
+
+	// Iterate through all measurements in the database.
+	for _, m := range db.measurements {
+		// Iterate filters seeing if the measurement has a matching tag.
+		for _, f := range filters {
+			tagVals, ok := m.seriesByTagKeyValue[f.Key]
+			if !ok {
+				continue
+			}
+
+			tagMatch := false
+
+			// If the operator is non-regex, only check the specified value.
+			if f.Op == influxql.EQ || f.Op == influxql.NEQ {
+				_, tagMatch = tagVals[f.Value]
+			} else {
+				// Else, the operator is regex and we have to check all tag
+				// values against the regular expression.
+				for tagVal := range tagVals {
+					if f.Regex.MatchString(tagVal) {
+						tagMatch = true
+						break
+					}
+				}
+			}
+
+			isEQ := (f.Op == influxql.EQ || f.Op == influxql.EQREGEX)
+
+			// tags match | operation is EQ | measurement matches
+			// --------------------------------------------------
+			//     True   |       True      |      True
+			//     True   |       False     |      False
+			//     False  |       True      |      False
+			//     False  |       False     |      True
+			if tagMatch == isEQ {
+				measurements = append(measurements, m)
+				break
+			}
+		}
+	}
+
+	// Map iteration order is random, but the merge-based union/intersect helpers used by
+	// measurementsByExpr assume name order, so sort before handing the result back.
+	sort.Sort(measurements)
+	return measurements
+}
+
+// measurementsByRegex returns the measurements whose Name matches re (nil when none do).
+func (db *DatabaseIndex) measurementsByRegex(re *regexp.Regexp) Measurements {
+	var found Measurements
+	for _, candidate := range db.measurements {
+		if re.MatchString(candidate.Name) {
+			found = append(found, candidate)
+		}
+	}
+	return found
+}
+
+// Measurements returns a list of all measurements, in map order. NOTE(review): takes no lock itself, confirm callers hold db.mu.
+func (db *DatabaseIndex) Measurements() Measurements {
+	measurements := make(Measurements, 0, len(db.measurements))
+	for _, m := range db.measurements {
+		measurements = append(measurements, m)
+	}
+	return measurements
+}
+
+// DropMeasurement removes the measurement, all of its series and its name from the database index.
+func (db *DatabaseIndex) DropMeasurement(name string) {
+	db.mu.Lock()
+	defer db.mu.Unlock()
+
+	dropped := db.measurements[name]
+	if dropped == nil {
+		return
+	}
+	delete(db.measurements, name)
+	for _, s := range dropped.seriesByID {
+		delete(db.series, s.Key)
+	}
+
+	// Rebuild the names list without the dropped name; the relative order of the rest is kept.
+	var remaining []string
+	for _, n := range db.names {
+		if n != name {
+			remaining = append(remaining, n)
+		}
+	}
+	db.names = remaining
+}
+
+// DropSeries removes the series keys and their tags from the index. Unknown keys are skipped.
+func (db *DatabaseIndex) DropSeries(keys []string) {
+	db.mu.Lock()
+	defer db.mu.Unlock()
+	for _, k := range keys {
+		if series := db.series[k]; series != nil {
+			series.measurement.DropSeries(series.id)
+			// Forget the key too, or createSeriesIndexIfNotExists keeps returning the dropped series.
+			delete(db.series, k)
+		}
+	}
+}
+
+// Measurement represents a collection of time series in a database. It also contains in-memory
+// structures for indexing tags. Exported functions are goroutine safe while un-exported functions
+// assume the caller will use the appropriate locks.
+type Measurement struct {
+	mu         sync.RWMutex
+	Name       string `json:"name,omitempty"`
+	fieldNames map[string]struct{}
+	index      *DatabaseIndex
+
+	// in-memory index fields. NOTE(review): measurement below is never set in the visible code, confirm it is still needed
+	series              map[string]*Series // sorted tagset string to the series object
+	seriesByID          map[uint64]*Series // lookup table for series by their id
+	measurement         *Measurement
+	seriesByTagKeyValue map[string]map[string]seriesIDs // map from tag key to value to sorted set of series ids
+	seriesIDs           seriesIDs                       // sorted list of series IDs in this measurement
+}
+
+// NewMeasurement allocates a Measurement called name that belongs to idx, with
+// all of its lookup maps and its series ID list initialized empty.
+func NewMeasurement(name string, idx *DatabaseIndex) *Measurement {
+	m := &Measurement{Name: name, index: idx}
+	m.fieldNames = make(map[string]struct{})
+
+	// in-memory index structures
+	m.series = make(map[string]*Series)
+	m.seriesByID = make(map[uint64]*Series)
+	m.seriesByTagKeyValue = make(map[string]map[string]seriesIDs)
+	m.seriesIDs = make(seriesIDs, 0)
+	return m
+}
+
+// HasField reports whether name is one of the measurement's field names; it takes the read lock.
+func (m *Measurement) HasField(name string) bool {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	_, known := m.fieldNames[name]
+	return known
+}
+
+// SeriesKeys returns the key of every series in this measurement, in no particular order (nil if none).
+func (m *Measurement) SeriesKeys() []string {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	var out []string
+	for _, series := range m.seriesByID {
+		out = append(out, series.Key)
+	}
+	return out
+}
+
+// ValidateGroupBy ensures that the GROUP BY is not a field.
+func (m *Measurement) ValidateGroupBy(stmt *influxql.SelectStatement) error {
+	for _, d := range stmt.Dimensions {
+		switch e := d.Expr.(type) {
+		case *influxql.VarRef:
+			if !m.HasTagKey(e.Val) {
+				return fmt.Errorf("can not use field in GROUP BY clause: %s", e.Val)
+			}
+		}
+	}
+	return nil
+}
+
+// HasTagKey reports whether the tag index currently holds key k, i.e. some series carries that tag.
+func (m *Measurement) HasTagKey(k string) bool {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	_, indexed := m.seriesByTagKeyValue[k]
+	return indexed
+}
+
+// HasSeries reports whether at least one series is indexed under this measurement.
+func (m *Measurement) HasSeries() bool {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	return len(m.seriesByID) != 0
+}
+
+// AddSeries adds s to the in-memory indexes of the measurement: by series ID, by marshaled
+// tag set, and once for every tag key/value pair it carries. It returns false, leaving the
+// indexes untouched, if a series with the same ID is already present.
+func (m *Measurement) AddSeries(s *Series) bool {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	if _, exists := m.seriesByID[s.id]; exists {
+		return false
+	}
+	// Register the series by ID and by its marshaled tag set.
+	m.seriesByID[s.id] = s
+	m.series[string(marshalTags(s.Tags))] = s
+
+	// A new series normally carries the highest ID so far, which keeps the list
+	// sorted for free; only re-sort when the appended ID is out of order.
+	m.seriesIDs = append(m.seriesIDs, s.id)
+	if n := len(m.seriesIDs); n > 1 && m.seriesIDs[n-1] < m.seriesIDs[n-2] {
+		sort.Sort(m.seriesIDs)
+	}
+
+	// Add this series ID to the tag index on the measurement.
+	for key, value := range s.Tags {
+		byValue := m.seriesByTagKeyValue[key]
+		if byValue == nil {
+			byValue = make(map[string]seriesIDs)
+			m.seriesByTagKeyValue[key] = byValue
+		}
+
+		// Same approach as above: append, and sort only if that broke the order.
+		ids := append(byValue[value], s.id)
+		if n := len(ids); n > 1 && ids[n-1] < ids[n-2] {
+			sort.Sort(ids)
+		}
+		byValue[value] = ids
+	}
+
+	return true
+}
+
+// DropSeries removes the series with the given ID from every in-memory index of the
+// measurement: the tag-set map, the ID map, the sorted ID list and the tag key/value index.
+// It does nothing when the ID is not known to the measurement.
+func (m *Measurement) DropSeries(seriesID uint64) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	s, found := m.seriesByID[seriesID]
+	if !found {
+		return
+	}
+
+	// The series is keyed by its marshaled tag set in m.series.
+	delete(m.series, string(marshalTags(s.Tags)))
+	delete(m.seriesByID, seriesID)
+
+	// Rebuild the ID list without the dropped ID.
+	var kept []uint64
+	for _, id := range m.seriesIDs {
+		if id != seriesID {
+			kept = append(kept, id)
+		}
+	}
+	m.seriesIDs = kept
+
+	// Remove this series ID from the tag index on the measurement.
+	// m.seriesByTagKeyValue is defined as map[string]map[string]seriesIDs.
+	for tagKey, byValue := range m.seriesByTagKeyValue {
+		for tagValue, ids := range byValue {
+			var remaining []uint64
+			for _, id := range ids {
+				if id != seriesID {
+					remaining = append(remaining, id)
+				}
+			}
+			// Check to see if we have any IDs left; if not, remove the tag value.
+			if len(remaining) == 0 {
+				delete(byValue, tagValue)
+			} else {
+				byValue[tagValue] = remaining
+			}
+		}
+		// If no values are left, then delete the tag key as well.
+		if len(byValue) == 0 {
+			delete(m.seriesByTagKeyValue, tagKey)
+		} else {
+			m.seriesByTagKeyValue[tagKey] = byValue
+		}
+	}
+}
+
+// filters walks the where clause of a select statement and returns a map with all series ids
+// matching the where clause and any filter expression that should be applied to each (nil for none).
+func (m *Measurement) filters(stmt *influxql.SelectStatement) (map[uint64]influxql.Expr, error) {
+	if stmt.Condition == nil || stmt.OnlyTimeDimensions() {
+		seriesIdsToExpr := make(map[uint64]influxql.Expr)
+		for _, id := range m.seriesIDs {
+			seriesIdsToExpr[id] = nil
+		}
+		return seriesIdsToExpr, nil
+	}
+	ids, seriesIdsToExpr, err := m.walkWhereForSeriesIds(stmt.Condition)
+	if err != nil {
+		return nil, err
+	}
+	if seriesIdsToExpr == nil { // the walk may return ids with a nil map; writing to it would panic
+		seriesIdsToExpr = make(map[uint64]influxql.Expr, len(ids))
+	}
+	// Ensure every id is in the map; literal true filters become nil so the engine skips evaluating them.
+	for _, id := range ids {
+		expr := seriesIdsToExpr[id]
+		if b, ok := expr.(*influxql.BooleanLiteral); expr == nil || (ok && b.Val) {
+			seriesIdsToExpr[id] = nil
+		}
+	}
+	return seriesIdsToExpr, nil
+}
+
+// TagSets returns the unique tag sets that exist for the given tag keys. This is used to determine
+// what composite series will be created by a group by. i.e. "group by region" should return:
+// {"region":"uswest"}, {"region":"useast"}
+// or region, service returns
+// {"region": "uswest", "service": "redis"}, {"region": "uswest", "service": "mysql"}, etc...
+// This will also populate the TagSet objects with the keys of the series that match each tagset and
+// any influx filter expression that goes with the series.
+//
+// stmt supplies the WHERE condition and dimensions the tag keys to group by. The returned slice is
+// ordered by marshaled tag set. Both the index and the measurement read locks are held for the
+// whole call.
+// TODO: this shouldn't be exported. However, until tx.go and the engine get refactored into tsdb, we need it.
+func (m *Measurement) TagSets(stmt *influxql.SelectStatement, dimensions []string) ([]*influxql.TagSet, error) {
+	// Lock order: database index first, then this measurement.
+	m.index.mu.RLock()
+	defer m.index.mu.RUnlock()
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	// Resolve the WHERE clause into series IDs and the filter to apply to each of them.
+	filterByID, err := m.filters(stmt)
+	if err != nil {
+		return nil, err
+	}
+
+	// Group the series by the values they carry for the requested dimensions. Series that share
+	// those values are part of the same composite series as far as GROUP BY is concerned.
+	byKey := make(map[string]*influxql.TagSet)
+	for id, filter := range filterByID {
+		series := m.seriesByID[id]
+
+		// Project the tags of the series onto the dimensions; a missing tag yields "".
+		tags := make(map[string]string)
+		for _, dim := range dimensions {
+			tags[dim] = series.Tags[dim]
+		}
+
+		// The marshaled form doubles as the map key, so TagSets can be handled as a set.
+		marshaled := marshalTags(tags)
+		mapKey := string(marshaled)
+		tagSet, seen := byKey[mapKey]
+		if !seen {
+			// First series with these values: start a new TagSet for them.
+			tagSet = &influxql.TagSet{
+				Tags: tags,
+				Key:  marshaled,
+			}
+			byKey[mapKey] = tagSet
+		}
+
+		// Associate the series and filter with the TagSet.
+		tagSet.AddFilter(series.Key, filter)
+	}
+
+	// The TagSets have been created, as a map of TagSets. Just send
+	// the values back as a slice, sorting for consistency.
+	keys := make([]string, 0, len(byKey))
+	for k := range byKey {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+
+	// Emit the TagSets in key order.
+	sorted := make([]*influxql.TagSet, len(keys))
+	for i, k := range keys {
+		sorted[i] = byKey[k]
+	}
+
+	return sorted, nil
+}
+
+// mergeSeriesFilters merges two sets of filter expressions and culls series IDs.
+//
+// For each ID in ids the LHS and RHS filters are joined with op and reduced; IDs whose merged
+// filter reduces to literal false are dropped. It returns the surviving IDs, in the order of
+// ids, and a map from each of them to its merged filter.
+//
+func mergeSeriesFilters(op influxql.Token, ids seriesIDs, lfilters, rfilters map[uint64]influxql.Expr) (seriesIDs, map[uint64]influxql.Expr) {
+	// Combining logic:
+	// +==========+==========+==========+=======================+=======================+
+	// | operator |   LHS    |   RHS    |   intermediate expr   |     reduced filter    |
+	// +==========+==========+==========+=======================+=======================+
+	// |          | <nil>    | <r-expr> | true OR <r-expr>      | true                  |
+	// |          |----------+----------+-----------------------+-----------------------+
+	// | OR       | <l-expr> | <nil>    | <l-expr> OR true      | true                  |
+	// |          |----------+----------+-----------------------+-----------------------+
+	// |          | <nil>    | <nil>    | true OR true          | true                  |
+	// |          |----------+----------+-----------------------+-----------------------+
+	// |          | <l-expr> | <r-expr> | <l-expr> OR <r-expr>  | <l-expr> OR <r-expr>  |
+	// +----------+----------+----------+-----------------------+-----------------------+
+	// |          | <nil>    | <r-expr> | false AND <r-expr>    | false*                |
+	// |          |----------+----------+-----------------------+-----------------------+
+	// | AND      | <l-expr> | <nil>    | <l-expr> AND false    | false                 |
+	// |          |----------+----------+-----------------------+-----------------------+
+	// |          | <nil>    | <nil>    | false AND false       | false                 |
+	// |          |----------+----------+-----------------------+-----------------------+
+	// |          | <l-expr> | <r-expr> | <l-expr> AND <r-expr> | <l-expr> AND <r-expr> |
+	// +----------+----------+----------+-----------------------+-----------------------+
+	// *literal false filters and series IDs should be excluded from the results
+
+	// A missing (nil) filter is replaced by a literal: true under OR, false otherwise.
+	def := op == influxql.OR
+
+	// Final set of merged filters and the resulting list of series IDs.
+	merged := make(map[uint64]influxql.Expr, 0)
+	var kept seriesIDs
+
+	for _, id := range ids {
+		// Get LHS and RHS filter expressions for this series ID, defaulting nil ones.
+		lfilter, rfilter := lfilters[id], rfilters[id]
+		if lfilter == nil {
+			lfilter = &influxql.BooleanLiteral{Val: def}
+		}
+		if rfilter == nil {
+			rfilter = &influxql.BooleanLiteral{Val: def}
+		}
+
+		// Create the intermediate filter expression for this series ID.
+		combined := &influxql.BinaryExpr{
+			Op:  op,
+			LHS: lfilter,
+			RHS: rfilter,
+		}
+
+		// Reduce the intermediate expression.
+		reduced := influxql.Reduce(combined, nil)
+
+		// If the expression reduced to false, exclude this series ID and filter.
+		if b, isBool := reduced.(*influxql.BooleanLiteral); isBool && !b.Val {
+			continue
+		}
+
+		// Store the series ID and merged filter in the final results.
+		merged[id] = reduced
+		kept = append(kept, id)
+	}
+	return kept, merged
+}
+
+// idsForExpr will return a collection of series ids and a filter expression that should
+// be used to filter points from those series. The caller must hold m.mu (at least for reading).
+func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, influxql.Expr, error) {
+	name, ok := n.LHS.(*influxql.VarRef)
+	value := n.RHS
+	if !ok {
+		name, ok = n.RHS.(*influxql.VarRef)
+		if !ok {
+			return nil, nil, fmt.Errorf("invalid expression: %s", n.String())
+		}
+		value = n.LHS
+	}
+
+	// For time literals, return all series IDs and "true" as the filter.
+	if _, ok := value.(*influxql.TimeLiteral); ok || name.Val == "time" {
+		return m.seriesIDs, &influxql.BooleanLiteral{Val: true}, nil
+	}
+
+	// For fields, return all series IDs from this measurement and the expression passed in as
+	// the filter. fieldNames is read directly: callers such as TagSets already hold m.mu for
+	// reading, and HasField would read-lock it again, which sync.RWMutex does not allow.
+	if _, isField := m.fieldNames[name.Val]; isField {
+		return m.seriesIDs, n, nil
+	}
+
+	tagVals, ok := m.seriesByTagKeyValue[name.Val]
+	if !ok {
+		return nil, nil, nil
+	}
+
+	// if we're looking for series with a specific tag value
+	if str, ok := value.(*influxql.StringLiteral); ok {
+		var ids seriesIDs
+		if n.Op == influxql.EQ {
+			// return series that have a tag of specific value.
+			ids = tagVals[str.Val]
+		} else if n.Op == influxql.NEQ {
+			ids = m.seriesIDs.reject(tagVals[str.Val])
+		}
+		return ids, &influxql.BooleanLiteral{Val: true}, nil
+	}
+
+	// if we're looking for series with a tag value that matches a regex
+	if re, ok := value.(*influxql.RegexLiteral); ok {
+		var ids seriesIDs
+
+		// The operation is a NEQREGEX, code must start by assuming all match, even
+		// series without any tags.
+		if n.Op == influxql.NEQREGEX {
+			ids = m.seriesIDs
+		}
+
+		for k := range tagVals {
+			match := re.Val.MatchString(k)
+
+			if match && n.Op == influxql.EQREGEX {
+				ids = ids.union(tagVals[k])
+			} else if match && n.Op == influxql.NEQREGEX {
+				ids = ids.reject(tagVals[k])
+			}
+		}
+		return ids, &influxql.BooleanLiteral{Val: true}, nil
+	}
+
+	return nil, nil, nil
+}
+
+// walkWhereForSeriesIds recursively walks the WHERE clause and returns an ordered set of series IDs and
+// a map from those series IDs to filter expressions that should be used to limit points returned in
+// the final query result. The map may be nil even when IDs are returned (see the fall-through below).
+func (m *Measurement) walkWhereForSeriesIds(expr influxql.Expr) (seriesIDs, map[uint64]influxql.Expr, error) {
+	switch n := expr.(type) {
+	case *influxql.BinaryExpr:
+		switch n.Op {
+		case influxql.EQ, influxql.NEQ, influxql.LT, influxql.LTE, influxql.GT, influxql.GTE, influxql.EQREGEX, influxql.NEQREGEX:
+			// Get the series IDs and filter expression for the tag or field comparison.
+			ids, expr, err := m.idsForExpr(n)
+			if err != nil {
+				return nil, nil, err
+			}
+
+			filters := map[uint64]influxql.Expr{}
+			for _, id := range ids {
+				filters[id] = expr
+			}
+
+			return ids, filters, nil
+		case influxql.AND, influxql.OR:
+			// Get the series IDs and filter expressions for the LHS.
+			lids, lfilters, err := m.walkWhereForSeriesIds(n.LHS)
+			if err != nil {
+				return nil, nil, err
+			}
+
+			// Get the series IDs and filter expressions for the RHS.
+			rids, rfilters, err := m.walkWhereForSeriesIds(n.RHS)
+			if err != nil {
+				return nil, nil, err
+			}
+
+			// Combine the series IDs from the LHS and RHS.
+			var ids seriesIDs
+			switch n.Op {
+			case influxql.AND:
+				ids = lids.intersect(rids)
+			case influxql.OR:
+				ids = lids.union(rids)
+			}
+
+			// Merge the filter expressions for the LHS and RHS.
+			ids, filters := mergeSeriesFilters(n.Op, ids, lfilters, rfilters)
+
+			return ids, filters, nil
+		}
+		// Any other operator: return the IDs idsForExpr reports, with no filter map.
+		ids, _, err := m.idsForExpr(n)
+		return ids, nil, err
+	case *influxql.ParenExpr:
+		// walk down the tree
+		return m.walkWhereForSeriesIds(n.Expr)
+	default:
+		return nil, nil, nil
+	}
+}
+
+// expandExpr returns a list of expressions expanded by all possible tag combinations.
+func (m *Measurement) expandExpr(expr influxql.Expr) []tagSetExpr {
+	// Collect the unique values the expression compares each key against.
+	valuesByKey := m.uniqueTagValues(expr)
+
+	// Walk the keys in sorted order so the expansion is deterministic.
+	sortedKeys := make([]string, 0, len(valuesByKey))
+	for k := range valuesByKey {
+		sortedKeys = append(sortedKeys, k)
+	}
+	sort.Strings(sortedKeys)
+
+	// Line the value lists up with their keys: valueLists[i] belongs to sortedKeys[i].
+	valueLists := make([][]string, 0, len(sortedKeys))
+	for _, k := range sortedKeys {
+		valueLists = append(valueLists, valuesByKey[k])
+	}
+
+	// Reduce a condition for each combination of tag values.
+	return expandExprWithValues(expr, sortedKeys, []tagExpr{}, valueLists, 0)
+}
+
+func expandExprWithValues(expr influxql.Expr, keys []string, tagExprs []tagExpr, uniques [][]string, index int) []tagSetExpr {
+	// tagExprs holds one choice per key for keys[:index]. If we have no more keys left then execute the reduction and return.
+	if index == len(keys) {
+		// Create a map of tag key/values: an EQ choice maps to its single value, anything else to nil.
+		m := make(map[string]*string, len(keys))
+		for i, key := range keys {
+			if tagExprs[i].op == influxql.EQ {
+				m[key] = &tagExprs[i].values[0]
+			} else {
+				m[key] = nil
+			}
+		}
+
+		// TODO: Rewrite full expressions instead of VarRef replacement.
+
+		// Reduce using the current tag key/value set.
+		// Ignore it if it reduces down to "false".
+		e := influxql.Reduce(expr, &tagValuer{tags: m})
+		if e, ok := e.(*influxql.BooleanLiteral); ok && e.Val == false {
+			return nil
+		}
+
+		return []tagSetExpr{{values: copyTagExprs(tagExprs), expr: e}}
+	}
+
+	// Otherwise expand for each possible equality value of the key, plus one NEQ branch covering all of its values.
+	var exprs []tagSetExpr
+	for _, v := range uniques[index] {
+		exprs = append(exprs, expandExprWithValues(expr, keys, append(tagExprs, tagExpr{keys[index], []string{v}, influxql.EQ}), uniques, index+1)...)
+	}
+	exprs = append(exprs, expandExprWithValues(expr, keys, append(tagExprs, tagExpr{keys[index], uniques[index], influxql.NEQ}), uniques, index+1)...)
+
+	return exprs
+}
+
+// seriesIDsAllOrByExpr walks an expression for matching series IDs
+// or, if no expression is given, returns all series IDs for the measurement.
+func (m *Measurement) seriesIDsAllOrByExpr(expr influxql.Expr) (seriesIDs, error) {
+	switch {
+	case expr == nil:
+		// No expression: every series of the measurement qualifies.
+		return m.seriesIDs, nil
+	case len(m.seriesIDs) == 0:
+		// No series at all: nothing can match, so skip the walk.
+		return nil, nil
+	}
+
+	// Get series IDs that match the WHERE clause.
+	matched, _, err := m.walkWhereForSeriesIds(expr)
+	if err != nil {
+		return nil, err
+	}
+	return matched, nil
+}
+
+// tagValuer is used during expression expansion to evaluate all sets of tag values.
+type tagValuer struct {
+	// tags maps a tag key to its value; a nil pointer means the key is listed without a value.
+	tags map[string]*string
+}
+
+// Value returns the string value of a tag and true if it's listed in the tagset.
+// A tag listed with a nil pointer yields (nil, true); an unlisted tag yields (nil, false).
+func (v *tagValuer) Value(name string) (interface{}, bool) {
+	value, listed := v.tags[name]
+	if !listed || value == nil {
+		return nil, listed
+	}
+	return *value, true
+}
+
+// tagSetExpr pairs one combination of per-key tag choices with the expression reduced for that combination.
+type tagSetExpr struct {
+	values []tagExpr
+	expr   influxql.Expr
+}
+
+// tagExpr represents one or more values assigned to a given tag, together with how they are compared.
+type tagExpr struct {
+	key    string
+	values []string
+	op     influxql.Token // EQ or NEQ
+}
+
+func copyTagExprs(a []tagExpr) []tagExpr {
+	dup := make([]tagExpr, len(a))
+	copy(dup, a) // shallow copy: each element's values slice is still shared with a
+	return dup
+}
+
+// uniqueTagValues returns, per variable compared with = to a string literal in expr, the sorted unique literal values.
+func (m *Measurement) uniqueTagValues(expr influxql.Expr) map[string][]string {
+	// Track unique value per tag.
+	tags := make(map[string]map[string]struct{})
+
+	// Find all tag values referenced in the expression.
+	influxql.WalkFunc(expr, func(n influxql.Node) {
+		switch n := n.(type) {
+		case *influxql.BinaryExpr:
+			// Ignore operators that are not equality.
+			if n.Op != influxql.EQ {
+				return
+			}
+
+			// Extract ref and string literal; either operand order (ref = 'lit' or 'lit' = ref) is accepted.
+			var key, value string
+			switch lhs := n.LHS.(type) {
+			case *influxql.VarRef:
+				if rhs, ok := n.RHS.(*influxql.StringLiteral); ok {
+					key, value = lhs.Val, rhs.Val
+				}
+			case *influxql.StringLiteral:
+				if rhs, ok := n.RHS.(*influxql.VarRef); ok {
+					key, value = rhs.Val, lhs.Val
+				}
+			}
+			if key == "" {
+				return
+			}
+
+			// Add value to set.
+			if tags[key] == nil {
+				tags[key] = make(map[string]struct{})
+			}
+			tags[key][value] = struct{}{}
+		}
+	})
+
+	// Convert to map of slices, sorting each value list.
+	out := make(map[string][]string)
+	for k, values := range tags {
+		out[k] = make([]string, 0, len(values))
+		for v := range values {
+			out[k] = append(out[k], v)
+		}
+		sort.Strings(out[k])
+	}
+	return out
+}
+
+// Measurements represents a list of *Measurement. It implements sort.Interface, ordering by Name.
+type Measurements []*Measurement
+
+func (a Measurements) Len() int           { return len(a) }
+func (a Measurements) Less(i, j int) bool { return a[i].Name < a[j].Name }
+func (a Measurements) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
+
+func (a Measurements) intersect(other Measurements) Measurements {
+	l := a
+	r := other
+
+	// we want to iterate through the shortest one and stop
+	if len(other) < len(a) {
+		l = other
+		r = a
+	}
+
+	// they're in sorted order so advance the counter as needed.
+	// That is, don't run comparisons against lower values that we've already passed
+	var i, j int
+
+	result := make(Measurements, 0, len(l))
+	for i < len(l) && j < len(r) {
+		if l[i].Name == r[j].Name {
+			result = append(result, l[i])
+			i++
+			j++
+		} else if l[i].Name < r[j].Name {
+			i++
+		} else {
+			j++
+		}
+	}
+
+	return result
+}
+
+func (a Measurements) union(other Measurements) Measurements {
+	result := make(Measurements, 0, len(a)+len(other))
+	var i, j int
+	for i < len(a) && j < len(other) {
+		if a[i].Name == other[j].Name {
+			result = append(result, a[i])
+			i++
+			j++
+		} else if a[i].Name < other[j].Name {
+			result = append(result, a[i])
+			i++
+		} else {
+			result = append(result, other[j])
+			j++
+		}
+	}
+
+	// now append the remainder
+	if i < len(a) {
+		result = append(result, a[i:]...)
+	} else if j < len(other) {
+		result = append(result, other[j:]...)
+	}
+
+	return result
+}
+
+// Series belong to a Measurement and represent unique time series in a database.
+type Series struct {
+	Key  string
+	Tags map[string]string
+
+	id          uint64
+	measurement *Measurement
+}
+
+// MarshalBinary encodes the object to a binary format.
+func (s *Series) MarshalBinary() ([]byte, error) {
+	var pb internal.Series
+	pb.Key = &s.Key
+	for k, v := range s.Tags {
+		key := k
+		value := v
+		pb.Tags = append(pb.Tags, &internal.Tag{Key: &key, Value: &value})
+	}
+	return proto.Marshal(&pb)
+}
+
+// UnmarshalBinary decodes the object from a binary format.
+func (s *Series) UnmarshalBinary(buf []byte) error {
+	var pb internal.Series
+	if err := proto.Unmarshal(buf, &pb); err != nil {
+		return err
+	}
+	s.Key = pb.GetKey()
+	s.Tags = make(map[string]string)
+	for _, t := range pb.Tags {
+		s.Tags[t.GetKey()] = t.GetValue()
+	}
+	return nil
+}
+
+// match reports whether every given tag equals the series' tag; a tag the series lacks compares as "".
+func (s *Series) match(tags map[string]string) bool {
+	for key, want := range tags {
+		if got := s.Tags[key]; got != want {
+			return false
+		}
+	}
+	return true
+}
+
+// seriesIDs is a list of series ids with helpers for sorting (it implements sort.Interface),
+// checking equality, and taking the union, intersection and difference of sorted lists.
+type seriesIDs []uint64
+
+func (a seriesIDs) Len() int           { return len(a) }
+func (a seriesIDs) Less(i, j int) bool { return a[i] < a[j] }
+func (a seriesIDs) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
+
+// equals compares position by position, so it is set equality only if both lists are sorted.
+func (a seriesIDs) equals(other seriesIDs) bool {
+	if len(a) != len(other) {
+		return false
+	}
+	for i := range a {
+		if a[i] != other[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// intersect returns a new collection of series ids in sorted order that is the intersection of the two.
+// The two collections must already be sorted.
+// The result is never nil, even when nothing is shared.
+func (a seriesIDs) intersect(other seriesIDs) seriesIDs {
+	// Call the shorter list "short": it bounds the size of the result.
+	short, long := a, other
+	if len(other) < len(a) {
+		short, long = other, a
+	}
+
+	// Both lists are sorted, so walk them with two cursors and never
+	// revisit values that are lower than ones already passed.
+	common := make(seriesIDs, 0, len(short))
+	for i, j := 0, 0; i < len(short) && j < len(long); {
+		switch {
+		case short[i] < long[j]:
+			// Only in the shorter list.
+			i++
+		case short[i] > long[j]:
+			// Only in the longer list.
+			j++
+		default:
+			// Present in both lists.
+			common = append(common, short[i])
+			i++
+			j++
+		}
+	}
+
+	return common
+}
+
+// union returns a new collection of series ids in sorted order that is the union of the two.
+// The two collections must already be sorted.
+// An id present in both lists appears once in the result.
+func (a seriesIDs) union(other seriesIDs) seriesIDs {
+	merged := make(seriesIDs, 0, len(a)+len(other))
+	i, j := 0, 0
+	for i < len(a) && j < len(other) {
+		switch {
+		case a[i] < other[j]:
+			// The smaller id comes from a.
+			merged = append(merged, a[i])
+			i++
+		case a[i] > other[j]:
+			// The smaller id comes from other.
+			merged = append(merged, other[j])
+			j++
+		default:
+			// Same id on both sides: emit it once and advance both cursors.
+			merged = append(merged, a[i])
+			i++
+			j++
+		}
+	}
+
+	// At most one of the two lists still has ids left; append them as they are.
+	merged = append(merged, a[i:]...)
+	merged = append(merged, other[j:]...)
+
+	return merged
+}
+
+// reject returns a new collection of series ids in sorted order with the passed in set removed from the original.
+// This is useful for the NOT operator. The two collections must already be sorted.
+// Ids in other that do not occur in a are ignored.
+func (a seriesIDs) reject(other seriesIDs) seriesIDs {
+	kept := make(seriesIDs, 0, len(a))
+	i, j := 0, 0
+	for i < len(a) && j < len(other) {
+		switch {
+		case a[i] < other[j]:
+			// Not in the rejected set: keep it.
+			kept = append(kept, a[i])
+			i++
+		case a[i] > other[j]:
+			// A rejected id that a does not contain.
+			j++
+		default:
+			// Rejected: drop it from the result.
+			i++
+			j++
+		}
+	}
+
+	// Whatever is left of a lies beyond every rejected id.
+	kept = append(kept, a[i:]...)
+
+	return kept
+}
+
+// TagFilter represents a tag filter when looking up other tags or measurements.
+type TagFilter struct {
+	// Op is the influxql token attached to the filter.
+	// NOTE(review): presumably the comparison operator to apply — confirm
+	// against the callers that build and evaluate filters.
+	Op    influxql.Token
+	// Key is the tag key the filter refers to.
+	Key   string
+	// Value is the literal tag value of the filter.
+	Value string
+	// Regex is a compiled regular expression; presumably used instead of
+	// Value for regex operators — TODO confirm.
+	Regex *regexp.Regexp
+}
+
+// marshalTags flattens a tag set into a deterministic byte string for use as a
+// lookup key. The sorted keys are written first, each followed by '|'; the
+// values follow in the same key order, separated by '|'. For example
+// {"b":"2","a":"1"} marshals to "a|b|1|2". An empty tag set marshals to nil.
+func marshalTags(tags map[string]string) []byte {
+	n := len(tags)
+	if n == 0 {
+		return nil
+	}
+
+	// Collect the keys and compute the exact output size: n separators after
+	// the keys plus n-1 separators between the values.
+	names := make([]string, 0, n)
+	size := 2*n - 1
+	for name, value := range tags {
+		names = append(names, name)
+		size += len(name) + len(value)
+	}
+	sort.Strings(names)
+
+	out := make([]byte, 0, size)
+	for _, name := range names {
+		out = append(out, name...)
+		out = append(out, '|')
+	}
+	for pos, name := range names {
+		if pos > 0 {
+			out = append(out, '|')
+		}
+		out = append(out, tags[name]...)
+	}
+	return out
+}
+
+// timeBetweenInclusive reports whether t lies in the closed interval
+// [min, max], i.e. t is not earlier than min and not later than max.
+func timeBetweenInclusive(t, min, max time.Time) bool {
+	return !t.Before(min) && !t.After(max)
+}
+
+// TagKeys returns the measurement's tag keys in sorted order. The keys are
+// taken from the seriesByTagKeyValue index while holding the read lock.
+func (m *Measurement) TagKeys() []string {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	names := make([]string, 0, len(m.seriesByTagKeyValue))
+	for name := range m.seriesByTagKeyValue {
+		names = append(names, name)
+	}
+
+	sort.Strings(names)
+	return names
+}
+
+// FieldNames returns the measurement's field names. The names are collected
+// under the read lock and are returned in map iteration order, i.e. unsorted.
+// The result is nil when the measurement has no fields.
+func (m *Measurement) FieldNames() (a []string) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	for name := range m.fieldNames {
+		a = append(a, name)
+	}
+	return a
+}
+
+// tagValuesByKeyAndSeriesID collects, for each requested tag key, the set of
+// values that key takes across the series identified by ids. When tagKeys is
+// empty every tag key of the measurement is considered. Ids that are unknown
+// to the measurement are ignored, and a key only appears in the result if at
+// least one of the series carries it.
+//
+// NOTE(review): this method does not take m.mu itself; presumably callers
+// hold the lock — confirm.
+func (m *Measurement) tagValuesByKeyAndSeriesID(tagKeys []string, ids seriesIDs) map[string]stringSet {
+	// No explicit keys means "all keys of this measurement".
+	if len(tagKeys) == 0 {
+		for key := range m.seriesByTagKeyValue {
+			tagKeys = append(tagKeys, key)
+		}
+	}
+
+	result := make(map[string]stringSet, 0)
+
+	for _, id := range ids {
+		series, found := m.seriesByID[id]
+		if !found {
+			continue
+		}
+
+		// Record the value of every interesting key this series carries.
+		for _, key := range tagKeys {
+			val, has := series.Tags[key]
+			if !has {
+				continue
+			}
+			set, exists := result[key]
+			if !exists {
+				set = newStringSet()
+				result[key] = set
+			}
+			set.add(val)
+		}
+	}
+
+	return result
+}
+
+// stringSet represents a set of strings.
+type stringSet map[string]struct{}
+
+// newStringSet returns an empty stringSet.
+func newStringSet() stringSet {
+	return make(map[string]struct{})
+}
+
+// add adds a string to the set.
+func (s stringSet) add(ss string) {
+	s[ss] = struct{}{}
+}
+
+// contains returns whether the set contains the given string.
+func (s stringSet) contains(ss string) bool {
+	_, ok := s[ss]
+	return ok
+}
+
+// list returns the current elements in the set, in sorted order.
+func (s stringSet) list() []string {
+	l := make([]string, 0, len(s))
+	for k := range s {
+		l = append(l, k)
+	}
+	sort.Strings(l)
+	return l
+}
+
+// union returns a new set holding every element of this set and of o.
+// Neither operand is modified.
+func (s stringSet) union(o stringSet) stringSet {
+	ns := newStringSet()
+	for k := range s {
+		ns[k] = struct{}{}
+	}
+	for k := range o {
+		ns[k] = struct{}{}
+	}
+	return ns
+}
+
+// intersect returns a new set holding the elements present in both this set
+// and o. Neither operand is modified.
+func (s stringSet) intersect(o stringSet) stringSet {
+	// Every common element is found by probing one set with the members of
+	// the other, so a single pass over the smaller set is sufficient.
+	small, large := s, o
+	if len(o) < len(s) {
+		small, large = o, s
+	}
+
+	ns := newStringSet()
+	for k := range small {
+		if _, ok := large[k]; ok {
+			ns[k] = struct{}{}
+		}
+	}
+	return ns
+}
+
+// measurementFromSeriesKey returns the part of a series key that precedes the
+// first comma, which is where the measurement name is stored. A key without a
+// comma is returned unchanged.
+//
+// NOTE(review): the first comma is taken literally; a measurement name that
+// contains an escaped comma would be cut short — confirm whether such keys can
+// reach this function.
+func measurementFromSeriesKey(key string) string {
+	idx := strings.Index(key, ",")
+	if idx == -1 {
+		return key
+	}
+	// Reuse the position found above instead of searching the key again.
+	return key[:idx]
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/monitor.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/monitor.go
new file mode 100644
index 00000000000..d0015ec1654
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/monitor.go
@@ -0,0 +1,83 @@
+package tsdb
+
+// Monitor represents a TSDB monitoring service.
+type Monitor struct {
+	Store interface{}
+}
+
+// Open does nothing and always returns nil.
+func (m *Monitor) Open() error {
+	return nil
+}
+
+// Close does nothing and always returns nil.
+func (m *Monitor) Close() error {
+	return nil
+}
+
+// StartSelfMonitoring starts a goroutine which monitors the InfluxDB server
+// itself and stores the results in the specified database at a given interval.
+//
+// NOTE: the implementation below is commented out and is not compiled.
+/*
+func (s *Server) StartSelfMonitoring(database, retention string, interval time.Duration) error {
+		if interval == 0 {
+			return fmt.Errorf("statistics check interval must be non-zero")
+		}
+
+		go func() {
+			tick := time.NewTicker(interval)
+			for {
+				<-tick.C
+
+				// Create the batch and tags
+				tags := map[string]string{"serverID": strconv.FormatUint(s.ID(), 10)}
+				if h, err := os.Hostname(); err == nil {
+					tags["host"] = h
+				}
+				batch := pointsFromStats(s.stats, tags)
+
+				// Shard-level stats.
+				tags["shardID"] = strconv.FormatUint(s.id, 10)
+				s.mu.RLock()
+				for _, sh := range s.shards {
+					if !sh.HasDataNodeID(s.id) {
+						// No stats for non-local shards.
+						continue
+					}
+					batch = append(batch, pointsFromStats(sh.stats, tags)...)
+				}
+				s.mu.RUnlock()
+
+				// Server diagnostics.
+				for _, row := range s.DiagnosticsAsRows() {
+					points, err := s.convertRowToPoints(row.Name, row)
+					if err != nil {
+						s.Logger.Printf("failed to write diagnostic row for %s: %s", row.Name, err.Error())
+						continue
+					}
+					for _, p := range points {
+						p.AddTag("serverID", strconv.FormatUint(s.ID(), 10))
+					}
+					batch = append(batch, points...)
+				}
+
+				s.WriteSeries(database, retention, batch)
+			}
+		}()
+	return nil
+}
+
+// Function for local use turns stats into a slice of points
+func pointsFromStats(st *Stats, tags map[string]string) []tsdb.Point {
+	var points []tsdb.Point
+	now := time.Now()
+	st.Walk(func(k string, v int64) {
+		point := tsdb.NewPoint(
+			st.name+"_"+k,
+			make(map[string]string),
+			map[string]interface{}{"value": int(v)},
+			now,
+		)
+		// Specifically create a new map.
+		for k, v := range tags {
+			tags[k] = v
+			point.AddTag(k, v)
+		}
+		points = append(points, point)
+	})
+
+	return points
+}
+*/
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points.go
new file mode 100644
index 00000000000..26cec3ed5b5
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points.go
@@ -0,0 +1,1135 @@
+package tsdb
+
+import (
+	"bytes"
+	"fmt"
+	"hash/fnv"
+	"math"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Point defines the values that will be written to the database
+type Point interface {
+	// Name returns the measurement name and SetName replaces it.
+	Name() string
+	SetName(string)
+
+	// Tags returns the tag set, AddTag adds or replaces a single tag and
+	// SetTags replaces the whole tag set.
+	Tags() Tags
+	AddTag(key, value string)
+	SetTags(tags Tags)
+
+	// Fields returns the field set and AddField adds or replaces one field.
+	Fields() Fields
+	AddField(name string, value interface{})
+
+	// Time returns the timestamp, SetTime replaces it and UnixNano returns
+	// it as nanoseconds since the Unix epoch.
+	Time() time.Time
+	SetTime(t time.Time)
+	UnixNano() int64
+
+	// HashID returns a 64-bit hash of the key returned by Key, which is the
+	// text encoding of the measurement name and tags.
+	HashID() uint64
+	Key() []byte
+
+	// Data returns the binary encoded field data and SetData replaces it.
+	Data() []byte
+	SetData(buf []byte)
+
+	// String returns the text representation of the point.
+	String() string
+}
+
+// point is the default implementation of Point.
+type point struct {
+	// timestamp of the point
+	time time.Time
+
+	// text encoding of measurement and tags.
+	// The key must always be stored sorted by tags; if the original line was
+	// not sorted, it has to be re-sorted before being stored here.
+	key []byte
+
+	// text encoding of field data
+	fields []byte
+
+	// text encoding of timestamp
+	ts []byte
+
+	// binary encoded field data
+	data []byte
+}
+
+// Length thresholds used while scanning numbers: a numeric literal at least
+// this long is actually parsed so that out-of-range values are rejected.
+const (
+	// the number of characters for the largest possible int64 (9223372036854775807)
+	maxInt64Digits = 19
+
+	// the number of characters for the smallest possible int64 (-9223372036854775808)
+	minInt64Digits = 20
+
+	// the number of characters required for the largest float64 before a range
+	// check would occur during parsing
+	maxFloat64Digits = 25
+
+	// the number of characters required for the smallest float64 before a range
+	// check would occur during parsing
+	minFloat64Digits = 27
+)
+
+var (
+	// quoteReplacer matches a double quote together with the character in
+	// front of it, provided that character is not a backslash, i.e. a double
+	// quote that has not been escaped. Because a preceding character is
+	// required, a quote at the very start of the input is not matched.
+	quoteReplacer = regexp.MustCompile(`([^\\])"`)
+
+	// escapeCodes maps each character that has to be escaped to its
+	// backslash-escaped form.
+	escapeCodes = map[byte][]byte{
+		',': []byte(`\,`),
+		'"': []byte(`\"`),
+		' ': []byte(`\ `),
+		'=': []byte(`\=`),
+	}
+
+	// escapeCodesStr is the string-keyed copy of escapeCodes; it is filled
+	// in by init.
+	escapeCodesStr = map[string]string{}
+)
+
+// init derives escapeCodesStr from escapeCodes so that both tables always
+// hold the same escape sequences.
+func init() {
+	for k, v := range escapeCodes {
+		escapeCodesStr[string(k)] = string(v)
+	}
+}
+
+// ParsePointsString is identical to ParsePoints but accepts the text
+// representation as a string instead of a byte slice.
+func ParsePointsString(buf string) ([]Point, error) {
+	return ParsePoints([]byte(buf))
+}
+
+// ParsePoints returns a slice of Points from a text representation of a point
+// with each point separated by newlines. It delegates to
+// ParsePointsWithPrecision, passing the current UTC time as the default
+// timestamp and "n" as the precision.
+func ParsePoints(buf []byte) ([]Point, error) {
+	return ParsePointsWithPrecision(buf, time.Now().UTC(), "n")
+}
+
+// ParsePointsWithPrecision parses newline separated text into points.
+//
+// buf holds one point per line. Empty lines, lines made up only of whitespace
+// and lines whose first non-whitespace character is '#' (comments) are
+// skipped. defaultTime is used for points that carry no timestamp and
+// precision is forwarded to the per-line parser to interpret timestamps.
+//
+// The returned slice is never nil. Parsing stops at the first line that cannot
+// be parsed; in that case no points are returned and the error names the
+// offending line.
+func ParsePointsWithPrecision(buf []byte, defaultTime time.Time, precision string) ([]Point, error) {
+	points := []Point{}
+	var (
+		pos   int
+		block []byte
+	)
+	// Bounding the loop by len(buf) guarantees that scanTo is never asked to
+	// start beyond the end of the buffer, whichever kind of line came last.
+	for pos < len(buf) {
+		pos, block = scanTo(buf, pos, '\n')
+		pos += 1
+
+		// An empty line carries no point. Skip it rather than ending the
+		// parse, otherwise every point after a blank line is silently lost.
+		if len(block) == 0 {
+			continue
+		}
+
+		start := skipWhitespace(block, 0)
+
+		// A line of nothing but whitespace has no first character to inspect.
+		if start >= len(block) {
+			continue
+		}
+
+		// lines which start with '#' are comments
+		if block[start] == '#' {
+			continue
+		}
+
+		pt, err := parsePoint(block, defaultTime, precision)
+		if err != nil {
+			return nil, fmt.Errorf("unable to parse '%s': %v", string(block), err)
+		}
+		points = append(points, pt)
+	}
+	return points, nil
+}
+
+// parsePoint parses a single line into a point. The line consists of three
+// blocks: the key (measurement plus optional tags), the fields and an optional
+// integer timestamp.
+//
+// A missing measurement or a missing field section is an error, as is any
+// error reported while scanning one of the blocks. When the line carries no
+// timestamp the point gets defaultTime truncated to precision; otherwise the
+// timestamp is read as an integer count of precision units.
+func parsePoint(buf []byte, defaultTime time.Time, precision string) (Point, error) {
+	// Block one: measurement[,tag1=value1,tag2=value2...]
+	pos, key, err := scanKey(buf, 0)
+	if err != nil {
+		return nil, err
+	}
+	if len(key) == 0 {
+		return nil, fmt.Errorf("missing measurement")
+	}
+
+	// Block two: field1=value1[,field2=value2,...]
+	pos, fields, err := scanFields(buf, pos)
+	if err != nil {
+		return nil, err
+	}
+	if len(fields) == 0 {
+		return nil, fmt.Errorf("missing fields")
+	}
+
+	// Block three: the optional timestamp.
+	_, ts, err := scanTime(buf, pos)
+	if err != nil {
+		return nil, err
+	}
+
+	pt := &point{
+		key:    key,
+		fields: fields,
+		ts:     ts,
+	}
+
+	// No timestamp on the line: fall back to the supplied default.
+	if len(ts) == 0 {
+		pt.time = defaultTime
+		pt.SetPrecision(precision)
+		return pt, nil
+	}
+
+	units, err := strconv.ParseInt(string(ts), 10, 64)
+	if err != nil {
+		return nil, err
+	}
+	pt.time = time.Unix(0, units*pt.GetPrecisionMultiplier(precision))
+	return pt, nil
+}
+
+// scanKey scans buf starting at i for the measurement and tag portion of the point.
+// It returns the ending position and the byte slice of key within buf.  If there
+// are tags, they will be sorted if they are not already; in that case the
+// returned key is a newly allocated slice instead of a view into buf.
+//
+// An error is returned when a tag has no name or no value, when the number of
+// '=' does not match the number of tags, and when a tag name occurs twice.
+func scanKey(buf []byte, i int) (int, []byte, error) {
+	start := skipWhitespace(buf, i)
+
+	// Never start beyond the end of buf so that buf[start:i] is always valid.
+	if start > len(buf) {
+		start = len(buf)
+	}
+
+	i = start
+
+	// Determines whether the tags are sorted, assume they are
+	sorted := true
+
+	// indices holds the indexes within buf of the start of each tag.  For example,
+	// a buf of 'cpu,host=a,region=b,zone=c' would have indices slice of [4,11,20]
+	// which indicates that the first tag starts at buf[4], second at buf[11], and
+	// last at buf[20].  Once scanning stops, one extra entry is appended that
+	// holds the position just past the separator following the last tag, so that
+	// tag j always spans buf[indices[j] : indices[j+1]-1].  The slice grows on
+	// demand, so there is no limit on the number of tags.
+	indices := make([]int, 0, 16)
+
+	// tracks how many commas we've seen, which is also the number of tags
+	commas := 0
+
+	// tracks how many '=' we've seen
+	equals := 0
+
+	// loop over each byte in buf
+	for {
+		// reached the end of buf?
+		if i >= len(buf) {
+			// Skipping an escape sequence may step past the end; clamp so the
+			// returned position and slices stay within buf.
+			i = len(buf)
+
+			if equals == 0 && commas > 0 {
+				return i, buf[start:i], fmt.Errorf("missing tag value")
+			}
+
+			// End marker for the last tag, as if a separator followed it.
+			indices = append(indices, i+1)
+			break
+		}
+
+		if buf[i] == '=' {
+			// Check for "cpu,=value" but allow "cpu,a\,=value"
+			if i > 0 && buf[i-1] == ',' && (i < 2 || buf[i-2] != '\\') {
+				return i, buf[start:i], fmt.Errorf("missing tag name")
+			}
+
+			// Check for "cpu,\ =value"
+			if i > 0 && buf[i-1] == ' ' && (i < 2 || buf[i-2] != '\\') {
+				return i, buf[start:i], fmt.Errorf("missing tag name")
+			}
+
+			i += 1
+			equals += 1
+
+			// Check for "cpu,a=1,b= value=1"
+			if i < len(buf) && buf[i] == ' ' {
+				return i, buf[start:i], fmt.Errorf("missing tag value")
+			}
+			continue
+		}
+
+		// escaped character
+		if buf[i] == '\\' {
+			i += 2
+			continue
+		}
+
+		// At a tag separator (comma), track its location
+		if buf[i] == ',' {
+			if equals == 0 && commas > 0 {
+				return i, buf[start:i], fmt.Errorf("missing tag value")
+			}
+			i += 1
+			indices = append(indices, i)
+			commas += 1
+
+			// Check for "cpu, value=1"
+			if i < len(buf) && buf[i] == ' ' {
+				return i, buf[start:i], fmt.Errorf("missing tag key")
+			}
+			continue
+		}
+
+		// reached end of the block? (next block would be fields)
+		if buf[i] == ' ' {
+			// check for "cpu,tag value=1"
+			if equals == 0 && commas > 0 {
+				return i, buf[start:i], fmt.Errorf("missing tag value")
+			}
+			if equals > 0 && commas-1 != equals-1 {
+				return i, buf[start:i], fmt.Errorf("missing tag value")
+			}
+			indices = append(indices, i+1)
+			break
+		}
+
+		i += 1
+	}
+
+	// check that all tag sections had key and values (e.g. prevent "a=1,b").
+	// We're using commas-1 because there should always be a comma after measurement
+	if equals > 0 && commas-1 != equals-1 {
+		return i, buf[start:i], fmt.Errorf("invalid tag format")
+	}
+
+	// Now we know where the key region is within buf, and the locations of tags, we
+	// need to determine if duplicate tags exist and if the tags are sorted.  Each
+	// tag is compared with its successor: equal neighbours are duplicates, and a
+	// tag greater than its successor means the tags have to be sorted.
+	for j := 0; j < commas-1; j++ {
+		_, left := scanTo(buf[indices[j]:indices[j+1]-1], 0, '=')
+		_, right := scanTo(buf[indices[j+1]:indices[j+2]-1], 0, '=')
+
+		cmp := bytes.Compare(left, right)
+		if cmp == 0 {
+			return i, buf[start:i], fmt.Errorf("duplicate tags")
+		}
+		if cmp > 0 {
+			// Duplicates that are not adjacent yet are detected after sorting.
+			sorted = false
+			break
+		}
+	}
+
+	// If the tags are not sorted, then sort them.  This sort is inline and
+	// uses the tag indices we created earlier.  The actual buffer is not sorted, the
+	// indices are using the buffer for value comparison.  After the indices are sorted,
+	// the key is reconstructed from the sorted indices.
+	if !sorted && commas > 0 {
+		// Get the measurement name before the indices are reordered
+		measurement := buf[start : indices[0]-1]
+
+		// Sort the tag start positions (the end marker is left out)
+		tagStarts := indices[:commas]
+		insertionSort(0, commas, buf, tagStarts)
+
+		// Once sorted, duplicate tag names are guaranteed to be neighbours.
+		for j := 0; j < commas-1; j++ {
+			_, left := scanTo(buf, tagStarts[j], '=')
+			_, right := scanTo(buf, tagStarts[j+1], '=')
+			if bytes.Equal(left, right) {
+				return i, buf[start:i], fmt.Errorf("duplicate tags")
+			}
+		}
+
+		// Create a new key using the measurement and sorted indices
+		b := make([]byte, len(buf[start:i]))
+		pos := copy(b, measurement)
+		for _, at := range tagStarts {
+			b[pos] = ','
+			pos += 1
+			_, v := scanToSpaceOr(buf, at, ',')
+			pos += copy(b[pos:], v)
+		}
+
+		return i, b, nil
+	}
+
+	return i, buf[start:i], nil
+}
+
+// insertionSort sorts indices[l:r] in place using an insertion sort. The
+// entries are offsets into buf and are ordered with less, which compares the
+// tag names found at those offsets.
+func insertionSort(l, r int, buf []byte, indices []int) {
+	for next := l + 1; next < r; next++ {
+		// Move the new entry towards the front until it is in order.
+		pos := next
+		for pos > l && less(buf, indices, pos, pos-1) {
+			indices[pos], indices[pos-1] = indices[pos-1], indices[pos]
+			pos--
+		}
+	}
+}
+
+// less reports whether the tag starting at buf[indices[i]] orders before the
+// tag starting at buf[indices[j]]. Only the bytes up to the first unescaped
+// '=' of each tag are compared.
+func less(buf []byte, indices []int, i, j int) bool {
+	// This grabs the tag names for i & j, it ignores the values
+	_, a := scanTo(buf, indices[i], '=')
+	_, b := scanTo(buf, indices[j], '=')
+	return bytes.Compare(a, b) < 0
+}
+
+// scanFields scans buf, starting at i for the fields section of a point.  It returns
+// the ending position and the byte slice of the fields within buf.
+//
+// Scanning stops at the first unquoted space or at the end of buf. An error is
+// returned for a field without a name or without a value, for an invalid
+// number or boolean value, for unbalanced double quotes and when the number of
+// '=' does not match the number of comma separated fields.
+func scanFields(buf []byte, i int) (int, []byte, error) {
+	start := skipWhitespace(buf, i)
+
+	// Never start beyond the end of buf so that buf[start:i] is always valid.
+	if start > len(buf) {
+		start = len(buf)
+	}
+	i = start
+	quoted := false
+
+	// tracks how many '=' we've seen
+	equals := 0
+
+	// tracks how many commas we've seen
+	commas := 0
+
+	for i < len(buf) {
+		// escaped character
+		if buf[i] == '\\' {
+			i += 2
+			continue
+		}
+
+		// If the value is quoted, scan until we get to the end quote
+		if buf[i] == '"' {
+			quoted = !quoted
+			i += 1
+			continue
+		}
+
+		// If we see an =, ensure that there is at least one char before and after it
+		if buf[i] == '=' && !quoted {
+			equals += 1
+
+			// check for a section that begins with '=', i.e. has no name at all
+			if i == start {
+				return i, buf[start:i], fmt.Errorf("missing field name")
+			}
+
+			// check for "... =123" but allow "a\ =123"
+			if buf[i-1] == ' ' && (i < 2 || buf[i-2] != '\\') {
+				return i, buf[start:i], fmt.Errorf("missing field name")
+			}
+
+			// check for "...a=123,=456" but allow "a=123,a\,=456"
+			if buf[i-1] == ',' && (i < 2 || buf[i-2] != '\\') {
+				return i, buf[start:i], fmt.Errorf("missing field name")
+			}
+
+			// check for "... value="
+			if i+1 >= len(buf) {
+				return i, buf[start:i], fmt.Errorf("missing field value")
+			}
+
+			// check for "... value=,value2=..."
+			if buf[i+1] == ',' || buf[i+1] == ' ' {
+				return i, buf[start:i], fmt.Errorf("missing field value")
+			}
+
+			if isNumeric(buf[i+1]) || buf[i+1] == '-' || buf[i+1] == 'N' || buf[i+1] == 'n' {
+				var err error
+				i, _, err = scanNumber(buf, i+1)
+				if err != nil {
+					return i, buf[start:i], err
+				}
+				continue
+			}
+
+			// If next byte is not a double-quote, the value must be a boolean
+			if buf[i+1] != '"' {
+				var err error
+				i, _, err = scanBoolean(buf, i+1)
+				if err != nil {
+					return i, buf[start:i], err
+				}
+				continue
+			}
+		}
+
+		if buf[i] == ',' && !quoted {
+			commas += 1
+		}
+
+		// reached end of block?
+		if buf[i] == ' ' && !quoted {
+			break
+		}
+		i += 1
+	}
+
+	// Skipping an escape sequence may step past the end of buf; clamp so the
+	// returned position and slice stay within it.
+	if i > len(buf) {
+		i = len(buf)
+	}
+
+	if quoted {
+		return i, buf[start:i], fmt.Errorf("unbalanced quotes")
+	}
+
+	// check that all field sections had key and values (e.g. prevent "a=1,b")
+	if equals == 0 || commas != equals-1 {
+		return i, buf[start:i], fmt.Errorf("invalid field format")
+	}
+
+	return i, buf[start:i], nil
+}
+
+// scanTime scans buf, starting at i for the time section of a point.  It returns
+// the ending position and the byte slice of the timestamp within buf, and an
+// error if the timestamp is not in the correct numeric format.
+//
+// Leading whitespace is skipped. The timestamp ends at a newline or at the end
+// of buf and may be empty; any byte other than a decimal digit yields a
+// "bad timestamp" error.
+func scanTime(buf []byte, i int) (int, []byte, error) {
+	start := skipWhitespace(buf, i)
+
+	// Never start beyond the end of buf so that buf[start:i] is always valid.
+	if start > len(buf) {
+		start = len(buf)
+	}
+	i = start
+	for i < len(buf) {
+		// reached end of block? This has to be tested before the digit check,
+		// otherwise the newline itself is rejected as a bad timestamp.
+		if buf[i] == '\n' {
+			break
+		}
+
+		// Timestamps should be integers, make sure they are so we don't need to
+		// actually parse the timestamp until needed
+		if buf[i] < '0' || buf[i] > '9' {
+			return i, buf[start:i], fmt.Errorf("bad timestamp")
+		}
+		i += 1
+	}
+	return i, buf[start:i], nil
+}
+
+// isNumeric reports whether b is a decimal digit or a decimal point.
+func isNumeric(b byte) bool {
+	if b == '.' {
+		return true
+	}
+	return '0' <= b && b <= '9'
+}
+
+// scanNumber returns the end position within buf, start at i after
+// scanning over buf for an integer, or float.  It returns an
+// error if an invalid number is scanned.
+//
+// The number ends at a comma, a space or the end of buf. A leading '-', one
+// decimal point, an exponent introduced by 'e' (optionally signed) and the
+// spelling NaN (in any letter case) are accepted. Numbers that are long enough
+// to possibly exceed the range of their type, and all floats in scientific
+// notation, are parsed to make sure they are in range.
+func scanNumber(buf []byte, i int) (int, []byte, error) {
+	start := i
+
+	// Is negative number?
+	if i < len(buf) && buf[i] == '-' {
+		i += 1
+	}
+
+	// how many decimal points we've seen
+	decimals := 0
+
+	// indicates the number is float in scientific notation
+	scientific := false
+
+	for {
+		if i >= len(buf) {
+			break
+		}
+
+		if buf[i] == ',' || buf[i] == ' ' {
+			break
+		}
+
+		if buf[i] == '.' {
+			decimals += 1
+		}
+
+		// Can't have more than 1 decimal (e.g. 1.1.1 should fail)
+		if decimals > 1 {
+			return i, buf[start:i], fmt.Errorf("invalid number")
+		}
+
+		// `e` is valid for floats but not as the first char
+		if i > start && (buf[i] == 'e') {
+			scientific = true
+			i += 1
+			continue
+		}
+
+		// + and - are only valid at this point if they follow an e (scientific
+		// notation). The i > start guard keeps buf[i-1] inside the number.
+		if (buf[i] == '+' || buf[i] == '-') && i > start && buf[i-1] == 'e' {
+			i += 1
+			continue
+		}
+
+		// NaN is a valid float. Three bytes (i, i+1, i+2) are needed, which
+		// also covers a NaN that ends exactly at the end of buf.
+		if i+2 < len(buf) && (buf[i] == 'N' || buf[i] == 'n') {
+			if (buf[i+1] == 'a' || buf[i+1] == 'A') && (buf[i+2] == 'N' || buf[i+2] == 'n') {
+				i += 3
+				continue
+			}
+			return i, buf[start:i], fmt.Errorf("invalid number")
+		}
+
+		if !isNumeric(buf[i]) {
+			return i, buf[start:i], fmt.Errorf("invalid number")
+		}
+		i += 1
+	}
+
+	// It's more common that numbers will be within min/max range for their type but we need to prevent
+	// out of range numbers from being parsed successfully.  This uses some simple heuristics to decide
+	// if we should parse the number to the actual type.  It does not do it all the time because it incurs
+	// extra allocations and we end up converting the type again when writing points to disk.
+	//
+	// Only the smaller threshold of each pair has to be tested: anything that
+	// reaches minInt64Digits/minFloat64Digits has reached the max one as well.
+	digits := i - start
+	if decimals == 0 {
+		// Parse the int to check bounds: the number of digits could be larger than the max range
+		if digits >= maxInt64Digits {
+			if _, err := strconv.ParseInt(string(buf[start:i]), 10, 64); err != nil {
+				return i, buf[start:i], fmt.Errorf("invalid integer")
+			}
+		}
+	} else {
+		// Parse the float to check bounds if it's scientific or the number of digits could be larger than the max range
+		if scientific || digits >= maxFloat64Digits {
+			// The second argument is the bit size of the result, not a base.
+			if _, err := strconv.ParseFloat(string(buf[start:i]), 64); err != nil {
+				return i, buf[start:i], fmt.Errorf("invalid float")
+			}
+		}
+	}
+
+	return i, buf[start:i], nil
+}
+
+// scanBoolean returns the end position within buf, start at i after
+// scanning over buf for boolean. Valid values for a boolean are
+// t, T, true, True, TRUE, f, F, false, False, FALSE.  It returns an error if
+// an invalid boolean is scanned.
+//
+// The value ends at a comma, a space or the end of buf. When there is nothing
+// to scan (i is at or beyond the end of buf) an error and a nil slice are
+// returned.
+func scanBoolean(buf []byte, i int) (int, []byte, error) {
+	start := i
+
+	// Nothing to scan; slicing buf here could reach beyond its end.
+	if i >= len(buf) {
+		return i, nil, fmt.Errorf("invalid boolean")
+	}
+
+	// A boolean always starts with one of these letters; anything else is
+	// rejected without scanning further.
+	switch buf[i] {
+	case 't', 'T', 'f', 'F':
+	default:
+		return i, buf[start:i], fmt.Errorf("invalid boolean")
+	}
+
+	i += 1
+	for i < len(buf) && buf[i] != ',' && buf[i] != ' ' {
+		i += 1
+	}
+
+	// Comparing the converted slice against constants does not allocate.
+	switch string(buf[start:i]) {
+	case "t", "T", "true", "True", "TRUE",
+		"f", "F", "false", "False", "FALSE":
+		return i, buf[start:i], nil
+	}
+
+	return i, buf[start:i], fmt.Errorf("invalid boolean")
+}
+
+// skipWhitespace returns the position of the first byte at or after i that is
+// neither a space nor a tab, or len(buf) when only whitespace remains. A
+// position that is already at or beyond the end of buf is returned unchanged.
+//
+// Backslash escapes are not whitespace: an escaped character can be the first
+// character of a measurement or field name and therefore has to be left for
+// the caller to scan.
+func skipWhitespace(buf []byte, i int) int {
+	for i < len(buf) {
+		if buf[i] != ' ' && buf[i] != '\t' {
+			break
+		}
+		i += 1
+	}
+	return i
+}
+
+// scanTo returns the end position in buf and the next consecutive block
+// of bytes, starting from i and ending with stop byte.  The stop byte itself is
+// not part of the block, and a byte that follows a backslash is never treated
+// as the stop byte.
+//
+// If no stop byte is found the block extends to the end of buf and the
+// returned position is len(buf). If i is already beyond the end of buf, i and
+// a nil block are returned.
+func scanTo(buf []byte, i int, stop byte) (int, []byte) {
+	start := i
+	if start > len(buf) {
+		return i, nil
+	}
+
+	for i < len(buf) {
+		// skip the backslash and the byte it escapes
+		if buf[i] == '\\' {
+			i += 2
+			continue
+		}
+
+		// reached end of block?
+		if buf[i] == stop {
+			break
+		}
+		i += 1
+	}
+
+	// A trailing backslash makes the skip above step past the end; clamp so
+	// the slice expression below stays within buf.
+	if i > len(buf) {
+		i = len(buf)
+	}
+
+	return i, buf[start:i]
+}
+
+// scanToSpaceOr returns the end position in buf and the next consecutive block
+// of bytes, starting from i and ending with either a space or the stop byte,
+// whichever comes first.  The terminating byte is not part of the block, and a
+// byte that follows a backslash never terminates the block.
+//
+// If no terminator is found the block extends to the end of buf and the
+// returned position is len(buf). If i is already beyond the end of buf, i and
+// a nil block are returned.
+func scanToSpaceOr(buf []byte, i int, stop byte) (int, []byte) {
+	start := i
+	if start > len(buf) {
+		return i, nil
+	}
+
+	for i < len(buf) {
+		// skip the backslash and the byte it escapes
+		if buf[i] == '\\' {
+			i += 2
+			continue
+		}
+		// reached end of block?
+		if buf[i] == stop || buf[i] == ' ' {
+			break
+		}
+		i += 1
+	}
+
+	// A trailing backslash makes the skip above step past the end; clamp so
+	// the slice expression below stays within buf.
+	if i > len(buf) {
+		i = len(buf)
+	}
+
+	return i, buf[start:i]
+}
+
+// scanTagValue returns the end position in buf and the block of bytes that
+// starts at i and ends before the next comma.  A comma that follows a
+// backslash does not end the value.
+//
+// If there is no such comma the value extends to the end of buf and the
+// returned position is len(buf). If i is already beyond the end of buf, i and
+// a nil block are returned.
+func scanTagValue(buf []byte, i int) (int, []byte) {
+	start := i
+	if start > len(buf) {
+		return i, nil
+	}
+
+	for i < len(buf) {
+		// skip the backslash and the byte it escapes
+		if buf[i] == '\\' {
+			i += 2
+			continue
+		}
+
+		if buf[i] == ',' {
+			break
+		}
+		i += 1
+	}
+
+	// A trailing backslash makes the skip above step past the end; clamp so
+	// the slice expression below stays within buf.
+	if i > len(buf) {
+		i = len(buf)
+	}
+	return i, buf[start:i]
+}
+
+// scanFieldValue returns the end position in buf and the block of bytes that
+// starts at i and ends before the next comma that is not inside double quotes.
+// A double quote opens or closes a quoted section unless the byte in front of
+// it is a backslash, and a byte that follows a backslash never ends the value.
+//
+// If there is no such comma the value extends to the end of buf and the
+// returned position is len(buf). If i is already beyond the end of buf, i and
+// a nil block are returned.
+func scanFieldValue(buf []byte, i int) (int, []byte) {
+	start := i
+	if start > len(buf) {
+		return i, nil
+	}
+
+	quoted := false
+	for i < len(buf) {
+		// If we see a double quote, make sure it is not escaped. The very
+		// first byte of buf has no predecessor that could escape it.
+		if buf[i] == '"' && (i == 0 || buf[i-1] != '\\') {
+			i += 1
+			quoted = !quoted
+			continue
+		}
+
+		// skip the backslash and the byte it escapes
+		if buf[i] == '\\' {
+			i += 2
+			continue
+		}
+
+		if buf[i] == ',' && !quoted {
+			break
+		}
+		i += 1
+	}
+
+	// A trailing backslash makes the skip above step past the end; clamp so
+	// the slice expression below stays within buf.
+	if i > len(buf) {
+		i = len(buf)
+	}
+	return i, buf[start:i]
+}
+
+// escape returns a copy of in in which every byte that is a key of escapeCodes
+// has been replaced by its escape sequence.
+func escape(in []byte) []byte {
+	out := in
+	for raw, sequence := range escapeCodes {
+		out = bytes.Replace(out, []byte{raw}, sequence, -1)
+	}
+	return out
+}
+
+// escapeString returns in with every character that is a key of
+// escapeCodesStr replaced by its escape sequence.
+func escapeString(in string) string {
+	out := in
+	for raw, sequence := range escapeCodesStr {
+		out = strings.Replace(out, raw, sequence, -1)
+	}
+	return out
+}
+
+// unescape returns a copy of in in which every escape sequence listed in
+// escapeCodes has been replaced by the byte it stands for.
+func unescape(in []byte) []byte {
+	out := in
+	for raw, sequence := range escapeCodes {
+		out = bytes.Replace(out, sequence, []byte{raw}, -1)
+	}
+	return out
+}
+
+// unescapeString returns in with every escape sequence listed in
+// escapeCodesStr replaced by the character it stands for.
+func unescapeString(in string) string {
+	out := in
+	for raw, sequence := range escapeCodesStr {
+		out = strings.Replace(out, sequence, raw, -1)
+	}
+	return out
+}
+
+// escapeQuoteString returns a copy of in in which every double quote that is
+// not already escaped has been replaced by an escaped quote (\").  A quote
+// counts as escaped when the character directly in front of it is a backslash.
+// Quotes at the very start of the string and quotes that directly follow
+// another quote are escaped as well.  Strings without double quotes are
+// returned unchanged.
+func escapeQuoteString(in string) string {
+	if strings.IndexAny(in, `"`) == -1 {
+		return in
+	}
+
+	// Room for the input plus a couple of added backslashes.
+	out := make([]byte, 0, len(in)+2)
+	for i := 0; i < len(in); i++ {
+		if in[i] == '"' && (i == 0 || in[i-1] != '\\') {
+			out = append(out, '\\')
+		}
+		out = append(out, in[i])
+	}
+	return string(out)
+}
+
+// unescapeQuoteString returns a copy of in with any escaped double-quotes
+// with unescaped double quotes
+func unescapeQuoteString(in string) string {
+	return strings.Replace(in, `\"`, `"`, -1)
+}
+
+// NewPoint returns a new point with the given measurement name, tags, fields
+// and timestamp. The name and tags are encoded into the point's key and the
+// fields into their marshaled form.
+func NewPoint(name string, tags Tags, fields Fields, time time.Time) Point {
+	pt := new(point)
+	pt.key = makeKey([]byte(name), tags)
+	pt.time = time
+	pt.fields = fields.MarshalBinary()
+	return pt
+}
+
+// Data returns the binary encoded field data stored on the point.
+func (p *point) Data() []byte {
+	return p.data
+}
+
+// SetData replaces the binary encoded field data stored on the point. The
+// slice is stored as is, it is not copied.
+func (p *point) SetData(b []byte) {
+	p.data = b
+}
+
+// Key returns the text encoding of the measurement name and tags. The
+// returned slice is the point's own key, not a copy.
+func (p *point) Key() []byte {
+	return p.key
+}
+
+// name returns the measurement portion of the key, i.e. everything before the
+// first unescaped comma, still in its escaped form.
+func (p *point) name() []byte {
+	_, name := scanTo(p.key, 0, ',')
+	return name
+}
+
+// Name returns the unescaped measurement name for the point
+func (p *point) Name() string {
+	return string(unescape(p.name()))
+}
+
+// SetName updates the measurement name for the point. The key is rebuilt from
+// the new name and the point's current tags.
+func (p *point) SetName(name string) {
+	p.key = makeKey([]byte(name), p.Tags())
+}
+
+// Time returns the timestamp for the point
+func (p *point) Time() time.Time {
+	return p.time
+}
+
+// SetTime updates the timestamp for the point
+func (p *point) SetTime(t time.Time) {
+	p.time = t
+}
+
+// Tags returns the tag set for the point. The tags are decoded from the
+// point's key on every call, with both keys and values unescaped. The result
+// is an empty, non-nil map when the key is empty or has no measurement name.
+func (p *point) Tags() Tags {
+	tags := map[string]string{}
+
+	if len(p.key) == 0 {
+		return tags
+	}
+
+	pos, name := scanTo(p.key, 0, ',')
+
+	// An empty measurement name means there is nothing to decode.
+	if len(name) == 0 {
+		return tags
+	}
+
+	// Each tag is "key=value"; the loop's post statement steps over the comma
+	// that separates one tag from the next.
+	var key, value []byte
+	for i := pos + 1; i < len(p.key); i++ {
+		i, key = scanTo(p.key, i, '=')
+		i, value = scanTagValue(p.key, i+1)
+
+		tags[string(unescape(key))] = string(unescape(value))
+	}
+	return tags
+}
+
+// makeKey builds a point key from a measurement name and a tag set: the
+// escaped name followed by the bytes returned by tags.hashKey. The name has to
+// be passed in unescaped form because it is escaped here.
+func makeKey(name []byte, tags Tags) []byte {
+	return append(escape(name), tags.hashKey()...)
+}
+
+// SetTags replaces the tags for the point. The key is rebuilt from the
+// point's measurement name and the given tags.
+func (p *point) SetTags(tags Tags) {
+	// makeKey escapes the name it is given, so it must receive the unescaped
+	// name. Passing the raw key prefix would escape it a second time and
+	// corrupt names that contain commas, spaces, quotes or equal signs.
+	p.key = makeKey([]byte(p.Name()), tags)
+}
+
+// AddTag adds or replaces a tag value for a point. The key is rebuilt from
+// the point's measurement name and the updated tag set.
+func (p *point) AddTag(key, value string) {
+	tags := p.Tags()
+	tags[key] = value
+	// makeKey escapes the name it is given, so it must receive the unescaped
+	// name. Passing the raw key prefix would escape it a second time and
+	// corrupt names that contain commas, spaces, quotes or equal signs.
+	p.key = makeKey([]byte(p.Name()), tags)
+}
+
+// Fields returns the fields for the point. They are decoded from the point's
+// text encoded field data on every call.
+func (p *point) Fields() Fields {
+	return p.unmarshalBinary()
+}
+
+// AddField adds or replaces a field value for a point. All fields are
+// decoded, updated and marshaled again.
+func (p *point) AddField(name string, value interface{}) {
+	fields := p.Fields()
+	fields[name] = value
+	p.fields = fields.MarshalBinary()
+}
+
+// SetPrecision will round a time to the specified precision
+func (p *point) SetPrecision(precision string) {
+	switch precision {
+	case "n":
+	case "u":
+		p.SetTime(p.Time().Truncate(time.Microsecond))
+	case "ms":
+		p.SetTime(p.Time().Truncate(time.Millisecond))
+	case "s":
+		p.SetTime(p.Time().Truncate(time.Second))
+	case "m":
+		p.SetTime(p.Time().Truncate(time.Minute))
+	case "h":
+		p.SetTime(p.Time().Truncate(time.Hour))
+	}
+}
+
+// GetPrecisionMultiplier returns the number of nanoseconds in one unit of the
+// given precision: "u", "ms", "s", "m" or "h". Any other precision, including
+// "n", yields 1.
+func (p *point) GetPrecisionMultiplier(precision string) int64 {
+	switch precision {
+	case "u":
+		return int64(time.Microsecond)
+	case "ms":
+		return int64(time.Millisecond)
+	case "s":
+		return int64(time.Second)
+	case "m":
+		return int64(time.Minute)
+	case "h":
+		return int64(time.Hour)
+	default:
+		return int64(time.Nanosecond)
+	}
+}
+
+// String returns the text form of the point: the key, a space and the
+// encoded fields, followed by a space and the time in nanoseconds since the
+// Unix epoch unless the point's time is the zero time.
+func (p *point) String() string {
+	line := fmt.Sprintf("%s %s", p.Key(), string(p.fields))
+	if p.Time().IsZero() {
+		return line
+	}
+	return fmt.Sprintf("%s %d", line, p.UnixNano())
+}
+
+// unmarshalBinary decodes the point's text encoded field data into Fields.
+func (p *point) unmarshalBinary() Fields {
+	decoded := newFieldsFromBinary(p.fields)
+	return decoded
+}
+
+// HashID returns the 64-bit FNV-1a hash of the point's key.
+func (p *point) HashID() uint64 {
+	hasher := fnv.New64a()
+	hasher.Write(p.key)
+	return hasher.Sum64()
+}
+
+// UnixNano returns the point's time as nanoseconds since the Unix epoch.
+func (p *point) UnixNano() int64 {
+	when := p.Time()
+	return when.UnixNano()
+}
+
+// Tags maps tag keys to tag values.
+type Tags map[string]string
+
+// hashKey returns the text encoding of the tag set as it appears in a point
+// key: for every tag, ordered by escaped key, a comma followed by the escaped
+// key, an equal sign and the escaped value. An empty tag set yields nil.
+func (t Tags) hashKey() []byte {
+	if len(t) == 0 {
+		return nil
+	}
+
+	// Escape everything first; ordering is based on the escaped keys.
+	escaped := Tags{}
+	for key, value := range t {
+		escaped[escapeString(key)] = escapeString(value)
+	}
+
+	// Collect the keys and work out how many bytes the encoding needs: each
+	// tag contributes its key, its value, one ',' and one '='.
+	names := make([]string, 0, len(escaped))
+	size := 0
+	for key, value := range escaped {
+		names = append(names, key)
+		size += len(key) + len(value) + 2
+	}
+	sort.Strings(names)
+
+	out := make([]byte, 0, size)
+	for _, key := range names {
+		out = append(out, ',')
+		out = append(out, key...)
+		out = append(out, '=')
+		out = append(out, escaped[key]...)
+	}
+	return out
+}
+
+// Fields maps field names to field values.
+type Fields map[string]interface{}
+
+// parseNumber converts the text of a numeric field value. A value containing
+// '.', 'e', or one of the letters N, n, I, i (as found in NaN and Inf) is
+// parsed as a float64; every other value is parsed as a base 10 int64. The
+// error of the underlying strconv call is returned when the text is not a
+// valid number of the selected kind.
+func parseNumber(val []byte) (interface{}, error) {
+	for i := 0; i < len(val); i++ {
+		// If there is a decimal or an N (NaN), I (Inf), parse as float
+		if val[i] == '.' || val[i] == 'N' || val[i] == 'n' || val[i] == 'I' || val[i] == 'i' || val[i] == 'e' {
+			return strconv.ParseFloat(string(val), 64)
+		}
+		// The scan used to contain a branch returning the raw text for
+		// non-digit bytes, guarded by `val[i] < '0' && val[i] > '9'`. That
+		// condition can never hold, so the branch was dead and is removed:
+		// anything that is not a float is left to ParseInt, which rejects
+		// malformed input with an error.
+	}
+	return strconv.ParseInt(string(val), 10, 64)
+}
+
+// newFieldsFromBinary decodes text encoded field data ("name=value" pairs
+// separated by commas) into Fields. Field names are unescaped. Values that
+// start with a double quote become strings, values that look like numbers
+// become int64 or float64, every other value is parsed as a bool, and a field
+// with an empty value, or without any '=', is stored as nil. Pairs without a
+// name are skipped.
+//
+// The function panics when a numeric or boolean value cannot be parsed.
+func newFieldsFromBinary(buf []byte) Fields {
+	fields := Fields{}
+	var (
+		i              int
+		name, valueBuf []byte
+		value          interface{}
+		err            error
+	)
+	for i < len(buf) {
+		i, name = scanTo(buf, i, '=')
+
+		// No '=' in the remainder: there is no value left to read.
+		if i >= len(buf) {
+			if len(name) > 0 {
+				fields[string(unescape(name))] = nil
+			}
+			break
+		}
+
+		// A pair without a name cannot be stored. Step over its value and the
+		// following separator so that the loop always makes progress.
+		if len(name) == 0 {
+			i, _ = scanFieldValue(buf, i+1)
+			i += 1
+			continue
+		}
+
+		i, valueBuf = scanFieldValue(buf, i+1)
+
+		// Step over the ',' that ends this pair, whatever the value turns out
+		// to be, so that the next name does not start with the separator.
+		i += 1
+
+		if len(valueBuf) == 0 {
+			fields[string(unescape(name))] = nil
+			continue
+		}
+
+		// If the first char is a double-quote, then unmarshal as string
+		if valueBuf[0] == '"' {
+			if len(valueBuf) < 2 {
+				// A lone quote has no content and no closing quote to strip.
+				value = ""
+			} else {
+				value = unescapeQuoteString(string(valueBuf[1 : len(valueBuf)-1]))
+			}
+			// Check for numeric characters and special NaN or Inf
+		} else if (valueBuf[0] >= '0' && valueBuf[0] <= '9') || valueBuf[0] == '-' || valueBuf[0] == '+' || valueBuf[0] == '.' ||
+			valueBuf[0] == 'N' || valueBuf[0] == 'n' || // NaN
+			valueBuf[0] == 'I' || valueBuf[0] == 'i' { // Inf
+
+			value, err = parseNumber(valueBuf)
+			if err != nil {
+				panic(fmt.Sprintf("unable to parse number value '%v': %v", string(valueBuf), err))
+			}
+
+			// Otherwise parse it as bool
+		} else {
+			value, err = strconv.ParseBool(string(valueBuf))
+			if err != nil {
+				panic(fmt.Sprintf("unable to parse bool value '%v': %v\n", string(valueBuf), err))
+			}
+		}
+		fields[string(unescape(name))] = value
+	}
+	return fields
+}
+
+// MarshalBinary encodes the fields as text: "name=value" pairs ordered by
+// field name and separated by commas, with the names escaped.
+//
+// Integers (int, int32, int64, uint64) are written in base 10, float64 values
+// in decimal notation with ".0" appended when they have no fractional part,
+// bools as true/false, []byte values verbatim and strings in double quotes
+// with their quotes escaped. A nil value is written as an empty value. Values
+// of any other type are formatted with %v and written like strings. An empty
+// field set yields an empty, non-nil slice.
+func (p Fields) MarshalBinary() []byte {
+	names := make([]string, 0, len(p))
+	for name := range p {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+
+	out := []byte{}
+	for n, name := range names {
+		// Separate the pairs; nothing is written after the last one.
+		if n > 0 {
+			out = append(out, ',')
+		}
+
+		out = append(out, escapeString(name)...)
+		out = append(out, '=')
+
+		switch v := p[name].(type) {
+		case int:
+			out = strconv.AppendInt(out, int64(v), 10)
+		case int32:
+			out = strconv.AppendInt(out, int64(v), 10)
+		case uint64:
+			out = strconv.AppendUint(out, v, 10)
+		case int64:
+			out = strconv.AppendInt(out, v, 10)
+		case float64:
+			out = strconv.AppendFloat(out, v, 'f', -1, 64)
+			// Make sure a whole number still reads back as a float.
+			if _, frac := math.Modf(v); frac == 0 {
+				out = append(out, ".0"...)
+			}
+		case bool:
+			out = strconv.AppendBool(out, v)
+		case []byte:
+			out = append(out, v...)
+		case string:
+			out = append(out, '"')
+			out = append(out, escapeQuoteString(v)...)
+			out = append(out, '"')
+		case nil:
+			// nothing to write for the value
+		default:
+			// Can't determine the type, so convert to string
+			out = append(out, '"')
+			out = append(out, escapeQuoteString(fmt.Sprintf("%v", v))...)
+			out = append(out, '"')
+		}
+	}
+	return out
+}
+
+// indexedSlice pairs a byte buffer with a list of offsets into it. Its Len,
+// Less and Swap methods order the offsets by the bytes found between each
+// offset and the next unescaped '='; the buffer itself is never modified.
+type indexedSlice struct {
+	// indices holds the offsets into b that are being ordered
+	indices []int
+	// b is the buffer the offsets refer to
+	b       []byte
+}
+
+// Less reports whether the name starting at offset indices[i] orders before
+// the name starting at offset indices[j].
+func (s *indexedSlice) Less(i, j int) bool {
+	_, a := scanTo(s.b, s.indices[i], '=')
+	_, b := scanTo(s.b, s.indices[j], '=')
+	return bytes.Compare(a, b) < 0
+}
+
+// Swap exchanges the offsets at positions i and j.
+func (s *indexedSlice) Swap(i, j int) {
+	s.indices[i], s.indices[j] = s.indices[j], s.indices[i]
+}
+
+// Len returns the number of offsets.
+func (s *indexedSlice) Len() int {
+	return len(s.indices)
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor.go
new file mode 100644
index 00000000000..4aca953b1ff
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor.go
@@ -0,0 +1,1031 @@
+package tsdb
+
+import (
+	"errors"
+	"fmt"
+	"log"
+	"os"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/influxdb/influxdb/influxql"
+	"github.com/influxdb/influxdb/meta"
+)
+
+// QueryExecutor executes every statement in an influxdb Query. It is responsible for
+// coordinating between the local tsdb.Store, the meta.Store, and the other nodes in
+// the cluster to run the query against their local tsdb.Stores. There should be one
+// executor per running process.
+type QueryExecutor struct {
+	// MetaStore is the meta store for accessing and updating cluster and schema data.
+	MetaStore interface {
+		Database(name string) (*meta.DatabaseInfo, error)
+		Databases() ([]meta.DatabaseInfo, error)
+		User(name string) (*meta.UserInfo, error)
+		AdminUserExists() (bool, error)
+		Authenticate(username, password string) (*meta.UserInfo, error)
+		RetentionPolicy(database, name string) (rpi *meta.RetentionPolicyInfo, err error)
+		UserCount() (int, error)
+		ShardGroupsByTimeRange(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error)
+		NodeID() uint64
+	}
+
+	// MetaStatementExecutor runs the statements ExecuteQuery has no case for, and DROP DATABASE after local cleanup.
+	MetaStatementExecutor interface {
+		ExecuteStatement(stmt influxql.Statement) *influxql.Result
+	}
+
+	// ShardMapper creates the Mapper that plan requests for each shard of a SELECT.
+	ShardMapper interface {
+		CreateMapper(shard meta.ShardInfo, stmt string, chunkSize int) (Mapper, error)
+	}
+
+	Logger *log.Logger // written to by ErrAuthorize.Error; NewQueryExecutor points it at stderr
+
+	// store is the local data store.
+	store *Store
+}
+
+// NewQueryExecutor returns a QueryExecutor over store that logs to stderr with a "[query] " prefix.
+func NewQueryExecutor(store *Store) *QueryExecutor {
+	qe := new(QueryExecutor)
+	qe.store = store
+	qe.Logger = log.New(os.Stderr, "[query] ", log.LstdFlags)
+	return qe
+}
+
+// Authorize reports whether user u may execute query on database; a nil result means authorized.
+// database can be "" for queries that do not require a database.
+// While no users exist, only a query whose first statement creates an admin user is allowed.
+// Otherwise a nil u is rejected, admins are always allowed, and other users are checked per statement.
+func (q *QueryExecutor) Authorize(u *meta.UserInfo, query *influxql.Query, database string) error {
+	// Bootstrap case: the user count was read successfully and is zero.
+	if n, err := q.MetaStore.UserCount(); err == nil && n == 0 {
+		// Only the first statement matters, and only if there is one.
+		if len(query.Statements) > 0 {
+			// It has to be a CreateUserStatement with Admin set.
+			create, isCreate := query.Statements[0].(*influxql.CreateUserStatement)
+			if isCreate && create.Admin {
+				return nil
+			}
+		}
+		return NewErrAuthorize(q, query, "", database, "create admin user first or disable authentication")
+	}
+
+	if u == nil {
+		return NewErrAuthorize(q, query, "", database, "no user provided")
+	}
+
+	// Admins may run anything.
+	if u.Admin {
+		return nil
+	}
+
+	// Non-admins: verify every privilege of every statement.
+	for _, stmt := range query.Statements {
+		// Privileges this statement needs.
+		required := stmt.RequiredPrivileges()
+
+		// The first privilege the user lacks rejects the
+		// whole query.
+		for _, p := range required {
+			if p.Admin {
+				// u is known not to be an admin here, so an admin-only
+				// statement can never pass.
+				reason := fmt.Sprintf("statement '%s', requires admin privilege", stmt)
+				return NewErrAuthorize(q, query, u.Name, database, reason)
+			}
+
+			// Prefer the database named by the privilege itself and
+			// fall back to the caller-supplied one when it names
+			// none.
+			target := p.Name
+			if target == "" {
+				target = database
+			}
+			if !u.Authorize(p.Privilege, target) {
+				reason := fmt.Sprintf("statement '%s', requires %s on %s", stmt, p.Privilege.String(), target)
+				return NewErrAuthorize(q, query, u.Name, database, reason)
+			}
+		}
+	}
+	return nil
+}
+
+// ExecuteQuery executes an InfluxQL query against the server.
+// It sends results down the returned chan and closes it when done. Execution stops at the first
+// statement that fails; every statement after it is reported as ErrNotExecuted. The returned error is always nil.
+func (q *QueryExecutor) ExecuteQuery(query *influxql.Query, database string, chunkSize int) (<-chan *influxql.Result, error) {
+	// Execute each statement. The index lives outside the loop so the tail below knows where execution stopped.
+	results := make(chan *influxql.Result)
+	go func() {
+		var i int
+		var stmt influxql.Statement
+		for i, stmt = range query.Statements {
+			// If a default database wasn't passed in by the caller, check the statement.
+			// Some types of statements have an associated default database, even if it
+			// is not explicitly included.
+			defaultDB := database
+			if defaultDB == "" {
+				if s, ok := stmt.(influxql.HasDefaultDatabase); ok {
+					defaultDB = s.DefaultDatabase()
+				}
+			}
+
+			// Normalize each statement.
+			if err := q.normalizeStatement(stmt, defaultDB); err != nil {
+				results <- &influxql.Result{Err: err}
+				break
+			}
+
+			var res *influxql.Result
+			switch stmt := stmt.(type) {
+			case *influxql.SelectStatement:
+				if err := q.executeSelectStatement(i, stmt, results, chunkSize); err != nil {
+					// A break here would only leave the switch, so hand the error to the shared
+					// path below, which tags it with the statement ID and stops the loop.
+					res = &influxql.Result{Err: err}
+				}
+			case *influxql.DropSeriesStatement:
+				// TODO: handle this in a cluster
+				res = q.executeDropSeriesStatement(stmt, database)
+			case *influxql.ShowSeriesStatement:
+				res = q.executeShowSeriesStatement(stmt, database)
+			case *influxql.DropMeasurementStatement:
+				// TODO: handle this in a cluster
+				res = q.executeDropMeasurementStatement(stmt, database)
+			case *influxql.ShowMeasurementsStatement:
+				res = q.executeShowMeasurementsStatement(stmt, database)
+			case *influxql.ShowTagKeysStatement:
+				res = q.executeShowTagKeysStatement(stmt, database)
+			case *influxql.ShowTagValuesStatement:
+				res = q.executeShowTagValuesStatement(stmt, database)
+			case *influxql.ShowFieldKeysStatement:
+				res = q.executeShowFieldKeysStatement(stmt, database)
+			case *influxql.ShowDiagnosticsStatement:
+				res = q.executeShowDiagnosticsStatement(stmt)
+			case *influxql.DeleteStatement:
+				res = &influxql.Result{Err: ErrInvalidQuery}
+			case *influxql.DropDatabaseStatement:
+				// TODO: handle this in a cluster
+				res = q.executeDropDatabaseStatement(stmt)
+			default:
+				// Delegate all other meta statements to a separate executor. They don't hit tsdb storage.
+				res = q.MetaStatementExecutor.ExecuteStatement(stmt)
+			}
+
+			if res != nil {
+				// set the StatementID for the handler on the other side to combine results
+				res.StatementID = i
+
+				// If an error occurs then stop processing remaining statements.
+				results <- res
+				if res.Err != nil {
+					break
+				}
+			}
+		}
+
+		// if there was an error send results that the remaining statements weren't executed
+		for ; i < len(query.Statements)-1; i++ {
+			results <- &influxql.Result{Err: ErrNotExecuted}
+		}
+
+		close(results)
+	}()
+
+	return results, nil
+}
+
+// Plan creates an execution plan for the given SelectStatement and returns an Executor.
+func (q *QueryExecutor) plan(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) {
+	shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers.
+
+	// Replace instances of "now()" with the current time, and check the resultant times.
+	stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: time.Now().UTC()})
+	tmin, tmax := influxql.TimeRange(stmt.Condition)
+	if tmax.IsZero() {
+		tmax = time.Now()
+	}
+	if tmin.IsZero() {
+		tmin = time.Unix(0, 0)
+	}
+
+	for _, src := range stmt.Sources {
+		mm, ok := src.(*influxql.Measurement)
+		if !ok {
+			return nil, fmt.Errorf("invalid source type: %#v", src)
+		}
+
+		// Build the set of target shards. Using shard IDs as keys ensures each shard ID
+		// occurs only once.
+		shardGroups, err := q.MetaStore.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax)
+		if err != nil {
+			return nil, err
+		}
+		for _, g := range shardGroups {
+			for _, sh := range g.Shards {
+				shards[sh.ID] = sh
+			}
+		}
+	}
+
+	// Build the Mappers, one per shard.
+	mappers := []Mapper{}
+	for _, sh := range shards {
+		m, err := q.ShardMapper.CreateMapper(sh, stmt.String(), chunkSize)
+		if err != nil {
+			return nil, err
+		}
+		if m == nil {
+			// No data for this shard, skip it.
+			continue
+		}
+		mappers = append(mappers, m)
+	}
+
+	var executor Executor
+	if len(mappers) > 0 {
+		// All Mapper are of same type, so check first to determine correct Executor type.
+		if _, ok := mappers[0].(*RawMapper); ok {
+			executor = NewRawExecutor(stmt, mappers, chunkSize)
+		} else {
+			executor = NewAggregateExecutor(stmt, mappers)
+		}
+	} else {
+		// With no mappers, the Executor type doesn't matter.
+		executor = NewRawExecutor(stmt, nil, chunkSize)
+	}
+	return executor, nil
+}
+
+// executeSelectStatement rewrites, plans and runs a select statement, streaming each row to results.
+func (q *QueryExecutor) executeSelectStatement(statementID int, stmt *influxql.SelectStatement, results chan *influxql.Result, chunkSize int) error {
+	// Perform any necessary query re-writing.
+	stmt, err := q.rewriteSelectStatement(stmt)
+	if err != nil {
+		return err
+	}
+
+	// Plan statement execution.
+	executor, err := q.plan(stmt, chunkSize)
+	if err != nil {
+		return err
+	}
+
+	// Execute plan.
+	rows := executor.Execute()
+
+	// Stream results from the channel. We should send an empty result if nothing comes through.
+	sent := false
+	for row := range rows {
+		if row.Err != nil {
+			return row.Err
+		}
+		// Each row travels in its own Result tagged with the statement ID.
+		sent = true
+		results <- &influxql.Result{StatementID: statementID, Series: []*influxql.Row{row}}
+	}
+
+	if !sent {
+		results <- &influxql.Result{StatementID: statementID, Series: make([]*influxql.Row, 0)}
+	}
+
+	return nil
+}
+
+// rewriteSelectStatement performs any necessary query re-writing.
+func (q *QueryExecutor) rewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
+	var err error
+
+	// Expand regex expressions in the FROM clause.
+	sources, err := q.expandSources(stmt.Sources)
+	if err != nil {
+		return nil, err
+	}
+	stmt.Sources = sources
+
+	// Expand wildcards in the fields or GROUP BY.
+	if stmt.HasWildcard() {
+		stmt, err = q.expandWildcards(stmt)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	stmt.RewriteDistinct()
+
+	return stmt, nil
+}
+
+// expandWildcards returns a new SelectStatement with wildcards in the fields
+// and/or GROUP BY expanded with actual field names.
+func (q *QueryExecutor) expandWildcards(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
+	// Nothing to expand: hand the statement back untouched.
+	if !stmt.HasWildcard() {
+		return stmt, nil
+	}
+
+	// Sets remember which names were already emitted.
+	seenFields := map[string]struct{}{}
+	seenDimensions := map[string]struct{}{}
+
+	var fields influxql.Fields
+	var dimensions influxql.Dimensions
+
+	// Walk the FROM clause, gathering fields & dimensions of every measurement; other source types are skipped.
+	for _, src := range stmt.Sources {
+		m, ok := src.(*influxql.Measurement)
+		if !ok {
+			continue
+		}
+		// Lookup the database. The database may not exist if no data for this database
+		// was ever written to the shard; the statement is then returned unexpanded.
+		db := q.store.DatabaseIndex(m.Database)
+		if db == nil {
+			return stmt, nil
+		}
+
+		// Lookup the measurement in the database.
+		mm := db.measurements[m.Name]
+		if mm == nil {
+			return nil, ErrMeasurementNotFound(m.String())
+		}
+
+		// Get the fields for this measurement.
+		for _, name := range mm.FieldNames() {
+			if _, seen := seenFields[name]; !seen {
+				seenFields[name] = struct{}{}
+				fields = append(fields, &influxql.Field{Expr: &influxql.VarRef{Val: name}})
+			}
+		}
+
+		// Get the dimensions for this measurement.
+		for _, key := range mm.TagKeys() {
+			if _, seen := seenDimensions[key]; !seen {
+				seenDimensions[key] = struct{}{}
+				dimensions = append(dimensions, &influxql.Dimension{Expr: &influxql.VarRef{Val: key}})
+			}
+		}
+	}
+
+	// Return a new SelectStatement with the wild cards rewritten.
+	return stmt.RewriteWildcards(fields, dimensions), nil
+}
+
+// expandSources expands regex sources and removes duplicates, returning the sources sorted by name.
+// NOTE: sources must be normalized (db and rp set) before calling this function.
+func (q *QueryExecutor) expandSources(sources influxql.Sources) (influxql.Sources, error) {
+	// Use a map as a set so each source name is listed once, whether it was written twice or matched by two regexes.
+	set := map[string]influxql.Source{}
+	names := []string{}
+
+	// Iterate all sources, expanding regexes when they're found.
+	for _, source := range sources {
+		switch src := source.(type) {
+		case *influxql.Measurement:
+			if src.Regex == nil {
+				name := src.String()
+				if _, ok := set[name]; !ok {
+					names = append(names, name)
+				}
+				set[name] = src
+				continue
+			}
+
+			// Lookup the database; when it has no local index, nil sources and a nil error are returned.
+			db := q.store.DatabaseIndex(src.Database)
+			if db == nil {
+				return nil, nil
+			}
+
+			// Get measurements from the database that match the regex.
+			measurements := db.measurementsByRegex(src.Regex.Val)
+
+			// Add those measurements to the set.
+			for _, m := range measurements {
+				m2 := &influxql.Measurement{
+					Database:        src.Database,
+					RetentionPolicy: src.RetentionPolicy,
+					Name:            m.Name,
+				}
+				name := m2.String()
+				if _, ok := set[name]; !ok {
+					set[name] = m2
+					names = append(names, name)
+				}
+			}
+
+		default:
+			return nil, fmt.Errorf("expandSources: unsuported source type: %T", source)
+		}
+	}
+
+	// Sort the list of source names.
+	sort.Strings(names)
+
+	// Convert set to a list of Sources.
+	expanded := make(influxql.Sources, 0, len(set))
+	for _, name := range names {
+		expanded = append(expanded, set[name])
+	}
+
+	return expanded, nil
+}
+
+// executeDropDatabaseStatement asks the local store to delete the database together with every shard the meta store lists for it, then forwards the statement to the MetaStatementExecutor.
+// TODO: make this work in a cluster/distributed
+func (q *QueryExecutor) executeDropDatabaseStatement(stmt *influxql.DropDatabaseStatement) *influxql.Result {
+	info, err := q.MetaStore.Database(stmt.Name)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+	if info == nil {
+		return &influxql.Result{Err: ErrDatabaseNotFound(stmt.Name)}
+	}
+
+	var shardIDs []uint64
+	for _, policy := range info.RetentionPolicies {
+		for _, group := range policy.ShardGroups {
+			for _, shard := range group.Shards {
+				shardIDs = append(shardIDs, shard.ID)
+			}
+		}
+	}
+
+	if err = q.store.DeleteDatabase(stmt.Name, shardIDs); err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	return q.MetaStatementExecutor.ExecuteStatement(stmt)
+}
+
+// executeDropMeasurementStatement removes the measurement and all series data from the local store for the given measurement
+func (q *QueryExecutor) executeDropMeasurementStatement(stmt *influxql.DropMeasurementStatement, database string) *influxql.Result {
+	// A database without a local index yields an empty, error-free result.
+	index := q.store.DatabaseIndex(database)
+	if index == nil {
+		return &influxql.Result{}
+	}
+
+	measurement := index.Measurement(stmt.Name)
+	if measurement == nil {
+		return &influxql.Result{Err: ErrMeasurementNotFound(stmt.Name)}
+	}
+
+	// The index entry goes first ...
+	index.DropMeasurement(measurement.Name)
+
+	// ... and the raw data for the measurement's series second.
+	if err := q.store.deleteMeasurement(measurement.Name, measurement.SeriesKeys()); err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	return &influxql.Result{}
+}
+
+// executeDropSeriesStatement removes all series from the local store that match the drop query
+func (q *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStatement, database string) *influxql.Result {
+	// Find the database.
+	index := q.store.DatabaseIndex(database)
+	if index == nil {
+		return &influxql.Result{}
+	}
+
+	// Expand regex expressions in the FROM clause.
+	sources, err := q.expandSources(stmt.Sources)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	measurements, err := measurementsFromSourcesOrDB(index, sources...)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	var keys []string
+	for _, m := range measurements {
+		var ids seriesIDs
+		if stmt.Condition == nil {
+			// No WHERE clause so get all series IDs for this measurement.
+			ids = m.seriesIDs
+		} else {
+			// Get series IDs that match the WHERE clause.
+			ids, _, err = m.walkWhereForSeriesIds(stmt.Condition)
+			if err != nil {
+				return &influxql.Result{Err: err}
+			}
+		}
+
+		for _, id := range ids {
+			keys = append(keys, m.seriesByID[id].Key)
+		}
+	}
+
+	// delete the raw series data
+	if err := q.store.deleteSeries(keys); err != nil {
+		return &influxql.Result{Err: err}
+	}
+	// remove them from the index
+	index.DropSeries(keys)
+
+	return &influxql.Result{}
+}
+
+// executeShowSeriesStatement returns one row per measurement; each value is a series key followed by that series' tag values.
+func (q *QueryExecutor) executeShowSeriesStatement(stmt *influxql.ShowSeriesStatement, database string) *influxql.Result {
+	// Find the database.
+	db := q.store.DatabaseIndex(database)
+	if db == nil {
+		return &influxql.Result{}
+	}
+
+	// Expand regex expressions in the FROM clause.
+	sources, err := q.expandSources(stmt.Sources)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	// Get the list of measurements we're interested in.
+	measurements, err := measurementsFromSourcesOrDB(db, sources...)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	// Create result struct that will be populated and returned.
+	result := &influxql.Result{
+		Series: make(influxql.Rows, 0, len(measurements)),
+	}
+
+	// Loop through measurements to build result. One result row / measurement.
+	for _, m := range measurements {
+		var ids seriesIDs
+		if stmt.Condition != nil {
+			// Get series IDs that match the WHERE clause.
+			ids, _, err = m.walkWhereForSeriesIds(stmt.Condition)
+			if err != nil {
+				return &influxql.Result{Err: err}
+			}
+
+			// If no series matched, then go to the next measurement.
+			if len(ids) == 0 {
+				continue
+			}
+
+			// TODO: check return of walkWhereForSeriesIds for fields
+		} else {
+			// No WHERE clause so get all series IDs for this measurement.
+			ids = m.seriesIDs
+		}
+
+		// Make a new row for this measurement.
+		r := &influxql.Row{
+			Name:    m.Name,
+			Columns: m.TagKeys(),
+		}
+
+		// Loop through series IDs getting matching tag sets.
+		for _, id := range ids {
+			if s, ok := m.seriesByID[id]; ok {
+				// Reserve room for the series key plus one value per tag column.
+				values := make([]interface{}, 0, len(r.Columns)+1)
+				// make the series key the first value
+				values = append(values, s.Key)
+
+				for _, column := range r.Columns {
+					values = append(values, s.Tags[column])
+				}
+
+				// Add the tag values to the row.
+				r.Values = append(r.Values, values)
+			}
+		}
+		// make the id the first column
+		r.Columns = append([]string{"_key"}, r.Columns...)
+
+		// Append the row to the result.
+		result.Series = append(result.Series, r)
+	}
+
+	if stmt.Limit > 0 || stmt.Offset > 0 {
+		result.Series = q.filterShowSeriesResult(stmt.Limit, stmt.Offset, result.Series)
+	}
+
+	return result
+}
+
+// filterShowSeriesResult will limit the number of series returned based on the limit and the offset.
+// Unlike limit and offset on SELECT statements, the limit and offset don't apply to the number of Rows, but
+// to the number of total Values returned, since each Value represents a unique series.
+// A limit <= 0 means no limit, so OFFSET works on its own; rows that keep values have Values replaced in place.
+func (q *QueryExecutor) filterShowSeriesResult(limit, offset int, rows influxql.Rows) influxql.Rows {
+	var filteredSeries influxql.Rows
+	seriesCount := 0
+	for _, r := range rows {
+		var currentSeries [][]interface{}
+		// filter the values
+		for _, v := range r.Values {
+			if seriesCount >= offset && (limit <= 0 || seriesCount-offset < limit) {
+				currentSeries = append(currentSeries, v)
+			}
+			seriesCount++
+		}
+
+		// only add the row back in if there are some values in it
+		if len(currentSeries) > 0 {
+			r.Values = currentSeries
+			filteredSeries = append(filteredSeries, r)
+			if limit > 0 && seriesCount > limit+offset {
+				return filteredSeries
+			}
+		}
+	}
+	return filteredSeries
+}
+
+// executeShowMeasurementsStatement returns the sorted measurement names of database, filtered by WHERE and paged by LIMIT/OFFSET.
+func (q *QueryExecutor) executeShowMeasurementsStatement(stmt *influxql.ShowMeasurementsStatement, database string) *influxql.Result {
+	// Find the database.
+	db := q.store.DatabaseIndex(database)
+	if db == nil {
+		return &influxql.Result{}
+	}
+
+	var measurements Measurements
+
+	// If a WHERE clause was specified, filter the measurements.
+	if stmt.Condition != nil {
+		var err error
+		measurements, err = db.measurementsByExpr(stmt.Condition)
+		if err != nil {
+			return &influxql.Result{Err: err}
+		}
+	} else {
+		// Otherwise, get all measurements from the database.
+		measurements = db.Measurements()
+	}
+	sort.Sort(measurements)
+
+	first := stmt.Offset
+	total := len(measurements)
+
+	// If OFFSET is past the end of the array, return empty results.
+	if first >= total {
+		return &influxql.Result{}
+	}
+
+	// Work out the exclusive end index. A positive LIMIT that fits inside the
+	// list cuts it short; otherwise everything from OFFSET on is returned.
+	last := total
+	if stmt.Limit > 0 && first+stmt.Limit < total {
+		last = first + stmt.Limit
+	}
+
+	// Make a result row to hold all measurement names.
+	row := &influxql.Row{
+		Name:    "measurements",
+		Columns: []string{"name"},
+	}
+
+	// Add one value to the row for each measurement name.
+	for i := first; i < last; i++ {
+		// Every value is a one-element slice holding the name.
+		name := measurements[i].Name
+		row.Values = append(row.Values, []interface{}{name})
+	}
+
+	// Make a result.
+	result := &influxql.Result{
+		Series: []*influxql.Row{row},
+	}
+
+	return result
+}
+
+// executeShowTagKeysStatement returns one row per measurement with a "tagKey" column listing its tag keys.
+func (q *QueryExecutor) executeShowTagKeysStatement(stmt *influxql.ShowTagKeysStatement, database string) *influxql.Result {
+	// Find the database.
+	db := q.store.DatabaseIndex(database)
+	if db == nil {
+		return &influxql.Result{}
+	}
+
+	// Expand regex expressions in the FROM clause.
+	sources, err := q.expandSources(stmt.Sources)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	// Get the list of measurements we're interested in.
+	measurements, err := measurementsFromSourcesOrDB(db, sources...)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	// Make result.
+	result := &influxql.Result{
+		Series: make(influxql.Rows, 0, len(measurements)),
+	}
+
+	// Add one row per measurement to the result.
+	for _, m := range measurements {
+		// TODO: filter tag keys by stmt.Condition
+
+		// Get the tag keys in the order TagKeys reports them.
+		tagKeys := m.TagKeys()
+
+		// Convert keys to an [][]interface{}, one single-column value per key.
+		values := make([][]interface{}, 0, len(m.seriesByTagKeyValue))
+		for _, k := range tagKeys {
+			values = append(values, []interface{}{k})
+		}
+
+		// Make a result row for the measurement and add it to the result.
+		row := &influxql.Row{
+			Name:    m.Name,
+			Columns: []string{"tagKey"},
+			Values:  values,
+		}
+
+		result.Series = append(result.Series, row)
+	}
+
+	// TODO: LIMIT & OFFSET
+
+	return result
+}
+
+// executeShowTagValuesStatement returns one "<key>TagValues" row per requested tag key with its sorted values.
+func (q *QueryExecutor) executeShowTagValuesStatement(stmt *influxql.ShowTagValuesStatement, database string) *influxql.Result {
+	// Find the database.
+	db := q.store.DatabaseIndex(database)
+	if db == nil {
+		return &influxql.Result{}
+	}
+
+	// Expand regex expressions in the FROM clause.
+	sources, err := q.expandSources(stmt.Sources)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	// Get the list of measurements we're interested in.
+	measurements, err := measurementsFromSourcesOrDB(db, sources...)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	// Make result.
+	result := &influxql.Result{
+		Series: make(influxql.Rows, 0),
+	}
+
+	tagValues := make(map[string]stringSet)
+	for _, m := range measurements {
+		var ids seriesIDs
+		if stmt.Condition != nil {
+			// Get series IDs that match the WHERE clause.
+			ids, _, err = m.walkWhereForSeriesIds(stmt.Condition)
+			if err != nil {
+				return &influxql.Result{Err: err}
+			}
+
+			// If no series matched, then go to the next measurement.
+			if len(ids) == 0 {
+				continue
+			}
+
+			// TODO: check return of walkWhereForSeriesIds for fields
+		} else {
+			// No WHERE clause so get all series IDs for this measurement.
+			ids = m.seriesIDs
+		}
+
+		for k, v := range m.tagValuesByKeyAndSeriesID(stmt.TagKeys, ids) {
+			existing, ok := tagValues[k]
+			if !ok {
+				existing = v
+			}
+			tagValues[k] = existing.union(v)
+		}
+	}
+
+	for key, set := range tagValues {
+		row := &influxql.Row{
+			Name:    key + "TagValues",
+			Columns: []string{key},
+		}
+
+		vals := set.list()
+		sort.Strings(vals)
+
+		// One single-column value per tag value, in sorted order.
+		for _, val := range vals {
+			row.Values = append(row.Values, []interface{}{val})
+		}
+
+		result.Series = append(result.Series, row)
+	}
+
+	sort.Sort(result.Series)
+	return result
+}
+
+func (q *QueryExecutor) executeShowFieldKeysStatement(stmt *influxql.ShowFieldKeysStatement, database string) *influxql.Result {
+	var err error
+
+	// Find the database.
+	db := q.store.DatabaseIndex(database)
+	if db == nil {
+		return &influxql.Result{}
+	}
+
+	// Expand regex expressions in the FROM clause.
+	sources, err := q.expandSources(stmt.Sources)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	measurements, err := measurementsFromSourcesOrDB(db, sources...)
+	if err != nil {
+		return &influxql.Result{Err: err}
+	}
+
+	// Make result.
+	result := &influxql.Result{
+		Series: make(influxql.Rows, 0, len(measurements)),
+	}
+
+	// Loop through measurements, adding a result row for each.
+	for _, m := range measurements {
+		// Create a new row.
+		r := &influxql.Row{
+			Name:    m.Name,
+			Columns: []string{"fieldKey"},
+		}
+
+		// Get a list of field names from the measurement then sort them.
+		names := m.FieldNames()
+		sort.Strings(names)
+
+		// Add the field names to the result row values.
+		for _, n := range names {
+			v := interface{}(n)
+			r.Values = append(r.Values, []interface{}{v})
+		}
+
+		// Append the row to the result.
+		result.Series = append(result.Series, r)
+	}
+
+	return result
+}
+
+// measurementsFromSourcesOrDB returns the sorted list of measurements named by
+// the sources passed in or, if sources is empty, every measurement of the
+// database passed in that has series.
+func measurementsFromSourcesOrDB(db *DatabaseIndex, sources ...influxql.Source) (Measurements, error) {
+	var measurements Measurements
+	if len(sources) == 0 {
+		// No measurements specified in FROM clause so get all measurements that have series.
+		for _, m := range db.Measurements() {
+			if m.HasSeries() {
+				measurements = append(measurements, m)
+			}
+		}
+	}
+
+	// Otherwise resolve each source, which must be a measurement known to the index.
+	for _, source := range sources {
+		m, ok := source.(*influxql.Measurement)
+		if !ok {
+			return nil, errors.New("identifiers in FROM clause must be measurement names")
+		}
+		measurement := db.measurements[m.Name]
+		if measurement == nil {
+			return nil, ErrMeasurementNotFound(m.Name)
+		}
+		measurements = append(measurements, measurement)
+	}
+	sort.Sort(measurements)
+
+	return measurements, nil
+}
+
+// normalizeStatement adds a default database and policy to the measurements in statement.
+func (q *QueryExecutor) normalizeStatement(stmt influxql.Statement, defaultDatabase string) (err error) {
+	// Track prefixes for replacing field names.
+	prefixes := make(map[string]string)
+
+	// Qualify all measurements.
+	influxql.WalkFunc(stmt, func(n influxql.Node) {
+		if err != nil {
+			return
+		}
+		switch n := n.(type) {
+		case *influxql.Measurement:
+			e := q.normalizeMeasurement(n, defaultDatabase)
+			if e != nil {
+				err = e
+				return
+			}
+			prefixes[n.Name] = n.Name
+		}
+	})
+	if err != nil {
+		return err
+	}
+
+	// Replace all variable references that used measurement prefixes.
+	influxql.WalkFunc(stmt, func(n influxql.Node) {
+		switch n := n.(type) {
+		case *influxql.VarRef:
+			for k, v := range prefixes {
+				if strings.HasPrefix(n.Val, k+".") {
+					n.Val = v + "." + influxql.QuoteIdent(n.Val[len(k)+1:])
+				}
+			}
+		}
+	})
+
+	return
+}
+
+// normalizeMeasurement inserts the default database or policy into all measurement names, if required.
+func (q *QueryExecutor) normalizeMeasurement(m *influxql.Measurement, defaultDatabase string) error {
+	if m.Name == "" && m.Regex == nil {
+		return errors.New("invalid measurement")
+	}
+
+	// Fall back to the default database; after that a database must be named.
+	if m.Database == "" {
+		m.Database = defaultDatabase
+	}
+	if m.Database == "" {
+		return errors.New("database name required")
+	}
+
+	// Find database.
+	info, err := q.MetaStore.Database(m.Database)
+	if err != nil {
+		return err
+	}
+	if info == nil {
+		return ErrDatabaseNotFound(m.Database)
+	}
+
+	// An explicit retention policy is left alone.
+	if m.RetentionPolicy != "" {
+		return nil
+	}
+
+	// Otherwise use the database default, which has to be set.
+	if info.DefaultRetentionPolicy == "" {
+		return fmt.Errorf("default retention policy not set for: %s", info.Name)
+	}
+	m.RetentionPolicy = info.DefaultRetentionPolicy
+	return nil
+}
+
+func (q *QueryExecutor) executeShowDiagnosticsStatement(stmt *influxql.ShowDiagnosticsStatement) *influxql.Result {
+	return &influxql.Result{Err: errors.New("SHOW DIAGNOSTICS is not implemented yet")}
+}
+
+// ErrAuthorize represents an authorization error.
+type ErrAuthorize struct {
+	q        *QueryExecutor  // executor whose Logger Error writes to
+	query    *influxql.Query // query that was rejected
+	user     string          // name of the rejected user; "" when Authorize had none to report
+	database string          // database the caller passed to Authorize
+	message  string          // reason text that Error builds its result from
+}
+
+const authErrLogFmt string = "unauthorized request | user: %q | query: %q | database %q\n"
+
+// NewErrAuthorize returns a new ErrAuthorize for query q on executor qe; u, db and m are the user, database and reason.
+func NewErrAuthorize(qe *QueryExecutor, q *influxql.Query, u, db, m string) *ErrAuthorize {
+	return &ErrAuthorize{message: m, database: db, user: u, query: q, q: qe}
+}
+
+// Error logs the rejected request to the executor's Logger and returns the text of the error.
+func (e ErrAuthorize) Error() string {
+	e.q.Logger.Printf(authErrLogFmt, e.user, e.query.String(), e.database)
+	if e.user != "" {
+		return fmt.Sprintf("%s not authorized to execute %s", e.user, e.message)
+	}
+	return e.message
+}
+
+var (
+	// ErrInvalidQuery is returned when executing a statement type the executor rejects, such as DELETE.
+	ErrInvalidQuery = errors.New("invalid query")
+
+	// ErrNotExecuted is returned when a statement is not executed in a query.
+	// This can occur when a previous statement in the same query has errored.
+	ErrNotExecuted = errors.New("not executed")
+)
+
+func ErrDatabaseNotFound(name string) error { return fmt.Errorf("database not found: %s", name) }
+
+func ErrMeasurementNotFound(name string) error { return fmt.Errorf("measurement not found: %s", name) }
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard.go
new file mode 100644
index 00000000000..2f7570ae218
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard.go
@@ -0,0 +1,1218 @@
+package tsdb
+
+import (
+	"bytes"
+	"encoding/binary"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"hash/fnv"
+	"io"
+	"log"
+	"math"
+	"os"
+	"sort"
+	"sync"
+	"time"
+
+	"github.com/influxdb/influxdb/influxql"
+	"github.com/influxdb/influxdb/tsdb/internal"
+
+	"github.com/boltdb/bolt"
+	"github.com/gogo/protobuf/proto"
+)
+
+// Sentinel errors reported by the shard and its field codec.
+var (
+	// ErrFieldOverflow is returned when too many fields are created on a measurement.
+	ErrFieldOverflow = errors.New("field overflow")
+
+	// ErrFieldTypeConflict is returned when a new field already exists with a different type.
+	ErrFieldTypeConflict = errors.New("field type conflict")
+
+	// ErrFieldNotFound is returned when a field cannot be found.
+	ErrFieldNotFound = errors.New("field not found")
+
+	// ErrFieldUnmappedID is returned when the system is presented, during decode, with a field ID
+	// there is no mapping for.
+	ErrFieldUnmappedID = errors.New("field ID not mapped")
+
+	// ErrWALPartitionNotFound is returned when flushing a WAL partition that
+	// does not exist.
+	ErrWALPartitionNotFound = errors.New("wal partition not found")
+)
+
+// topLevelBucketN is the number of non-series buckets in the bolt db: the
+// "series", "fields" and "wal" buckets created by Shard.Open. SeriesCount
+// subtracts it from the number of top-level buckets.
+const topLevelBucketN = 3
+
+// Shard represents a self-contained time series database. An inverted index of
+// the measurement and tag data is kept along with the raw time series data.
+// Data can be split across many shards. The query engine in TSDB is responsible
+// for combining the output of many shards into a single query result.
+//
+// Incoming points are first appended to a write-ahead log (the "wal" bucket)
+// and mirrored in an in-memory cache; a flush later moves them from the WAL
+// into one bucket per series key.
+type Shard struct {
+	db    *bolt.DB // underlying data store
+	// index is the in-memory index of measurements and series. It is shared
+	// across shards and protected by its own mutex.
+	index *DatabaseIndex
+	// path is the file path handed to bolt.Open.
+	path  string
+	cache map[uint8]map[string][][]byte // values by <wal partition,series>
+
+	walSize    int           // approximate size of the WAL, in bytes
+	flush      chan struct{} // signals background flush
+	flushTimer *time.Timer   // signals time-based flush
+
+	// mu is held by the methods of this type while they open or close the
+	// store and while they touch measurementFields, cache or walSize.
+	mu                sync.RWMutex
+	measurementFields map[string]*measurementFields // measurement name to their fields
+
+	// These coordinate closing and waiting for running goroutines.
+	// closing is closed to stop the autoflusher; wg is waited on by Close.
+	wg      sync.WaitGroup
+	closing chan struct{}
+
+	// Used for out-of-band error messages.
+	// It is created by Open and writes to LogOutput.
+	logger *log.Logger
+
+	// The maximum size and time thresholds for flushing the WAL.
+	MaxWALSize             int
+	WALFlushInterval       time.Duration
+	WALPartitionFlushDelay time.Duration
+
+	// The writer used by the logger.
+	LogOutput io.Writer
+}
+
+// NewShard returns a new, not yet opened Shard that stores its data at path
+// and uses the given in-memory index. The WAL thresholds start at the package
+// defaults, log output goes to stderr, and an empty cache is allocated for
+// each of the WALPartitionN partitions.
+func NewShard(index *DatabaseIndex, path string) *Shard {
+	// Initialize all partitions of the cache.
+	cache := make(map[uint8]map[string][][]byte)
+	for id := uint8(0); id < WALPartitionN; id++ {
+		cache[id] = make(map[string][][]byte)
+	}
+
+	return &Shard{
+		index:             index,
+		path:              path,
+		cache:             cache,
+		flush:             make(chan struct{}, 1),
+		measurementFields: make(map[string]*measurementFields),
+
+		MaxWALSize:             DefaultMaxWALSize,
+		WALFlushInterval:       DefaultWALFlushInterval,
+		WALPartitionFlushDelay: DefaultWALPartitionFlushDelay,
+
+		LogOutput: os.Stderr,
+	}
+}
+
+// Path returns the path the shard was created with.
+func (s *Shard) Path() string {
+	return s.path
+}
+
+// Open initializes and opens the shard's store.
+//
+// Unless the shard is already open, it opens the bolt file at the shard's path
+// (with a one-second bolt open timeout), creates the "series", "fields" and
+// "wal" buckets if they are missing, loads the metadata index, sets up the
+// logger and the flush timer, and starts the autoflusher goroutine. If any of
+// these steps fails, the shard is closed again and the error is returned.
+//
+// Finally the on-disk WAL is flushed, so a successful Open returns only after
+// points left in the WAL have been moved into their series buckets.
+func (s *Shard) Open() error {
+	if err := func() error {
+		s.mu.Lock()
+		defer s.mu.Unlock()
+
+		// Return if the shard is already open
+		if s.db != nil {
+			return nil
+		}
+
+		// Open store on shard.
+		store, err := bolt.Open(s.path, 0666, &bolt.Options{Timeout: 1 * time.Second})
+		if err != nil {
+			return err
+		}
+		s.db = store
+
+		// Initialize store. A bucket that cannot be created aborts the
+		// transaction instead of being silently ignored, because every other
+		// method assumes these buckets exist.
+		if err := s.db.Update(func(tx *bolt.Tx) error {
+			for _, name := range []string{"series", "fields", "wal"} {
+				if _, err := tx.CreateBucketIfNotExists([]byte(name)); err != nil {
+					return fmt.Errorf("create bucket %q: %s", name, err)
+				}
+			}
+			return nil
+		}); err != nil {
+			return fmt.Errorf("init: %s", err)
+		}
+
+		if err := s.loadMetadataIndex(); err != nil {
+			return fmt.Errorf("load metadata index: %s", err)
+		}
+
+		// Initialize logger.
+		s.logger = log.New(s.LogOutput, "[shard] ", log.LstdFlags)
+
+		// Start flush interval timer.
+		s.flushTimer = time.NewTimer(s.WALFlushInterval)
+
+		// Start background goroutines.
+		s.wg.Add(1)
+		s.closing = make(chan struct{})
+		go s.autoflusher(s.closing)
+
+		return nil
+	}(); err != nil {
+		s.close()
+		return err
+	}
+
+	// Flush on-disk WAL before we return to the caller.
+	if err := s.Flush(0); err != nil {
+		return fmt.Errorf("flush: %s", err)
+	}
+
+	return nil
+}
+
+// Close shuts down the shard's store.
+// The store is closed while holding the shard lock; the lock is released
+// before waiting for the background goroutines to finish.
+func (s *Shard) Close() error {
+	err := func() error {
+		s.mu.Lock()
+		defer s.mu.Unlock()
+		return s.close()
+	}()
+
+	// Wait for open goroutines to finish.
+	s.wg.Wait()
+
+	return err
+}
+
+func (s *Shard) close() error {
+	if s.db != nil {
+		s.db.Close()
+	}
+	if s.closing != nil {
+		close(s.closing)
+		s.closing = nil
+	}
+	return nil
+}
+
+// DB returns the shard's underlying bolt store. The field is read without
+// taking the shard lock.
+//
+// TODO: this is temporarily exported to make tx.go work. When the query engine gets refactored
+// into the tsdb package this should be removed. No one outside tsdb should know the underlying store.
+func (s *Shard) DB() *bolt.DB { return s.db }
+
+// FieldCodec returns the codec for the named measurement, or nil if the shard
+// has no field definitions for it.
+//
+// TODO: this is temporarily exported to make tx.go work. When the query engine gets refactored
+// into the tsdb package this should be removed. No one outside tsdb should know the underlying field encoding scheme.
+func (s *Shard) FieldCodec(measurementName string) *FieldCodec {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	if mf := s.measurementFields[measurementName]; mf != nil {
+		return mf.codec
+	}
+	return nil
+}
+
+// fieldCreate holds the information for a field to create on a measurement.
+// Values are produced by validateSeriesAndFields and consumed by
+// createFieldsAndMeasurements.
+type fieldCreate struct {
+	measurement string // name of the measurement the field belongs to
+	field       *field // field definition; only Name and Type are set by validateSeriesAndFields
+}
+
+// seriesCreate holds the information for a series to create.
+// Values are produced by validateSeriesAndFields; WritePoints adds them to the
+// in-memory index and saves them in the "series" bucket.
+type seriesCreate struct {
+	measurement string  // name of the measurement the series belongs to
+	series      *Series // series to index and persist
+}
+
+// WritePoints will write the raw data points and any new metadata to the index in the shard.
+//
+// The write proceeds in stages:
+//
+//  1. new series and fields are detected and added to the in-memory index;
+//  2. every point without encoded data is encoded with its measurement's codec;
+//  3. new series and field metadata, plus one WAL entry per point, are saved
+//     in a single bolt transaction;
+//  4. the points are appended to the in-memory cache of their WAL partition
+//     and a background flush is requested once the WAL size threshold is reached.
+//
+// It returns ErrFieldNotFound when a measurement's field definitions are gone
+// by the time its points are encoded, and otherwise the first validation,
+// encoding or storage error encountered.
+func (s *Shard) WritePoints(points []Point) error {
+	seriesToCreate, fieldsToCreate, err := s.validateSeriesAndFields(points)
+	if err != nil {
+		return err
+	}
+
+	// add any new series to the in-memory index
+	if len(seriesToCreate) > 0 {
+		s.index.mu.Lock()
+		for _, ss := range seriesToCreate {
+			s.index.createSeriesIndexIfNotExists(ss.measurement, ss.series)
+		}
+		s.index.mu.Unlock()
+	}
+
+	// add any new fields and keep track of what needs to be saved
+	measurementFieldsToSave, err := s.createFieldsAndMeasurements(fieldsToCreate)
+	if err != nil {
+		return err
+	}
+
+	// make sure all data is encoded before attempting to save to bolt
+	for _, p := range points {
+		// marshal the raw data if it hasn't been marshaled already
+		if p.Data() == nil {
+			// this was populated earlier, don't need to validate that it's there.
+			s.mu.RLock()
+			mf := s.measurementFields[p.Name()]
+			s.mu.RUnlock()
+
+			// If a measurement is dropped while writes for it are in progress, this could be nil
+			if mf == nil {
+				return ErrFieldNotFound
+			}
+
+			data, err := mf.codec.EncodeFields(p.Fields())
+			if err != nil {
+				return err
+			}
+			p.SetData(data)
+		}
+	}
+
+	// save to the underlying bolt instance
+	if err := s.db.Update(func(tx *bolt.Tx) error {
+		// save any new metadata
+		if len(seriesToCreate) > 0 {
+			b := tx.Bucket([]byte("series"))
+			for _, sc := range seriesToCreate {
+				data, err := sc.series.MarshalBinary()
+				if err != nil {
+					return err
+				}
+				if err := b.Put([]byte(sc.series.Key), data); err != nil {
+					return err
+				}
+			}
+		}
+		if len(measurementFieldsToSave) > 0 {
+			b := tx.Bucket([]byte("fields"))
+			for name, m := range measurementFieldsToSave {
+				data, err := m.MarshalBinary()
+				if err != nil {
+					return err
+				}
+				if err := b.Put([]byte(name), data); err != nil {
+					return err
+				}
+			}
+		}
+
+		// Write points to WAL bucket.
+		wal := tx.Bucket([]byte("wal"))
+		for _, p := range points {
+			// Retrieve partition bucket.
+			key := p.Key()
+			b, err := wal.CreateBucketIfNotExists([]byte{WALPartition(key)})
+			if err != nil {
+				return fmt.Errorf("create WAL partition bucket: %s", err)
+			}
+
+			// Generate an autoincrementing index for the WAL partition.
+			// A failure here must abort the transaction: continuing with a
+			// zero ID would overwrite an existing WAL entry.
+			id, err := b.NextSequence()
+			if err != nil {
+				return fmt.Errorf("next wal sequence: %s", err)
+			}
+
+			// Append points sequentially to the WAL bucket.
+			v := marshalWALEntry(key, p.UnixNano(), p.Data())
+			if err := b.Put(u64tob(id), v); err != nil {
+				return fmt.Errorf("put wal: %s", err)
+			}
+		}
+
+		return nil
+	}); err != nil {
+		return err
+	}
+
+	// If successful then save points to in-memory cache.
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// tracks which in-memory caches need to be resorted
+	resorts := map[uint8]map[string]struct{}{}
+
+	for _, p := range points {
+		// Generate in-memory cache entry of <timestamp,data>.
+		key, data := p.Key(), p.Data()
+		v := make([]byte, 8+len(data))
+		binary.BigEndian.PutUint64(v[0:8], uint64(p.UnixNano()))
+		copy(v[8:], data)
+
+		// Determine if we are appending, i.e. the new entry sorts after the
+		// current last entry of the series' cache list.
+		partitionID := WALPartition(key)
+		a := s.cache[partitionID][string(key)]
+		appending := (len(a) == 0 || bytes.Compare(a[len(a)-1], v) == -1)
+
+		// Append to cache list.
+		a = append(a, v)
+
+		// If not appending, keep track of cache lists that need to be resorted.
+		if !appending {
+			series := resorts[partitionID]
+			if series == nil {
+				series = map[string]struct{}{}
+				resorts[partitionID] = series
+			}
+			series[string(key)] = struct{}{}
+		}
+
+		s.cache[partitionID][string(key)] = a
+
+		// Calculate estimated WAL size.
+		s.walSize += len(key) + len(v)
+	}
+
+	// Sort by timestamp if not appending.
+	for partitionID, cache := range resorts {
+		for key := range cache {
+			sort.Sort(byteSlices(s.cache[partitionID][key]))
+		}
+	}
+
+	// Check for flush threshold.
+	s.triggerAutoFlush()
+
+	return nil
+}
+
+// Flush writes all points from the write ahead log to the index.
+//
+// The partition buckets present in the WAL are listed first and then flushed
+// one after another, sleeping partitionFlushDelay after each of them. Once
+// every partition is flushed, the WAL size estimate is reset to zero and the
+// flush timer is rearmed with WALFlushInterval. The first failing partition
+// stops the flush and its error is returned.
+func (s *Shard) Flush(partitionFlushDelay time.Duration) error {
+	// Retrieve a list of WAL buckets.
+	var ids []uint8
+	err := s.db.View(func(tx *bolt.Tx) error {
+		wal := tx.Bucket([]byte("wal"))
+		return wal.ForEach(func(k, _ []byte) error {
+			ids = append(ids, uint8(k[0]))
+			return nil
+		})
+	})
+	if err != nil {
+		return err
+	}
+
+	// Continue flushing until there are no more partition buckets.
+	for _, id := range ids {
+		if err := s.FlushPartition(id); err != nil {
+			return fmt.Errorf("flush partition: id=%d, err=%s", id, err)
+		}
+
+		// Wait momentarily so other threads can process.
+		time.Sleep(partitionFlushDelay)
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// Reset the WAL size and the timer.
+	s.walSize = 0
+	s.flushTimer.Reset(s.WALFlushInterval)
+
+	return nil
+}
+
+// FlushPartition flushes a single WAL partition.
+func (s *Shard) FlushPartition(partitionID uint8) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	startTime := time.Now()
+
+	var pointN int
+	if err := s.db.Update(func(tx *bolt.Tx) error {
+		// Retrieve partition bucket. Exit if it doesn't exist.
+		pb := tx.Bucket([]byte("wal")).Bucket([]byte{byte(partitionID)})
+		if pb == nil {
+			return ErrWALPartitionNotFound
+		}
+
+		// Iterate over keys in the WAL partition bucket.
+		c := pb.Cursor()
+		for k, v := c.First(); k != nil; k, v = c.Next() {
+			key, timestamp, data := unmarshalWALEntry(v)
+
+			// Create bucket for entry.
+			b, err := tx.CreateBucketIfNotExists(key)
+			if err != nil {
+				return fmt.Errorf("create bucket: %s", err)
+			}
+
+			// Write point to bucket.
+			if err := b.Put(u64tob(uint64(timestamp)), data); err != nil {
+				return fmt.Errorf("put: %s", err)
+			}
+
+			// Remove entry in the WAL.
+			if err := c.Delete(); err != nil {
+				return fmt.Errorf("delete: %s", err)
+			}
+
+			pointN++
+		}
+
+		return nil
+	}); err != nil {
+		return err
+	}
+
+	// Reset cache.
+	s.cache[partitionID] = make(map[string][][]byte)
+
+	if pointN > 0 {
+		s.logger.Printf("flush %d points in %.3fs", pointN, time.Since(startTime).Seconds())
+	}
+
+	return nil
+}
+
+// autoflusher waits for notification of a flush and kicks it off in the background.
+// This method runs in a separate goroutine. It exits, marking the wait group
+// as done, once the closing channel is closed.
+func (s *Shard) autoflusher(closing chan struct{}) {
+	defer s.wg.Done()
+
+	// The timer and the explicit signal trigger the same flush; a failure is
+	// only logged.
+	flush := func() {
+		if err := s.Flush(s.WALPartitionFlushDelay); err != nil {
+			s.logger.Printf("flush error: %s", err)
+		}
+	}
+
+	for {
+		// Wait for close or flush signal.
+		select {
+		case <-closing:
+			return
+		case <-s.flushTimer.C:
+			flush()
+		case <-s.flush:
+			flush()
+		}
+	}
+}
+
+// triggerAutoFlush signals that a flush should occur if the size is above the threshold.
+// This function must be called within the context of a lock.
+func (s *Shard) triggerAutoFlush() {
+	// Only signal once the estimated WAL size reaches the threshold.
+	if s.walSize >= s.MaxWALSize {
+		// Non-blocking send: if a signal is already pending, do nothing.
+		select {
+		case s.flush <- struct{}{}:
+		default:
+		}
+	}
+}
+
+// ValidateAggregateFieldsInStatement checks that every numeric aggregate in
+// stmt is applied to a numeric (float or integer) field of the measurement.
+//
+// For a nested call such as `derivative(mean(value), 1d)` the inner call is the
+// one inspected. An error is returned when the shard has no field definitions
+// for the measurement, when a call has no field argument (a DISTINCT argument
+// is accepted only inside `count`), or when a numeric aggregate targets a
+// non-numeric field. A field this shard has no definition for cannot be
+// type-checked and is skipped rather than dereferenced.
+func (s *Shard) ValidateAggregateFieldsInStatement(measurementName string, stmt *influxql.SelectStatement) error {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	m := s.measurementFields[measurementName]
+	if m == nil {
+		return fmt.Errorf("measurement not found: %s", measurementName)
+	}
+
+	// validateField reports an error if the named field is defined on this
+	// shard with a non-numeric type. aname is the aggregate named in the error.
+	validateField := func(aname, fname string) error {
+		f := m.Fields[fname]
+		if f == nil {
+			// Unknown to this shard: there is no type to check.
+			return nil
+		}
+		if f.Type != influxql.Float && f.Type != influxql.Integer {
+			return fmt.Errorf("aggregate '%s' requires numerical field values. Field '%s' is of type %s",
+				aname, f.Name, f.Type)
+		}
+		return nil
+	}
+
+	// If a numerical aggregate is requested, ensure it is only performed on numeric data or on a
+	// nested aggregate on numeric data.
+	for _, a := range stmt.FunctionCalls() {
+		// Check for fields like `derivative(mean(value), 1d)`
+		nested := a
+		if len(nested.Args) > 0 {
+			if fn, ok := nested.Args[0].(*influxql.Call); ok {
+				nested = fn
+			}
+		}
+
+		// A call without arguments has no field to inspect.
+		if len(nested.Args) == 0 {
+			return fmt.Errorf("aggregate call didn't contain a field %s", a.String())
+		}
+
+		switch lit := nested.Args[0].(type) {
+		case *influxql.VarRef:
+			if influxql.IsNumeric(nested) {
+				if err := validateField(a.Name, lit.Val); err != nil {
+					return err
+				}
+			}
+		case *influxql.Distinct:
+			if nested.Name != "count" {
+				return fmt.Errorf("aggregate call didn't contain a field %s", a.String())
+			}
+			if influxql.IsNumeric(nested) {
+				if err := validateField(a.Name, lit.Val); err != nil {
+					return err
+				}
+			}
+		default:
+			return fmt.Errorf("aggregate call didn't contain a field %s", a.String())
+		}
+	}
+
+	return nil
+}
+
+// deleteSeries deletes the buckets and the metadata for the given series keys.
+// For every key, its entry in the "series" bucket, its data bucket (if any)
+// and its entry in the in-memory cache are removed. The shard lock is held
+// for the duration.
+func (s *Shard) deleteSeries(keys []string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	return s.db.Update(func(tx *bolt.Tx) error {
+		seriesBucket := tx.Bucket([]byte("series"))
+		for _, key := range keys {
+			raw := []byte(key)
+			if err := seriesBucket.Delete(raw); err != nil {
+				return err
+			}
+			// A series that was never flushed has no data bucket yet.
+			if err := tx.DeleteBucket(raw); err != nil && err != bolt.ErrBucketNotFound {
+				return err
+			}
+			delete(s.cache[WALPartition(raw)], key)
+		}
+		return nil
+	})
+}
+
+// deleteMeasurement deletes the measurement field encoding information and all underlying series from the shard.
+// The measurement's entry in the "fields" bucket is removed along with, for
+// every series key, its "series" entry, its data bucket (if any) and its
+// cache entry. Only after the transaction succeeds is the measurement dropped
+// from the shard's in-memory field definitions.
+func (s *Shard) deleteMeasurement(name string, seriesKeys []string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	err := s.db.Update(func(tx *bolt.Tx) error {
+		if err := tx.Bucket([]byte("fields")).Delete([]byte(name)); err != nil {
+			return err
+		}
+
+		seriesBucket := tx.Bucket([]byte("series"))
+		for _, key := range seriesKeys {
+			raw := []byte(key)
+			if err := seriesBucket.Delete(raw); err != nil {
+				return err
+			}
+			// A series that was never flushed has no data bucket yet.
+			if err := tx.DeleteBucket(raw); err != nil && err != bolt.ErrBucketNotFound {
+				return err
+			}
+			delete(s.cache[WALPartition(raw)], key)
+		}
+
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+
+	// Remove entry from shard index.
+	delete(s.measurementFields, name)
+	return nil
+}
+
+// createFieldsAndMeasurements adds the given fields to the shard's in-memory
+// field definitions and to the shared index, creating measurements as needed.
+// It returns the definitions of every touched measurement, keyed by name, so
+// the caller can persist them. With nothing to create it returns nil, nil.
+// The first field that cannot be created aborts the call with its error.
+func (s *Shard) createFieldsAndMeasurements(fieldsToCreate []*fieldCreate) (map[string]*measurementFields, error) {
+	if len(fieldsToCreate) == 0 {
+		return nil, nil
+	}
+
+	// Take the index lock before the shard lock; the deferred unlocks release
+	// the shard lock first.
+	s.index.mu.Lock()
+	s.mu.Lock()
+	defer s.index.mu.Unlock()
+	defer s.mu.Unlock()
+
+	// add fields
+	toSave := make(map[string]*measurementFields)
+	for _, fc := range fieldsToCreate {
+		mf := s.measurementFields[fc.measurement]
+		if mf == nil {
+			if mf = toSave[fc.measurement]; mf == nil {
+				mf = &measurementFields{Fields: make(map[string]*field)}
+			}
+			s.measurementFields[fc.measurement] = mf
+		}
+		toSave[fc.measurement] = mf
+
+		// add the field to the in memory index
+		err := mf.createFieldIfNotExists(fc.field.Name, fc.field.Type)
+		if err != nil {
+			return nil, err
+		}
+
+		// ensure the measurement is in the index and the field is there
+		indexed := s.index.createMeasurementIndexIfNotExists(fc.measurement)
+		indexed.fieldNames[fc.field.Name] = struct{}{}
+	}
+
+	return toSave, nil
+}
+
+// validateSeriesAndFields checks which series and fields are new and whose metadata should be saved and indexed.
+// It returns the series missing from the index and the fields missing from the
+// shard's definitions. A point whose field already exists with a different
+// type makes the call fail with a field type conflict error.
+func (s *Shard) validateSeriesAndFields(points []Point) ([]*seriesCreate, []*fieldCreate, error) {
+	var (
+		newSeries []*seriesCreate
+		newFields []*fieldCreate
+	)
+
+	// get the mutex for the in memory index, which is shared across shards
+	s.index.mu.RLock()
+	defer s.index.mu.RUnlock()
+
+	// get the shard mutex for locally defined fields
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	for _, p := range points {
+		// see if the series should be added to the index
+		if s.index.series[string(p.Key())] == nil {
+			newSeries = append(newSeries, &seriesCreate{
+				measurement: p.Name(),
+				series:      &Series{Key: string(p.Key()), Tags: p.Tags()},
+			})
+		}
+
+		// known is nil when the measurement has no field definitions yet, in
+		// which case every field of the point is new.
+		known := s.measurementFields[p.Name()]
+		for name, value := range p.Fields() {
+			typ := influxql.InspectDataType(value)
+			if known != nil {
+				if f := known.Fields[name]; f != nil {
+					// Field present in shard metadata, make sure there is no type conflict.
+					if f.Type != typ {
+						return nil, nil, fmt.Errorf("field type conflict: input field \"%s\" on measurement \"%s\" is type %T, already exists as type %s", name, p.Name(), value, f.Type)
+					}
+					continue // Field is present, and it's of the same type. Nothing more to do.
+				}
+			}
+
+			newFields = append(newFields, &fieldCreate{p.Name(), &field{Name: name, Type: typ}})
+		}
+	}
+
+	return newSeries, newFields, nil
+}
+
+// loadMetadataIndex loads the shard metadata into memory. This should only be called by Open.
+// It reads the "fields" bucket into the shard's field definitions (building a
+// codec for each measurement) and registers every measurement, field name and
+// series from the "series" bucket with the shared index, holding the index
+// lock throughout.
+func (s *Shard) loadMetadataIndex() error {
+	return s.db.View(func(tx *bolt.Tx) error {
+		s.index.mu.Lock()
+		defer s.index.mu.Unlock()
+
+		// load measurement metadata
+		fieldsCur := tx.Bucket([]byte("fields")).Cursor()
+		for name, raw := fieldsCur.First(); name != nil; name, raw = fieldsCur.Next() {
+			measurement := s.index.createMeasurementIndexIfNotExists(string(name))
+
+			fields := &measurementFields{}
+			if err := fields.UnmarshalBinary(raw); err != nil {
+				return err
+			}
+			for fieldName := range fields.Fields {
+				measurement.fieldNames[fieldName] = struct{}{}
+			}
+			fields.codec = newFieldCodec(fields.Fields)
+			s.measurementFields[measurement.Name] = fields
+		}
+
+		// load series metadata
+		seriesCur := tx.Bucket([]byte("series")).Cursor()
+		for key, raw := seriesCur.First(); key != nil; key, raw = seriesCur.Next() {
+			series := &Series{}
+			if err := series.UnmarshalBinary(raw); err != nil {
+				return err
+			}
+			s.index.createSeriesIndexIfNotExists(measurementFromSeriesKey(string(key)), series)
+		}
+		return nil
+	})
+}
+
+// SeriesCount returns the number of series buckets on the shard.
+// This does not include a count from the WAL.
+// The count is the number of top-level buckets minus topLevelBucketN.
+func (s *Shard) SeriesCount() (n int, err error) {
+	// Start below zero so that the top-level (non-series) buckets cancel out.
+	n = -topLevelBucketN
+
+	err = s.db.View(func(tx *bolt.Tx) error {
+		return tx.ForEach(func([]byte, *bolt.Bucket) error {
+			n++
+			return nil
+		})
+	})
+
+	return n, err
+}
+
+// measurementFields holds the field definitions of one measurement together
+// with the codec built from them.
+type measurementFields struct {
+	// Fields maps a field name to its definition.
+	Fields map[string]*field `json:"fields"`
+	// codec is rebuilt from Fields whenever a field is created or the
+	// definitions are loaded from disk; it is not part of the binary encoding.
+	codec  *FieldCodec
+}
+
+// MarshalBinary encodes the object to a binary format.
+func (m *measurementFields) MarshalBinary() ([]byte, error) {
+	var pb internal.MeasurementFields
+	for _, f := range m.Fields {
+		id := int32(f.ID)
+		name := f.Name
+		t := int32(f.Type)
+		pb.Fields = append(pb.Fields, &internal.Field{ID: &id, Name: &name, Type: &t})
+	}
+	return proto.Marshal(&pb)
+}
+
+// UnmarshalBinary decodes the object from a binary format.
+func (m *measurementFields) UnmarshalBinary(buf []byte) error {
+	var pb internal.MeasurementFields
+	if err := proto.Unmarshal(buf, &pb); err != nil {
+		return err
+	}
+	m.Fields = make(map[string]*field)
+	for _, f := range pb.Fields {
+		m.Fields[f.GetName()] = &field{ID: uint8(f.GetID()), Name: f.GetName(), Type: influxql.DataType(f.GetType())}
+	}
+	return nil
+}
+
+// createFieldIfNotExists creates a new field with an autoincrementing ID.
+// Returns an error if 255 fields have already been created on the measurement or
+// the fields already exists with a different type.
+// The ID is the number of existing fields plus one, and the codec is rebuilt
+// after a field is added.
+func (m *measurementFields) createFieldIfNotExists(name string, typ influxql.DataType) error {
+	// Nothing to do for an existing field, unless its type differs.
+	if existing := m.Fields[name]; existing != nil {
+		if existing.Type == typ {
+			return nil
+		}
+		return ErrFieldTypeConflict
+	}
+
+	// Only 255 fields are allowed. If we go over that then return an error.
+	nextID := len(m.Fields) + 1
+	if nextID > math.MaxUint8 {
+		return ErrFieldOverflow
+	}
+
+	// Register the new field and refresh the codec.
+	m.Fields[name] = &field{ID: uint8(nextID), Name: name, Type: typ}
+	m.codec = newFieldCodec(m.Fields)
+
+	return nil
+}
+
+// field represents a series field.
+type field struct {
+	ID   uint8             `json:"id,omitempty"`   // leading byte of the field's encoded values (see FieldCodec)
+	Name string            `json:"name,omitempty"` // field name, the key in measurementFields.Fields
+	Type influxql.DataType `json:"type,omitempty"` // data type of the field's values
+}
+
+// FieldCodec provides encoding and decoding functionality for the fields of a given
+// Measurement. It is a distinct type to avoid locking writes on this node while
+// potentially long-running queries are executing.
+//
+// It is not affected by changes to the Measurement object after codec creation.
+// TODO: this shouldn't be exported. nothing outside the shard should know about field encodings.
+//       However, this is here until tx.go and the engine get refactored into tsdb.
+type FieldCodec struct {
+	fieldsByID   map[uint8]*field  // lookup by field ID, used when decoding
+	fieldsByName map[string]*field // lookup by field name, used when encoding
+}
+
+// newFieldCodec returns a FieldCodec for the given Measurement. Must be called with
+// a RLock that protects the Measurement.
+// The codec gets its own lookup maps by ID and by name; the field values
+// themselves are shared with the given map.
+func newFieldCodec(fields map[string]*field) *FieldCodec {
+	codec := &FieldCodec{
+		fieldsByID:   make(map[uint8]*field, len(fields)),
+		fieldsByName: make(map[string]*field, len(fields)),
+	}
+	for _, f := range fields {
+		codec.fieldsByID[f.ID] = f
+		codec.fieldsByName[f.Name] = f
+	}
+	return codec
+}
+
+// EncodeFields converts a map of values with string keys to a byte slice of field
+// IDs and values.
+//
+// Every value is written as its one-byte field ID followed by the payload:
+// 8 big-endian bytes for floats and integers, 1 byte for booleans, and a
+// 2-byte big-endian length plus the bytes for strings (strings longer than
+// maxStringLength are cut to that length).
+//
+// If a field exists in the codec, but its type is different, an error is returned. If
+// a field is not present in the codec, the system panics.
+func (f *FieldCodec) EncodeFields(values map[string]interface{}) ([]byte, error) {
+	out := make([]byte, 0, 10)
+
+	for name, v := range values {
+		fld := f.fieldsByName[name]
+		if fld == nil {
+			panic(fmt.Sprintf("field does not exist for %s", name))
+		}
+		if influxql.InspectDataType(v) != fld.Type {
+			return nil, fmt.Errorf("field \"%s\" is type %T, mapped as type %s", name, v, fld.Type)
+		}
+
+		// enc holds the encoded entry; byte 0 is reserved for the field ID.
+		var enc []byte
+
+		switch fld.Type {
+		case influxql.Float:
+			bits := math.Float64bits(v.(float64))
+			enc = make([]byte, 9)
+			binary.BigEndian.PutUint64(enc[1:9], bits)
+		case influxql.Integer:
+			var n uint64
+			switch x := v.(type) {
+			case int:
+				n = uint64(x)
+			case int32:
+				n = uint64(x)
+			case int64:
+				n = uint64(x)
+			default:
+				panic(fmt.Sprintf("invalid integer type: %T", v))
+			}
+			enc = make([]byte, 9)
+			binary.BigEndian.PutUint64(enc[1:9], n)
+		case influxql.Boolean:
+			// Only 1 byte need for a boolean.
+			set := v.(bool)
+			enc = make([]byte, 2)
+			if set {
+				enc[1] = 1
+			}
+		case influxql.String:
+			str := v.(string)
+			if len(str) > maxStringLength {
+				str = str[:maxStringLength]
+			}
+			// Field ID (1 byte), the string length (2 bytes), and the string.
+			enc = make([]byte, len(str)+3)
+			binary.BigEndian.PutUint16(enc[1:3], uint16(len(str)))
+			copy(enc[3:], str)
+		default:
+			panic(fmt.Sprintf("unsupported value type during encode fields: %T", v))
+		}
+
+		// Always set the field ID as the leading byte.
+		enc[0] = fld.ID
+
+		out = append(out, enc...)
+	}
+
+	return out, nil
+}
+
+// FieldIDByName returns the ID of the field with the given name, or
+// ErrFieldNotFound if the codec has no such field.
+// TODO: this shouldn't be exported. remove when tx.go and engine.go get refactored into tsdb
+func (f *FieldCodec) FieldIDByName(s string) (uint8, error) {
+	if fi := f.fieldsByName[s]; fi != nil {
+		return fi.ID, nil
+	}
+	return 0, ErrFieldNotFound
+}
+
+// DecodeFields decodes a byte slice into a set of field ids and values.
+//
+// The slice is read as a sequence of entries, each a one-byte field ID followed
+// by the value: 8 big-endian bytes for floats and integers (integers decode as
+// int64), 1 byte for booleans, and a 2-byte big-endian length plus that many
+// bytes for strings. An empty slice yields a nil map.
+//
+// ErrFieldUnmappedID is returned when an ID has no mapping in the codec; this
+// can happen when data is decoded before the field mapping is known to this
+// codec. An error is also returned when the slice ends in the middle of an
+// entry.
+func (f *FieldCodec) DecodeFields(b []byte) (map[uint8]interface{}, error) {
+	if len(b) == 0 {
+		return nil, nil
+	}
+
+	// truncated builds the error for an entry that runs past the end of b.
+	truncated := func(id uint8) error {
+		return fmt.Errorf("field %d: encoded value is truncated", id)
+	}
+
+	// Create a map to hold the decoded data.
+	values := make(map[uint8]interface{})
+
+	for len(b) > 0 {
+		// First byte is the field identifier.
+		fieldID := b[0]
+		field := f.fieldsByID[fieldID]
+		if field == nil {
+			return nil, ErrFieldUnmappedID
+		}
+
+		var value interface{}
+		switch field.Type {
+		case influxql.Float:
+			if len(b) < 9 {
+				return nil, truncated(fieldID)
+			}
+			value = math.Float64frombits(binary.BigEndian.Uint64(b[1:9]))
+			b = b[9:]
+		case influxql.Integer:
+			if len(b) < 9 {
+				return nil, truncated(fieldID)
+			}
+			value = int64(binary.BigEndian.Uint64(b[1:9]))
+			b = b[9:]
+		case influxql.Boolean:
+			if len(b) < 2 {
+				return nil, truncated(fieldID)
+			}
+			value = b[1] == 1
+			b = b[2:]
+		case influxql.String:
+			if len(b) < 3 {
+				return nil, truncated(fieldID)
+			}
+			// Widen the length to int before adding the header size: in
+			// uint16 arithmetic the sum wraps around for lengths above 65532.
+			end := 3 + int(binary.BigEndian.Uint16(b[1:3]))
+			if len(b) < end {
+				return nil, truncated(fieldID)
+			}
+			value = string(b[3:end])
+			b = b[end:]
+		default:
+			panic(fmt.Sprintf("unsupported value type during decode fields: %T", f.fieldsByID[fieldID]))
+		}
+
+		values[fieldID] = value
+	}
+
+	return values, nil
+}
+
+// DecodeFieldsWithNames decodes a byte slice into a set of field names and values.
+// It decodes with DecodeFields and translates every ID that has a mapping to
+// its field name.
+// TODO: shouldn't be exported. refactor engine
+func (f *FieldCodec) DecodeFieldsWithNames(b []byte) (map[string]interface{}, error) {
+	byID, err := f.DecodeFields(b)
+	if err != nil {
+		return nil, err
+	}
+
+	byName := make(map[string]interface{})
+	for id, v := range byID {
+		if fld := f.fieldsByID[id]; fld != nil {
+			byName[fld.Name] = v
+		}
+	}
+	return byName, nil
+}
+
+// DecodeByID scans a byte slice for a field with the given ID, converts it to its
+// expected type, and return that value.
+//
+// Entries are decoded one after another (see the per-type sizes below) until
+// the target ID is found. ErrFieldNotFound is returned when the slice is empty
+// or does not contain the ID, ErrFieldUnmappedID when an entry's ID has no
+// mapping in the codec, and an error when the slice ends in the middle of an
+// entry. On every error the returned value is 0.
+// TODO: shouldn't be exported. refactor engine
+func (f *FieldCodec) DecodeByID(targetID uint8, b []byte) (interface{}, error) {
+	if len(b) == 0 {
+		return 0, ErrFieldNotFound
+	}
+
+	// truncated builds the error for an entry that runs past the end of b.
+	truncated := func(id uint8) error {
+		return fmt.Errorf("field %d: encoded value is truncated", id)
+	}
+
+	for len(b) > 0 {
+		field, ok := f.fieldsByID[b[0]]
+		if !ok {
+			// This can happen, though is very unlikely. If this node receives encoded data, to be written
+			// to disk, and is queried for that data before its metastore is updated, there will be no field
+			// mapping for the data during decode. All this can happen because data is encoded by the node
+			// that first received the write request, not the node that actually writes the data to disk.
+			// So if this happens, the read must be aborted.
+			return 0, ErrFieldUnmappedID
+		}
+
+		var value interface{}
+		switch field.Type {
+		case influxql.Float:
+			// ID byte plus 8 value bytes.
+			if len(b) < 9 {
+				return 0, truncated(field.ID)
+			}
+			value = math.Float64frombits(binary.BigEndian.Uint64(b[1:9]))
+			b = b[9:]
+		case influxql.Integer:
+			// ID byte plus 8 value bytes.
+			if len(b) < 9 {
+				return 0, truncated(field.ID)
+			}
+			value = int64(binary.BigEndian.Uint64(b[1:9]))
+			b = b[9:]
+		case influxql.Boolean:
+			// ID byte plus 1 value byte.
+			if len(b) < 2 {
+				return 0, truncated(field.ID)
+			}
+			value = b[1] == 1
+			b = b[2:]
+		case influxql.String:
+			// ID byte, 2 length bytes, then the string.
+			if len(b) < 3 {
+				return 0, truncated(field.ID)
+			}
+			// Widen the length to int before adding the header size: in
+			// uint16 arithmetic the sum wraps around for lengths above 65532.
+			end := 3 + int(binary.BigEndian.Uint16(b[1:3]))
+			if len(b) < end {
+				return 0, truncated(field.ID)
+			}
+			value = string(b[3:end])
+			b = b[end:]
+		default:
+			panic(fmt.Sprintf("unsupported value type during decode by id: %T", field.Type))
+		}
+
+		if field.ID == targetID {
+			return value, nil
+		}
+	}
+
+	return 0, ErrFieldNotFound
+}
+
+// DecodeByName scans a byte slice for a field with the given name, converts it to its
+// expected type, and return that value.
+// ErrFieldNotFound is returned when the codec has no field with that name;
+// otherwise the lookup is delegated to DecodeByID.
+func (f *FieldCodec) DecodeByName(name string, b []byte) (interface{}, error) {
+	fi := f.fieldByName(name)
+	if fi == nil {
+		return 0, ErrFieldNotFound
+	}
+	return f.DecodeByID(fi.ID, b)
+}
+
+// fieldByName returns the field with the given name, or nil if the codec has
+// no such field.
+func (f *FieldCodec) fieldByName(name string) *field { return f.fieldsByName[name] }
+
+// mustMarshalJSON encodes a value to JSON.
+// This will panic if an error occurs. This should only be used internally when
+// an invalid marshal will cause corruption and a panic is appropriate.
+func mustMarshalJSON(v interface{}) []byte {
+	encoded, err := json.Marshal(v)
+	if err == nil {
+		return encoded
+	}
+	panic("marshal: " + err.Error())
+}
+
+// mustUnmarshalJSON decodes a value from JSON.
+// This will panic if an error occurs. This should only be used internally when
+// an invalid unmarshal will cause corruption and a panic is appropriate.
+func mustUnmarshalJSON(b []byte, v interface{}) {
+	err := json.Unmarshal(b, v)
+	if err == nil {
+		return
+	}
+	panic("unmarshal: " + err.Error())
+}
+
+// u64tob converts a uint64 into an 8-byte slice.
+func u64tob(v uint64) []byte {
+	b := make([]byte, 8)
+	binary.BigEndian.PutUint64(b, v)
+	return b
+}
+
+// marshalWALEntry encodes point data into a single byte slice.
+//
+// The format of the byte slice is:
+//
+//     uint64 timestamp
+//     uint32 key length
+//     []byte key
+//     []byte data
+//
+func marshalWALEntry(key []byte, timestamp int64, data []byte) []byte {
+	v := make([]byte, 8+4, 8+4+len(key)+len(data))
+	binary.BigEndian.PutUint64(v[0:8], uint64(timestamp))
+	binary.BigEndian.PutUint32(v[8:12], uint32(len(key)))
+	v = append(v, key...)
+	v = append(v, data...)
+	return v
+}
+
+// unmarshalWALEntry decodes a WAL entry into its separate parts: the series
+// key, the timestamp and the encoded data (see marshalWALEntry for the layout).
+// Returned byte slices point to the original slice.
+func unmarshalWALEntry(v []byte) (key []byte, timestamp int64, data []byte) {
+	// The key starts right after the 12-byte header.
+	keyEnd := 12 + binary.BigEndian.Uint32(v[8:12])
+	key, data = v[12:keyEnd], v[keyEnd:]
+	timestamp = int64(binary.BigEndian.Uint64(v[:8]))
+	return key, timestamp, data
+}
+
+// marshalCacheEntry encodes the timestamp and data to a single byte slice.
+//
+// The format of the byte slice is:
+//
+//     uint64 timestamp
+//     []byte data
+//
+// The timestamp is written big-endian.
+func marshalCacheEntry(timestamp int64, data []byte) []byte {
+	entry := make([]byte, 8+len(data))
+	binary.BigEndian.PutUint64(entry[:8], uint64(timestamp))
+	copy(entry[8:], data)
+	return entry
+}
+
+// unmarshalCacheEntry returns the timestamp and data from an encoded byte slice.
+// The first 8 bytes hold the big-endian timestamp; data is the rest of buf and
+// shares its memory.
+func unmarshalCacheEntry(buf []byte) (timestamp int64, data []byte) {
+	return int64(binary.BigEndian.Uint64(buf[:8])), buf[8:]
+}
+
+// byteSlices represents a sortable slice of byte slices.
+// Elements are ordered bytewise, as by bytes.Compare.
+type byteSlices [][]byte
+
+// Len returns the number of byte slices.
+func (a byteSlices) Len() int {
+	return len(a)
+}
+
+// Less reports whether element i sorts before element j.
+func (a byteSlices) Less(i, j int) bool {
+	return bytes.Compare(a[i], a[j]) < 0
+}
+
+// Swap exchanges elements i and j.
+func (a byteSlices) Swap(i, j int) {
+	a[j], a[i] = a[i], a[j]
+}
+
+// shardCursor provides ordered iteration across a Bolt bucket and shard cache.
+// Entries of both sources are merged by their 8-byte key; the bolt cursor may
+// be nil, in which case only the cache is iterated.
+type shardCursor struct {
+	// Bolt cursor and readahead buffer.
+	// buf holds the pair last read from the cursor until read hands it out;
+	// a nil buf.key means nothing is buffered.
+	cursor *bolt.Cursor
+	buf    struct {
+		key, value []byte
+	}
+
+	// Cache and current cache index.
+	// Each cache entry is an 8-byte key followed by the value.
+	cache [][]byte
+	index int
+}
+
+// Seek moves the cursor to a position and returns the closest key/value pair.
+// The bolt cursor (if any) is positioned with its own Seek, and the cache
+// index is moved to the first entry whose key is not lower than seek.
+func (sc *shardCursor) Seek(seek []byte) (key, value []byte) {
+	// Seek bolt cursor.
+	if sc.cursor != nil {
+		sc.buf.key, sc.buf.value = sc.cursor.Seek(seek)
+	}
+
+	// Seek cache index.
+	atOrAfter := func(i int) bool {
+		return bytes.Compare(sc.cache[i][0:8], seek) >= 0
+	}
+	sc.index = sort.Search(len(sc.cache), atOrAfter)
+
+	return sc.read()
+}
+
+// Next returns the next key/value pair from the cursor.
+// The bolt cursor is only advanced when its previously read pair has already
+// been handed out.
+func (sc *shardCursor) Next() (key, value []byte) {
+	// Read next bolt key/value if not buffered.
+	if sc.cursor != nil && sc.buf.key == nil {
+		sc.buf.key, sc.buf.value = sc.cursor.Next()
+	}
+
+	return sc.read()
+}
+
+// read returns the next key/value in the cursor buffer or cache.
+// The buffered bolt pair wins when its key is lower than the current cache
+// key or when the cache is exhausted; otherwise the cache entry is returned.
+// nil, nil is returned when both sources are exhausted.
+func (sc *shardCursor) read() (key, value []byte) {
+	cacheDone := sc.index >= len(sc.cache)
+
+	// If neither a buffer or cache exists then return nil.
+	if sc.buf.key == nil && cacheDone {
+		return nil, nil
+	}
+
+	// Use the buffer if it exists and there's no cache or if it is lower than the cache.
+	if sc.buf.key != nil && (cacheDone || bytes.Compare(sc.buf.key, sc.cache[sc.index][0:8]) < 0) {
+		key, value = sc.buf.key, sc.buf.value
+		sc.buf.key, sc.buf.value = nil, nil
+		return key, value
+	}
+
+	// Otherwise read from the cache.
+	// Continue skipping ahead through duplicate keys in the cache list, so
+	// that the last entry of a run of equal keys is the one returned.
+	for {
+		entry := sc.cache[sc.index]
+		key, value = entry[0:8], entry[8:]
+		sc.index++
+
+		// Stop at the end of the cache or when the next key is different.
+		if sc.index >= len(sc.cache) || !bytes.Equal(key, sc.cache[sc.index][0:8]) {
+			return key, value
+		}
+	}
+}
+
+// WALPartitionN is the number of partitions in the write ahead log.
+const WALPartitionN = 8
+
+// WALPartition returns the partition number that key belongs to: the 64-bit
+// FNV-1a hash of the key modulo WALPartitionN.
+func WALPartition(key []byte) uint8 {
+	hasher := fnv.New64a()
+	hasher.Write(key)
+	sum := hasher.Sum64()
+	return uint8(sum % WALPartitionN)
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/snapshot_writer.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/snapshot_writer.go
new file mode 100644
index 00000000000..785ca13908c
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/snapshot_writer.go
@@ -0,0 +1,124 @@
+package tsdb
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/boltdb/bolt"
+	"github.com/influxdb/influxdb/snapshot"
+)
+
+// NewSnapshotWriter builds a snapshot.Writer for an archive that holds the
+// given metadata as a file named "meta" plus one file per shard in the store.
+// The meta entry is stamped with the current time. If the shard files cannot
+// be added, the writer is closed and the wrapped error is returned instead.
+func NewSnapshotWriter(meta []byte, store *Store) (*snapshot.Writer, error) {
+	// Create snapshot writer.
+	sw := snapshot.NewWriter()
+
+	// Register the in-memory metadata; the closer wrapped around it does nothing.
+	metaFile := snapshot.File{
+		Name:    "meta",
+		Size:    int64(len(meta)),
+		ModTime: time.Now(),
+	}
+	sw.Manifest.Files = append(sw.Manifest.Files, metaFile)
+	sw.FileWriters[metaFile.Name] = NopWriteToCloser(bytes.NewReader(meta))
+
+	// Register a file for each shard.
+	if err := appendShardSnapshotFiles(sw, store); err != nil {
+		// Format the error before closing the writer; an error from Close
+		// itself is deliberately discarded.
+		wrapped := fmt.Errorf("create shard snapshot files: %s", err)
+		_ = sw.Close()
+		return nil, wrapped
+	}
+
+	return sw, nil
+}
+
+// appendShardSnapshotFiles registers one snapshot file per shard in the store,
+// naming each file by the shard's path relative to the store's root.
+func appendShardSnapshotFiles(sw *snapshot.Writer, store *Store) error {
+	// Resolve the store root once; shard names are computed relative to it.
+	root, err := filepath.Abs(store.Path())
+	if err != nil {
+		return fmt.Errorf("store abs path: %s", err)
+	}
+
+	// Add a file for every shard currently registered with the store.
+	for _, id := range store.ShardIDs() {
+		shard := store.Shard(id)
+		if shard == nil {
+			return fmt.Errorf("shard not found: %d", id)
+		}
+
+		// Both paths are made absolute so that Rel compares like with like.
+		abs, err := filepath.Abs(shard.Path())
+		if err != nil {
+			return fmt.Errorf("shard abs path: %s", err)
+		}
+		rel, err := filepath.Rel(root, abs)
+		if err != nil {
+			return fmt.Errorf("shard rel path: %s", err)
+		}
+
+		if err := appendShardSnapshotFile(sw, shard, rel); err != nil {
+			return fmt.Errorf("append shard: name=%s, err=%s", rel, err)
+		}
+	}
+
+	return nil
+}
+
+// appendShardSnapshotFile adds shard sh to the snapshot writer under the given file name.
+func appendShardSnapshotFile(sw *snapshot.Writer, sh *Shard, name string) error {
+	// Stat the underlying data file to retrieve last modified date.
+	fi, err := os.Stat(sh.Path())
+	if err != nil {
+		return fmt.Errorf("stat shard data file: %s", err)
+	}
+
+	// Begin a non-writable transaction; it supplies both the file size and the file writer.
+	tx, err := sh.db.Begin(false)
+	if err != nil {
+		return fmt.Errorf("begin: %s", err)
+	}
+
+	f := snapshot.File{
+		Name:    name,
+		Size:    tx.Size(),
+		ModTime: fi.ModTime(),
+	}
+
+	// Append to snapshot writer; tx is left open here and rolled back by boltTxCloser.Close.
+	sw.Manifest.Files = append(sw.Manifest.Files, f)
+	sw.FileWriters[f.Name] = &boltTxCloser{tx}
+	return nil
+}
+
+// boltTxCloser adapts a Bolt transaction to io.Closer; the embedded *bolt.Tx supplies its other methods.
+type boltTxCloser struct {
+	*bolt.Tx
+}
+
+// Close ends the transaction by rolling it back and returns Rollback's error.
+func (tx *boltTxCloser) Close() error { return tx.Rollback() }
+
+// NopWriteToCloser wraps w so it also satisfies io.Closer; the added Close does nothing and returns nil.
+func NopWriteToCloser(w io.WriterTo) interface {
+	io.WriterTo
+	io.Closer
+} {
+	return &nopWriteToCloser{w}
+}
+
+type nopWriteToCloser struct {
+	io.WriterTo
+}
+
+func (w *nopWriteToCloser) Close() error { return nil }
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store.go
new file mode 100644
index 00000000000..787b5073399
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store.go
@@ -0,0 +1,343 @@
+package tsdb
+
+import (
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/influxdb/influxdb/influxql"
+)
+
+func NewStore(path string) *Store {
+	return &Store{
+		path:                   path,
+		MaxWALSize:             DefaultMaxWALSize,
+		WALFlushInterval:       DefaultWALFlushInterval,
+		WALPartitionFlushDelay: DefaultWALPartitionFlushDelay,
+		Logger:                 log.New(os.Stderr, "[store] ", log.LstdFlags),
+	}
+}
+
+var (
+	ErrShardNotFound = fmt.Errorf("shard not found")
+)
+
+// Store manages the shards and per-database indexes kept under a root path; mu guards both maps.
+type Store struct {
+	mu   sync.RWMutex
+	path string
+
+	databaseIndexes map[string]*DatabaseIndex
+	shards          map[uint64]*Shard
+
+	MaxWALSize             int
+	WALFlushInterval       time.Duration
+	WALPartitionFlushDelay time.Duration
+	Logger                 *log.Logger
+}
+
+// Path returns the store's root path, the directory under which database directories are created.
+func (s *Store) Path() string { return s.path }
+
+// CreateShard opens a new shard at <path>/<database>/<retentionPolicy>/<shardID> and registers it.
+// It is a no-op when the store already holds a shard with that ID.
+func (s *Store) CreateShard(database, retentionPolicy string, shardID uint64) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	if _, exists := s.shards[shardID]; exists {
+		return nil // shard already exists
+	}
+
+	// Create the db and retention policy dirs if they don't exist.
+	rpDir := filepath.Join(s.path, database, retentionPolicy)
+	if err := os.MkdirAll(rpDir, 0700); err != nil {
+		return err
+	}
+
+	// Look up the database index, creating it on first use.
+	index, found := s.databaseIndexes[database]
+	if !found {
+		index = NewDatabaseIndex()
+		s.databaseIndexes[database] = index
+	}
+
+	file := filepath.Join(s.path, database, retentionPolicy, strconv.FormatUint(shardID, 10))
+	sh := s.newShard(index, file)
+	if err := sh.Open(); err != nil {
+		return err
+	}
+	s.shards[shardID] = sh
+	return nil
+}
+
+// DeleteShard closes the shard, removes its path from disk and unregisters it.
+// Deleting a shard the store does not hold is a no-op.
+func (s *Store) DeleteShard(shardID uint64) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	target, known := s.shards[shardID]
+	if !known {
+		return nil
+	}
+
+	// Close before removing; on either failure the shard stays registered.
+	err := target.Close()
+	if err == nil {
+		err = os.Remove(target.path)
+	}
+	if err != nil {
+		return err
+	}
+
+	delete(s.shards, shardID)
+	return nil
+}
+
+// newShard returns a shard for path using index, with the store's three WAL settings copied onto it.
+func (s *Store) newShard(index *DatabaseIndex, path string) *Shard {
+	sh := NewShard(index, path)
+	sh.MaxWALSize = s.MaxWALSize
+	sh.WALFlushInterval = s.WALFlushInterval
+	sh.WALPartitionFlushDelay = s.WALPartitionFlushDelay
+	return sh
+}
+
+// DeleteDatabase closes and unregisters the given shards, then removes the database's directory and index.
+func (s *Store) DeleteDatabase(name string, shardIDs []uint64) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	for _, id := range shardIDs {
+		if shard := s.shards[id]; shard != nil {
+			shard.Close()        // best effort: the files are removed below regardless
+			delete(s.shards, id) // a closed shard must not stay reachable through the store
+		}
+	}
+	if err := os.RemoveAll(filepath.Join(s.path, name)); err != nil {
+		return err
+	}
+	delete(s.databaseIndexes, name)
+	return nil
+}
+
+func (s *Store) Shard(shardID uint64) *Shard {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return s.shards[shardID]
+}
+
+// ShardIDs returns a slice of all ShardIDs under management, in no particular order.
+func (s *Store) ShardIDs() []uint64 {
+	s.mu.RLock() // s.shards is mutated under s.mu by CreateShard and DeleteShard
+	defer s.mu.RUnlock()
+	ids := make([]uint64, 0, len(s.shards))
+	for id := range s.shards {
+		ids = append(ids, id)
+	}
+	return ids
+}
+
+// ValidateAggregateFieldsInStatement delegates to the shard's validation, or returns ErrShardNotFound if it is absent.
+func (s *Store) ValidateAggregateFieldsInStatement(shardID uint64, measurementName string, stmt *influxql.SelectStatement) error {
+	if shard := s.Shard(shardID); shard != nil { // Shard takes the read lock for the lookup
+		return shard.ValidateAggregateFieldsInStatement(measurementName, stmt)
+	}
+	return ErrShardNotFound
+}
+
+// DatabaseIndex returns the index for the named database, or nil if the store has none.
+func (s *Store) DatabaseIndex(name string) *DatabaseIndex {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return s.databaseIndexes[name]
+}
+
+// Measurement returns the named measurement from the database's index, or nil if the database has no index.
+func (s *Store) Measurement(database, name string) *Measurement {
+	index := s.DatabaseIndex(database)
+	if index == nil {
+		return nil
+	}
+	return index.Measurement(name)
+}
+
+// deleteSeries deletes the series data and metadata for keys from every local shard, returning the first error.
+func (s *Store) deleteSeries(keys []string) error {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	for _, sh := range s.shards {
+		if err := sh.deleteSeries(keys); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// deleteMeasurement removes the measurement's field encodings from every local shard, returning the first error.
+func (s *Store) deleteMeasurement(name string, seriesKeys []string) error {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	for _, sh := range s.shards {
+		if err := sh.deleteMeasurement(name, seriesKeys); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (s *Store) loadIndexes() error {
+	dbs, err := ioutil.ReadDir(s.path)
+	if err != nil {
+		return err
+	}
+	for _, db := range dbs {
+		if !db.IsDir() {
+			s.Logger.Printf("Skipping database dir: %s. Not a directory", db.Name())
+			continue
+		}
+		s.databaseIndexes[db.Name()] = NewDatabaseIndex()
+	}
+	return nil
+}
+
+// loadShards opens and registers every shard found at <path>/<db>/<rp>/<shardID> for the known database indexes.
+func (s *Store) loadShards() error {
+	for db := range s.databaseIndexes {
+		rps, err := ioutil.ReadDir(filepath.Join(s.path, db))
+		if err != nil {
+			return err
+		}
+		for _, rp := range rps {
+			// retention policies should be directories.  Skip anything that is not a dir.
+			if !rp.IsDir() {
+				s.Logger.Printf("Skipping retention policy dir: %s. Not a directory", rp.Name())
+				continue
+			}
+
+			shards, err := ioutil.ReadDir(filepath.Join(s.path, db, rp.Name()))
+			if err != nil {
+				return err
+			}
+			for _, sh := range shards {
+				path := filepath.Join(s.path, db, rp.Name(), sh.Name())
+				// Shard file names are numeric shardIDs
+				shardID, err := strconv.ParseUint(sh.Name(), 10, 64)
+				if err != nil {
+					s.Logger.Printf("Skipping shard: %s. Not a valid path", sh.Name())
+					continue
+				}
+
+				// Only register shards that opened; a failed Open aborts the load.
+				shard := s.newShard(s.databaseIndexes[db], path)
+				if err := shard.Open(); err != nil {
+					return fmt.Errorf("open shard %d: %s", shardID, err)
+				}
+				s.shards[shardID] = shard
+			}
+		}
+	}
+	return nil
+}
+
+// Open resets the store's in-memory state, ensures the root directory exists,
+// and then loads the database indexes and shards found under it.
+func (s *Store) Open() error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// Start from empty maps; loadIndexes and loadShards fill them in.
+	s.shards = map[uint64]*Shard{}
+	s.databaseIndexes = map[string]*DatabaseIndex{}
+
+	// Create directory.
+	if err := os.MkdirAll(s.path, 0777); err != nil {
+		return err
+	}
+
+	// TODO: Start AE for Node
+	if err := s.loadIndexes(); err != nil {
+		return err
+	}
+
+	// Shards come last: loadShards walks the indexes loaded just above.
+	return s.loadShards()
+}
+
+// WriteToShard writes points to the shard identified by shardID.
+// It returns ErrShardNotFound when the store holds no such shard.
+func (s *Store) WriteToShard(shardID uint64, points []Point) error {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	if sh, ok := s.shards[shardID]; ok {
+		return sh.WritePoints(points)
+	}
+	return ErrShardNotFound
+}
+
+// Flush forces all shards to write their WAL data to the index, stopping at the first shard that fails.
+func (s *Store) Flush() error {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	for shardID, sh := range s.shards {
+		if err := sh.Flush(s.WALPartitionFlushDelay); err != nil {
+			return fmt.Errorf("flush: shard=%d, err=%s", shardID, err)
+		}
+	}
+	return nil
+}
+
+// CreateMapper parses query as a SELECT and returns a mapper for it over the shard (nil, nil if the shard is absent).
+func (s *Store) CreateMapper(shardID uint64, query string, chunkSize int) (Mapper, error) {
+	q, err := influxql.NewParser(strings.NewReader(query)).ParseStatement()
+	if err != nil {
+		return nil, err
+	}
+	stmt, ok := q.(*influxql.SelectStatement)
+	if !ok {
+		// err is always nil here, so report the query text rather than calling err.Error().
+		return nil, fmt.Errorf("query is not a SELECT statement: %s", query)
+	}
+	shard := s.Shard(shardID)
+	if shard == nil {
+		// This can happen if the shard has been assigned, but hasn't actually been created yet.
+		return nil, nil
+	}
+	if (stmt.IsRawQuery && !stmt.HasDistinct()) || stmt.IsSimpleDerivative() {
+		return NewRawMapper(shard, stmt, chunkSize), nil
+	}
+	return NewAggMapper(shard, stmt), nil
+}
+
+// Close closes every shard and then drops the shard and index maps. It stops
+// at the first shard that fails to close, leaving both maps in place.
+func (s *Store) Close() error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	for _, sh := range s.shards {
+		if err := sh.Close(); err != nil {
+			return err
+		}
+	}
+	s.shards, s.databaseIndexes = nil, nil
+	return nil
+}
+
+// IsRetryable reports whether err is temporary and could be retried. Only an
+// error whose message contains "field type conflict" is considered permanent;
+// every other error, and a nil error, is reported as retryable.
+func IsRetryable(err error) bool {
+	if err == nil {
+		return true
+	}
+
+	permanent := strings.Contains(err.Error(), "field type conflict")
+	return !permanent
+}
diff --git a/cluster/addons/cluster-monitoring/google/heapster-controller.yaml b/cluster/addons/cluster-monitoring/google/heapster-controller.yaml
index 215d805cce0..562459b41a9 100644
--- a/cluster/addons/cluster-monitoring/google/heapster-controller.yaml
+++ b/cluster/addons/cluster-monitoring/google/heapster-controller.yaml
@@ -7,26 +7,26 @@
 apiVersion: v1
 kind: ReplicationController
 metadata:
-  name: heapster-v12
+  name: heapster-v13
   namespace: kube-system
   labels:
     k8s-app: heapster
-    version: v12
+    version: v13
     kubernetes.io/cluster-service: "true"
 spec:
   replicas: 1
   selector:
     k8s-app: heapster
-    version: v12
+    version: v13
   template:
     metadata:
       labels:
         k8s-app: heapster
-        version: v12
+        version: v13
         kubernetes.io/cluster-service: "true"
     spec:
       containers:
-        - image: gcr.io/google_containers/heapster:v0.18.5
+        - image: gcr.io/google_containers/heapster:v0.20.0-alpha2
           name: heapster
           resources:
             # keep request = limit to keep this container in guaranteed class
@@ -40,10 +40,24 @@ spec:
             - /heapster
             - --source=kubernetes:''
             - --sink=gcm
-            - --sink=gcmautoscaling
+          volumeMounts:
+            - name: ssl-certs
+              mountPath: /etc/ssl/certs
+              readOnly: true
+        - image: gcr.io/google_containers/heapster:v0.20.0-alpha2
+          name: eventer
+          resources:
+            # keep request = limit to keep this container in guaranteed class
+            limits:
+              cpu: 100m
+              memory: {{ heapster_memory }}
+            requests:
+              cpu: 100m
+              memory: {{ heapster_memory }}
+          command:
+            - /eventer
+            - --source=kubernetes:''
             - --sink=gcl
-            - --stats_resolution=30s
-            - --sink_frequency=1m
           volumeMounts:
             - name: ssl-certs
               mountPath: /etc/ssl/certs
diff --git a/cluster/addons/cluster-monitoring/googleinfluxdb/heapster-controller-combined.yaml b/cluster/addons/cluster-monitoring/googleinfluxdb/heapster-controller-combined.yaml
index b6e31c405d6..9581723b3bd 100644
--- a/cluster/addons/cluster-monitoring/googleinfluxdb/heapster-controller-combined.yaml
+++ b/cluster/addons/cluster-monitoring/googleinfluxdb/heapster-controller-combined.yaml
@@ -7,26 +7,26 @@
 apiVersion: v1
 kind: ReplicationController
 metadata:
-  name: heapster-v12
+  name: heapster-v13
   namespace: kube-system
   labels:
     k8s-app: heapster
-    version: v12
+    version: v13
     kubernetes.io/cluster-service: "true"
 spec:
   replicas: 1
   selector:
     k8s-app: heapster
-    version: v12
+    version: v13
   template:
     metadata:
       labels:
         k8s-app: heapster
-        version: v12
+        version: v13
         kubernetes.io/cluster-service: "true"
     spec:
       containers:
-        - image: gcr.io/google_containers/heapster:v0.18.5
+        - image: gcr.io/google_containers/heapster:v0.20.0-alpha2
           name: heapster
           resources:
             # keep request = limit to keep this container in guaranteed class
@@ -39,11 +39,25 @@ spec:
           command:
             - /heapster
             - --source=kubernetes:''
-            - --sink=gcl
-            - --sink=gcmautoscaling
             - --sink=influxdb:http://monitoring-influxdb:8086
-            - --stats_resolution=30s
-            - --sink_frequency=1m
+          volumeMounts:
+            - name: ssl-certs
+              mountPath: /etc/ssl/certs
+              readOnly: true
+        - image: gcr.io/google_containers/heapster:v0.20.0-alpha2
+          name: eventer
+          resources:
+            # keep request = limit to keep this container in guaranteed class
+            limits:
+              cpu: 100m
+              memory: {{ heapster_memory }}
+            requests:
+              cpu: 100m
+              memory: {{ heapster_memory }}
+          command:
+            - /eventer
+            - --source=kubernetes:''
+            - --sink=gcl
           volumeMounts:
             - name: ssl-certs
               mountPath: /etc/ssl/certs
diff --git a/cluster/addons/cluster-monitoring/influxdb/heapster-controller.yaml b/cluster/addons/cluster-monitoring/influxdb/heapster-controller.yaml
index ee84caff14b..bf9d4c4b44c 100644
--- a/cluster/addons/cluster-monitoring/influxdb/heapster-controller.yaml
+++ b/cluster/addons/cluster-monitoring/influxdb/heapster-controller.yaml
@@ -7,26 +7,26 @@
 apiVersion: v1
 kind: ReplicationController
 metadata:
-  name: heapster-v12
+  name: heapster-v13
   namespace: kube-system
   labels:
     k8s-app: heapster
-    version: v12
+    version: v13
     kubernetes.io/cluster-service: "true"
 spec:
   replicas: 1
   selector:
     k8s-app: heapster
-    version: v12
+    version: v13
   template:
     metadata:
       labels:
         k8s-app: heapster
-        version: v12
+        version: v13
         kubernetes.io/cluster-service: "true"
     spec:
       containers:
-        - image: gcr.io/google_containers/heapster:v0.18.5
+        - image: gcr.io/google_containers/heapster:v0.20.0-alpha2
           name: heapster
           resources:
             # keep request = limit to keep this container in guaranteed class
@@ -40,5 +40,17 @@ spec:
             - /heapster
             - --source=kubernetes:''
             - --sink=influxdb:http://monitoring-influxdb:8086
-            - --stats_resolution=30s
-            - --sink_frequency=1m
+        - image: gcr.io/google_containers/heapster:v0.20.0-alpha2
+          name: eventer
+          resources:
+            # keep request = limit to keep this container in guaranteed class
+            limits:
+              cpu: 100m
+              memory: {{ heapster_memory }}
+            requests:
+              cpu: 100m
+              memory: {{ heapster_memory }}
+          command:
+            - /eventer
+            - --source=kubernetes:''
+            - --sink=influxdb:http://monitoring-influxdb:8086
diff --git a/cluster/addons/cluster-monitoring/influxdb/influxdb-grafana-controller.yaml b/cluster/addons/cluster-monitoring/influxdb/influxdb-grafana-controller.yaml
index f62e83ac3c3..b7281ae3e7f 100644
--- a/cluster/addons/cluster-monitoring/influxdb/influxdb-grafana-controller.yaml
+++ b/cluster/addons/cluster-monitoring/influxdb/influxdb-grafana-controller.yaml
@@ -1,26 +1,26 @@
 apiVersion: v1
 kind: ReplicationController
 metadata:
-  name: monitoring-influxdb-grafana-v2
+  name: monitoring-influxdb-grafana-v3
   namespace: kube-system
   labels: 
     k8s-app: influxGrafana
-    version: v2
+    version: v3
     kubernetes.io/cluster-service: "true"
 spec: 
   replicas: 1
   selector: 
     k8s-app: influxGrafana
-    version: v2
+    version: v3
   template: 
     metadata: 
       labels: 
         k8s-app: influxGrafana
-        version: v2
+        version: v3
         kubernetes.io/cluster-service: "true"
     spec: 
       containers: 
-        - image: gcr.io/google_containers/heapster_influxdb:v0.4
+        - image: gcr.io/google_containers/heapster_influxdb:v0.5
           name: influxdb
           resources:
             # keep request = limit to keep this container in guaranteed class
@@ -68,7 +68,6 @@ spec:
           volumeMounts:
           - name: grafana-persistent-storage
             mountPath: /var
-              
       volumes:
       - name: influxdb-persistent-storage
         emptyDir: {}
diff --git a/cluster/addons/cluster-monitoring/standalone/heapster-controller.yaml b/cluster/addons/cluster-monitoring/standalone/heapster-controller.yaml
index 2d49587c152..43b637ede5a 100644
--- a/cluster/addons/cluster-monitoring/standalone/heapster-controller.yaml
+++ b/cluster/addons/cluster-monitoring/standalone/heapster-controller.yaml
@@ -7,26 +7,26 @@
 apiVersion: v1
 kind: ReplicationController
 metadata:
-  name: heapster-v12
+  name: heapster-v13
   namespace: kube-system
   labels:
     k8s-app: heapster
-    version: v12
+    version: v13
     kubernetes.io/cluster-service: "true"
 spec:
   replicas: 1
   selector:
     k8s-app: heapster
-    version: v12
+    version: v13
   template:
     metadata:
       labels:
         k8s-app: heapster
-        version: v12
+        version: v13
         kubernetes.io/cluster-service: "true"
     spec:
       containers:
-        - image: gcr.io/google_containers/heapster:v0.18.5
+        - image: gcr.io/google_containers/heapster:v0.20.0-alpha2
           name: heapster
           resources:
             # keep request = limit to keep this container in guaranteed class
diff --git a/hack/jenkins/e2e.sh b/hack/jenkins/e2e.sh
index d10b5155cd8..c8dfb695701 100755
--- a/hack/jenkins/e2e.sh
+++ b/hack/jenkins/e2e.sh
@@ -358,8 +358,6 @@ case ${JOB_NAME} in
     : ${PROJECT:="k8s-jnks-e2e-gce-autoscaling"}
     : ${FAIL_ON_GCP_RESOURCE_LEAK:="true"}
     : ${ENABLE_DEPLOYMENTS:=true}
-    # Override GCE default for cluster size autoscaling purposes.
-    ENABLE_CLUSTER_MONITORING="googleinfluxdb"
     ADMISSION_CONTROL="NamespaceLifecycle,InitialResources,LimitRanger,SecurityContextDeny,ServiceAccount,ResourceQuota"
     ;;
 
diff --git a/plugin/pkg/admission/initialresources/influxdb.go b/plugin/pkg/admission/initialresources/influxdb.go
index 3c6b7564007..d72f16fcb0c 100644
--- a/plugin/pkg/admission/initialresources/influxdb.go
+++ b/plugin/pkg/admission/initialresources/influxdb.go
@@ -21,7 +21,6 @@ import (
 	"strings"
 	"time"
 
-	"github.com/golang/glog"
 	influxdb "github.com/influxdb/influxdb/client"
 	"k8s.io/kubernetes/pkg/api"
 )
@@ -35,81 +34,15 @@ const (
 )
 
 // TODO(piosz): rewrite this once we will migrate into InfluxDB v0.9.
-type influxdbSource struct {
-	conf *influxdb.ClientConfig
-}
+type influxdbSource struct{}
 
 func newInfluxdbSource(host, user, password, db string) (dataSource, error) {
-	conf := &influxdb.ClientConfig{
-		Host:     host,
-		Username: user,
-		Password: password,
-		Database: db,
-	}
-	source := &influxdbSource{
-		conf: conf,
-	}
-	go source.ensureAutoscalingSeriesExist()
-	return source, nil
+	return &influxdbSource{}, nil
 }
 
-func ensureSeriesExists(conn *influxdb.Client, existingQueries *influxdb.Series, seriesName, contQuery string) error {
-	queryExists := false
-	for _, p := range existingQueries.GetPoints() {
-		id := p[1].(float64)
-		query := p[2].(string)
-		if strings.Contains(query, "into "+seriesName) {
-			if query != contQuery {
-				if _, err := conn.Query(fmt.Sprintf("drop continuous query %v", id), influxdb.Second); err != nil {
-					return err
-				}
-			} else {
-				queryExists = true
-			}
-		}
-	}
-	if !queryExists {
-		if _, err := conn.Query("drop series "+seriesName, influxdb.Second); err != nil {
-			return err
-		}
-		if _, err := conn.Query(contQuery, influxdb.Second); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (s *influxdbSource) ensureAutoscalingSeriesExist() {
-	for {
-		time.Sleep(30 * time.Second)
-		client, err := influxdb.NewClient(s.conf)
-		if err != nil {
-			glog.Errorf("Error while trying to create InfluxDB client: %v", err)
-			continue
-		}
-		series, err := client.Query("list continuous queries", influxdb.Second)
-		if err != nil {
-			glog.Errorf("Error while trying to list continuous queries: %v", err)
-			continue
-		}
-		if err := ensureSeriesExists(client, series[0], cpuSeriesName, cpuContinuousQuery); err != nil {
-			glog.Errorf("Error while trying to create create autoscaling series: %v", err)
-			continue
-		}
-		if err := ensureSeriesExists(client, series[0], memSeriesName, memContinuousQuery); err != nil {
-			glog.Errorf("Error while trying to create create autoscaling series: %v", err)
-			continue
-		}
-		break
-	}
-}
-
-func (s *influxdbSource) query(query string, precision ...influxdb.TimePrecision) ([]*influxdb.Series, error) {
-	client, err := influxdb.NewClient(s.conf)
-	if err != nil {
-		return nil, err
-	}
-	return client.Query(query, precision...)
+func (s *influxdbSource) query(query string) ([]*influxdb.Response, error) {
+	// TODO(piosz): add support again
+	return nil, fmt.Errorf("temporary not supported; see #18826 for more details")
 }
 
 func (s *influxdbSource) GetUsagePercentile(kind api.ResourceName, perc int64, image, namespace string, exactMatch bool, start, end time.Time) (int64, int64, error) {
@@ -133,26 +66,8 @@ func (s *influxdbSource) GetUsagePercentile(kind api.ResourceName, perc int64, i
 	}
 
 	query := fmt.Sprintf("select percentile(value, %v), count(pod_id) from %v where container_base_image%v%v and time > '%v' and time < '%v'", perc, series, imgPattern, namespaceCond, start.UTC().Format(timeFormat), end.UTC().Format(timeFormat))
-	var res []*influxdb.Series
-	var err error
-	if res, err = s.query(query, influxdb.Second); err != nil {
+	if _, err := s.query(query); err != nil {
 		return 0, 0, fmt.Errorf("Error while trying to query InfluxDB: %v", err)
 	}
-
-	// TODO(pszczesniak): fix issue with dropped data base
-	if len(res) == 0 {
-		return 0, 0, fmt.Errorf("Missing data in series %v in InfluxDB", series)
-	}
-	points := res[0].GetPoints()
-	if len(points) == 0 {
-		return 0, 0, fmt.Errorf("Missing data in series %v in InfluxDB", series)
-	}
-	p := points[0]
-	usage := p[1].(float64)
-	count := p[2].(float64)
-	if kind == api.ResourceCPU {
-		// convert from ns to millicores
-		usage = usage / 1000000
-	}
-	return int64(usage), int64(count), nil
+	return 0, 0, nil
 }
diff --git a/test/e2e/initial_resources.go b/test/e2e/initial_resources.go
index a23b3e7f67a..f250db69708 100644
--- a/test/e2e/initial_resources.go
+++ b/test/e2e/initial_resources.go
@@ -20,7 +20,6 @@ import (
 	"fmt"
 	"time"
 
-	influxdb "github.com/influxdb/influxdb/client"
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
 	"k8s.io/kubernetes/pkg/api"
@@ -34,14 +33,7 @@ var _ = Describe("Initial Resources [Feature:InitialResources] [Flaky]", func()
 	f := NewFramework("initial-resources")
 
 	It("should set initial resources based on historical data", func() {
-		// Cleanup data in InfluxDB that left from previous tests.
-		influxdbClient, err := getInfluxdbClient(f.Client)
-		expectNoError(err, "failed to create influxdb client")
-		_, err = influxdbClient.Query("drop series autoscaling.cpu.usage.2m", influxdb.Second)
-		expectNoError(err)
-		_, err = influxdbClient.Query("drop series autoscaling.memory.usage.2m", influxdb.Second)
-		expectNoError(err)
-
+		// TODO(piosz): Add cleanup data in InfluxDB that left from previous tests.
 		cpu := 100
 		mem := 200
 		for i := 0; i < 10; i++ {
diff --git a/test/e2e/monitoring.go b/test/e2e/monitoring.go
index ae93b9f5e0a..43f49531383 100644
--- a/test/e2e/monitoring.go
+++ b/test/e2e/monitoring.go
@@ -17,6 +17,8 @@ limitations under the License.
 package e2e
 
 import (
+	"bytes"
+	"encoding/json"
 	"fmt"
 	"net/url"
 	"time"
@@ -49,10 +51,8 @@ var _ = Describe("[Flaky] Monitoring", func() {
 const (
 	influxdbService      = "monitoring-influxdb"
 	influxdbDatabaseName = "k8s"
-	influxdbUser         = "root"
-	influxdbPW           = "root"
-	podlistQuery         = "select distinct(pod_id) from \"cpu/usage_ns_cumulative\""
-	nodelistQuery        = "select distinct(hostname) from \"cpu/usage_ns_cumulative\""
+	podlistQuery         = "show tag values from \"cpu/usage\" with key = pod_id"
+	nodelistQuery        = "show tag values from \"cpu/usage\" with key = hostname"
 	sleepBetweenAttempts = 5 * time.Second
 	testTimeout          = 5 * time.Minute
 )
@@ -65,6 +65,35 @@ var (
 	}
 )
 
+// Query sends a command to the server and returns the Response
+func Query(c *client.Client, query string) (*influxdb.Response, error) {
+	result, err := c.Get().
+		Prefix("proxy").
+		Namespace("kube-system").
+		Resource("services").
+		Name(influxdbService+":api").
+		Suffix("query").
+		Param("q", query).
+		Param("db", influxdbDatabaseName).
+		Param("epoch", "s").
+		Do().
+		Raw()
+
+	if err != nil {
+		return nil, err
+	}
+
+	var response influxdb.Response
+	dec := json.NewDecoder(bytes.NewReader(result))
+	dec.UseNumber()
+	err = dec.Decode(&response)
+
+	if err != nil {
+		return nil, err
+	}
+	return &response, nil
+}
+
 func verifyExpectedRcsExistAndGetExpectedPods(c *client.Client) ([]string, error) {
 	expectedPods := []string{}
 	// Iterate over the labels that identify the replication controllers that we
@@ -135,41 +164,24 @@ func getAllNodesInCluster(c *client.Client) ([]string, error) {
 	return result, nil
 }
 
-func getInfluxdbClient(c *client.Client) (*influxdb.Client, error) {
-	proxyUrl := fmt.Sprintf("%s/api/v1/proxy/namespaces/%s/services/%s:api/", getMasterHost(), api.NamespaceSystem, influxdbService)
-	config := &influxdb.ClientConfig{
-		Host: proxyUrl,
-		// TODO(vishh): Infer username and pw from the Pod spec.
-		Username:   influxdbUser,
-		Password:   influxdbPW,
-		Database:   influxdbDatabaseName,
-		HttpClient: c.Client,
-		IsSecure:   true,
-	}
-	return influxdb.NewClient(config)
-}
-
-func getInfluxdbData(c *influxdb.Client, query string) (map[string]bool, error) {
-	series, err := c.Query(query, influxdb.Second)
+func getInfluxdbData(c *client.Client, query string, tag string) (map[string]bool, error) {
+	response, err := Query(c, query)
 	if err != nil {
 		return nil, err
 	}
-	if len(series) != 1 {
-		return nil, fmt.Errorf("expected only one series from Influxdb for query %q. Got %+v", query, series)
+	if len(response.Results) != 1 {
+		return nil, fmt.Errorf("expected only one result from Influxdb for query %q. Got %+v", query, response)
 	}
-	if len(series[0].GetColumns()) != 2 {
-		Failf("Expected two columns for query %q. Found %v", query, series[0].GetColumns())
+	if len(response.Results[0].Series) != 1 {
+		return nil, fmt.Errorf("expected exactly one series for query %q.", query)
+	}
+	if len(response.Results[0].Series[0].Columns) != 1 {
+		Failf("Expected one column for query %q. Found %v", query, response.Results[0].Series[0].Columns)
 	}
 	result := map[string]bool{}
-	for _, point := range series[0].GetPoints() {
-		if len(point) != 2 {
-			Failf("Expected only two entries in a point for query %q. Got %v", query, point)
-		}
-		name, ok := point[1].(string)
-		if !ok {
-			Failf("expected %v to be a string, but it is %T", point[1], point[1])
-		}
-		result[name] = false
+	for _, value := range response.Results[0].Series[0].Values {
+		name := value[0].(string)
+		result[name] = true
 	}
 	return result, nil
 }
@@ -186,14 +198,14 @@ func expectedItemsExist(expectedItems []string, actualItems map[string]bool) boo
 	return true
 }
 
-func validatePodsAndNodes(influxdbClient *influxdb.Client, expectedPods, expectedNodes []string) bool {
-	pods, err := getInfluxdbData(influxdbClient, podlistQuery)
+func validatePodsAndNodes(c *client.Client, expectedPods, expectedNodes []string) bool {
+	pods, err := getInfluxdbData(c, podlistQuery, "pod_id")
 	if err != nil {
 		// We don't fail the test here because the influxdb service might still not be running.
 		Logf("failed to query list of pods from influxdb. Query: %q, Err: %v", podlistQuery, err)
 		return false
 	}
-	nodes, err := getInfluxdbData(influxdbClient, nodelistQuery)
+	nodes, err := getInfluxdbData(c, nodelistQuery, "hostname")
 	if err != nil {
 		Logf("failed to query list of nodes from influxdb. Query: %q, Err: %v", nodelistQuery, err)
 		return false
@@ -222,14 +234,11 @@ func testMonitoringUsingHeapsterInfluxdb(c *client.Client) {
 	expectNoError(expectedServicesExist(c))
 	// TODO: Wait for all pods and services to be running.
 
-	influxdbClient, err := getInfluxdbClient(c)
-	expectNoError(err, "failed to create influxdb client")
-
 	expectedNodes, err := getAllNodesInCluster(c)
 	expectNoError(err)
 	startTime := time.Now()
 	for {
-		if validatePodsAndNodes(influxdbClient, expectedPods, expectedNodes) {
+		if validatePodsAndNodes(c, expectedPods, expectedNodes) {
 			return
 		}
 		if time.Since(startTime) >= testTimeout {