Revert "Bumped Heapster version to v0.20.0-alpha and Influxdb to 0.9.2.1"

Filip Grzadkowski 2016-01-27 17:22:36 +01:00
parent b2f6e7d08f
commit 0dedf869c7
109 changed files with 970 additions and 36165 deletions

Godeps/Godeps.json (generated)

@@ -47,10 +47,6 @@
"Comment": "v0.7.4-6-g5d54e27",
"Rev": "5d54e27f1764a0309eafe12c9df7bac03f241646"
},
{
"ImportPath": "github.com/armon/go-metrics",
"Rev": "345426c77237ece5dab0e1605c3e4b35c3f54757"
},
{
"ImportPath": "github.com/aws/aws-sdk-go/aws",
"Comment": "v1.0.7",
@@ -678,18 +674,6 @@
"ImportPath": "github.com/gorilla/mux",
"Rev": "8096f47503459bcc74d1f4c487b7e6e42e5746b5"
},
{
"ImportPath": "github.com/hashicorp/go-msgpack/codec",
"Rev": "fa3f63826f7c23912c15263591e65d54d080b458"
},
{
"ImportPath": "github.com/hashicorp/raft",
"Rev": "057b893fd996696719e98b6c44649ea14968c811"
},
{
"ImportPath": "github.com/hashicorp/raft-boltdb",
"Rev": "d1e82c1ec3f15ee991f7cc7ffd5b67ff6f5bbaee"
},
{
"ImportPath": "github.com/imdario/mergo",
"Comment": "0.1.3-8-g6633656",
@@ -701,33 +685,8 @@
},
{
"ImportPath": "github.com/influxdb/influxdb/client",
"Comment": "v0.9.2.1",
"Rev": "b237c68bab4756507baf6840023be103853e77db"
},
{
"ImportPath": "github.com/influxdb/influxdb/influxql",
"Comment": "v0.9.2.1",
"Rev": "b237c68bab4756507baf6840023be103853e77db"
},
{
"ImportPath": "github.com/influxdb/influxdb/meta",
"Comment": "v0.9.2.1",
"Rev": "b237c68bab4756507baf6840023be103853e77db"
},
{
"ImportPath": "github.com/influxdb/influxdb/snapshot",
"Comment": "v0.9.2.1",
"Rev": "b237c68bab4756507baf6840023be103853e77db"
},
{
"ImportPath": "github.com/influxdb/influxdb/toml",
"Comment": "v0.9.2.1",
"Rev": "b237c68bab4756507baf6840023be103853e77db"
},
{
"ImportPath": "github.com/influxdb/influxdb/tsdb",
"Comment": "v0.9.2.1",
"Rev": "b237c68bab4756507baf6840023be103853e77db"
"Comment": "v0.8.8",
"Rev": "afde71eb1740fd763ab9450e1f700ba0e53c36d0"
},
{
"ImportPath": "github.com/jmespath/go-jmespath",

Godeps/LICENSES.md (generated)

@@ -8,7 +8,6 @@ bitbucket.org/ww/goautoneg | spdxBSD3
github.com/abbot/go-http-auth | Apache-2
github.com/appc/cni | Apache-2
github.com/appc/spec | Apache-2
github.com/armon/go-metrics | MITname
github.com/aws/aws-sdk-go | Apache-2
github.com/beorn7/perks/quantile | MIT?
github.com/blang/semver | MITname
@@ -50,9 +49,6 @@ github.com/google/cadvisor | Apache-2
github.com/google/gofuzz | Apache-2
github.com/gorilla/context | spdxBSD3
github.com/gorilla/mux | spdxBSD3
github.com/hashicorp/go-msgpack | spdxBSD3
github.com/hashicorp/raft | IntelPart08
github.com/hashicorp/raft-boltdb | IntelPart08
github.com/imdario/mergo | spdxBSD3
github.com/inconshreveable/mousetrap | Apache-2
github.com/influxdb/influxdb | MITname


@@ -1,22 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe


@@ -1,20 +0,0 @@
The MIT License (MIT)
Copyright (c) 2013 Armon Dadgar
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


@@ -1,71 +0,0 @@
go-metrics
==========
This library provides a `metrics` package which can be used to instrument code,
expose application metrics, and profile runtime performance in a flexible manner.
Current API: [![GoDoc](https://godoc.org/github.com/armon/go-metrics?status.svg)](https://godoc.org/github.com/armon/go-metrics)
Sinks
=====
The `metrics` package makes use of a `MetricSink` interface to support delivery
to any type of backend. Currently the following sinks are provided:
* StatsiteSink : Sinks to a [statsite](https://github.com/armon/statsite/) instance (TCP)
* StatsdSink: Sinks to a [StatsD](https://github.com/etsy/statsd/) / statsite instance (UDP)
* PrometheusSink: Sinks to a [Prometheus](http://prometheus.io/) metrics endpoint (exposed via HTTP for scrapes)
* InmemSink : Provides in-memory aggregation, can be used to export stats
* FanoutSink : Sinks to multiple sinks. Enables writing to multiple statsite instances for example.
* BlackholeSink : Sinks to nowhere
In addition to the sinks, the `InmemSignal` can be used to catch a signal,
and dump a formatted output of recent metrics. For example, when a process gets
a SIGUSR1, it can dump to stderr recent performance metrics for debugging.
Examples
========
Here is an example of using the package:
func SlowMethod() {
// Profiling the runtime of a method
defer metrics.MeasureSince([]string{"SlowMethod"}, time.Now())
}
// Configure a statsite sink as the global metrics sink
sink, _ := metrics.NewStatsiteSink("statsite:8125")
metrics.NewGlobal(metrics.DefaultConfig("service-name"), sink)
// Emit a Key/Value pair
metrics.EmitKey([]string{"questions", "meaning of life"}, 42)
Here is an example of setting up a signal handler:
// Setup the inmem sink and signal handler
inm := metrics.NewInmemSink(10*time.Second, time.Minute)
sig := metrics.DefaultInmemSignal(inm)
metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm)
// Run some code
inm.SetGauge([]string{"foo"}, 42)
inm.EmitKey([]string{"bar"}, 30)
inm.IncrCounter([]string{"baz"}, 42)
inm.IncrCounter([]string{"baz"}, 1)
inm.IncrCounter([]string{"baz"}, 80)
inm.AddSample([]string{"method", "wow"}, 42)
inm.AddSample([]string{"method", "wow"}, 100)
inm.AddSample([]string{"method", "wow"}, 22)
....
When a signal comes in, output like the following will be dumped to stderr:
[2014-01-28 14:57:33.04 -0800 PST][G] 'foo': 42.000
[2014-01-28 14:57:33.04 -0800 PST][P] 'bar': 30.000
[2014-01-28 14:57:33.04 -0800 PST][C] 'baz': Count: 3 Min: 1.000 Mean: 41.000 Max: 80.000 Stddev: 39.509
[2014-01-28 14:57:33.04 -0800 PST][S] 'method.wow': Count: 3 Min: 22.000 Mean: 54.667 Max: 100.000 Stddev: 40.513
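
For context on what this revert drops, here is a minimal, self-contained sketch of the global setup the README above describes; the service name and values are illustrative only:

```go
package main

import (
	"time"

	"github.com/armon/go-metrics"
)

// slowMethod records how long it ran as a timer sample keyed "SlowMethod".
func slowMethod() {
	defer metrics.MeasureSince([]string{"SlowMethod"}, time.Now())
	time.Sleep(50 * time.Millisecond)
}

func main() {
	// Aggregate in memory over 10s intervals, retain one minute of data,
	// and dump recent metrics to stderr on SIGUSR1 (SIGBREAK on Windows).
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.DefaultInmemSignal(inm)
	metrics.NewGlobal(metrics.DefaultConfig("example-service"), inm)

	metrics.SetGauge([]string{"foo"}, 42)
	metrics.IncrCounter([]string{"requests"}, 1)
	slowMethod()
}
```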


@@ -1,12 +0,0 @@
// +build !windows
package metrics
import (
"syscall"
)
const (
// DefaultSignal is used with DefaultInmemSignal
DefaultSignal = syscall.SIGUSR1
)


@@ -1,13 +0,0 @@
// +build windows
package metrics
import (
"syscall"
)
const (
// DefaultSignal is used with DefaultInmemSignal
// Windows has no SIGUSR1, use SIGBREAK
DefaultSignal = syscall.Signal(21)
)


@@ -1,125 +0,0 @@
package datadog
import (
"fmt"
"strings"
"github.com/DataDog/datadog-go/statsd"
)
// DogStatsdSink provides a MetricSink that can be used
// with a dogstatsd server. It utilizes the Dogstatsd client at github.com/DataDog/datadog-go/statsd
type DogStatsdSink struct {
client *statsd.Client
hostName string
propagateHostname bool
}
// NewDogStatsdSink is used to create a new DogStatsdSink with sane defaults
func NewDogStatsdSink(addr string, hostName string) (*DogStatsdSink, error) {
client, err := statsd.New(addr)
if err != nil {
return nil, err
}
sink := &DogStatsdSink{
client: client,
hostName: hostName,
propagateHostname: false,
}
return sink, nil
}
// SetTags sets common tags on the Dogstatsd Client that will be sent
// along with all dogstatsd packets.
// Ref: http://docs.datadoghq.com/guides/dogstatsd/#tags
func (s *DogStatsdSink) SetTags(tags []string) {
s.client.Tags = tags
}
// EnableHostnamePropagation forces a Dogstatsd `host` tag with the value specified by `s.HostName`
// Since the go-metrics package has its own mechanism for attaching a hostname to metrics,
// setting the `propagateHostname` flag ensures that `s.HostName` overrides the host tag naively set by the DogStatsd server
func (s *DogStatsdSink) EnableHostNamePropagation() {
s.propagateHostname = true
}
func (s *DogStatsdSink) flattenKey(parts []string) string {
joined := strings.Join(parts, ".")
return strings.Map(func(r rune) rune {
switch r {
case ':':
fallthrough
case ' ':
return '_'
default:
return r
}
}, joined)
}
func (s *DogStatsdSink) parseKey(key []string) ([]string, []string) {
// Since DogStatsd supports dimensionality via tags on metric keys, this sink's approach is to splice the hostname out of the key in favor of a `host` tag
// The `host` tag is either forced here, or set downstream by the DogStatsd server
var tags []string
hostName := s.hostName
//Splice the hostname out of the key
for i, el := range key {
if el == hostName {
key = append(key[:i], key[i+1:]...)
}
}
if s.propagateHostname {
tags = append(tags, fmt.Sprintf("host:%s", hostName))
}
return key, tags
}
// Implementation of methods in the MetricSink interface
func (s *DogStatsdSink) SetGauge(key []string, val float32) {
s.SetGaugeWithTags(key, val, []string{})
}
func (s *DogStatsdSink) IncrCounter(key []string, val float32) {
s.IncrCounterWithTags(key, val, []string{})
}
// EmitKey is not implemented since DogStatsd does not provide a metric type that holds an
// arbitrary number of values
func (s *DogStatsdSink) EmitKey(key []string, val float32) {
}
func (s *DogStatsdSink) AddSample(key []string, val float32) {
s.AddSampleWithTags(key, val, []string{})
}
// The following ...WithTags methods correspond to Datadog's Tag extension to Statsd.
// http://docs.datadoghq.com/guides/dogstatsd/#tags
func (s *DogStatsdSink) SetGaugeWithTags(key []string, val float32, tags []string) {
flatKey, tags := s.getFlatkeyAndCombinedTags(key, tags)
rate := 1.0
s.client.Gauge(flatKey, float64(val), tags, rate)
}
func (s *DogStatsdSink) IncrCounterWithTags(key []string, val float32, tags []string) {
flatKey, tags := s.getFlatkeyAndCombinedTags(key, tags)
rate := 1.0
s.client.Count(flatKey, int64(val), tags, rate)
}
func (s *DogStatsdSink) AddSampleWithTags(key []string, val float32, tags []string) {
flatKey, tags := s.getFlatkeyAndCombinedTags(key, tags)
rate := 1.0
s.client.TimeInMilliseconds(flatKey, float64(val), tags, rate)
}
func (s *DogStatsdSink) getFlatkeyAndCombinedTags(key []string, tags []string) (flattenedKey string, combinedTags []string) {
key, hostTags := s.parseKey(key)
flatKey := s.flattenKey(key)
tags = append(tags, hostTags...)
return flatKey, tags
}
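
A small usage sketch for the DogStatsd sink deleted above; the `github.com/armon/go-metrics/datadog` import path is assumed from the `package datadog` declaration, and the address, hostname, and tags are placeholders:

```go
package main

import (
	"log"

	"github.com/armon/go-metrics/datadog"
)

func main() {
	// Point the sink at a local dogstatsd agent; "web-01" is spliced out of
	// metric keys and (optionally) re-attached as a `host` tag.
	sink, err := datadog.NewDogStatsdSink("127.0.0.1:8125", "web-01")
	if err != nil {
		log.Fatal(err)
	}

	// Common tags ride along with every packet the client sends.
	sink.SetTags([]string{"env:staging"})
	// Force `host:web-01` instead of relying on the dogstatsd server to set it.
	sink.EnableHostNamePropagation()

	// Emit a gauge with one extra per-metric tag.
	sink.SetGaugeWithTags([]string{"queue", "depth"}, 12, []string{"queue:default"})
}
```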


@@ -1,241 +0,0 @@
package metrics
import (
"fmt"
"math"
"strings"
"sync"
"time"
)
// InmemSink provides a MetricSink that does in-memory aggregation
// without sending metrics over a network. It can be embedded within
// an application to provide profiling information.
type InmemSink struct {
// How long is each aggregation interval
interval time.Duration
// Retain controls how many metrics interval we keep
retain time.Duration
// maxIntervals is the maximum length of intervals.
// It is retain / interval.
maxIntervals int
// intervals is a slice of the retained intervals
intervals []*IntervalMetrics
intervalLock sync.RWMutex
}
// IntervalMetrics stores the aggregated metrics
// for a specific interval
type IntervalMetrics struct {
sync.RWMutex
// The start time of the interval
Interval time.Time
// Gauges maps the key to the last set value
Gauges map[string]float32
// Points maps the string to the list of emitted values
// from EmitKey
Points map[string][]float32
// Counters maps the string key to a sum of the counter
// values
Counters map[string]*AggregateSample
// Samples maps the key to an AggregateSample,
// which has the rolled up view of a sample
Samples map[string]*AggregateSample
}
// NewIntervalMetrics creates a new IntervalMetrics for a given interval
func NewIntervalMetrics(intv time.Time) *IntervalMetrics {
return &IntervalMetrics{
Interval: intv,
Gauges: make(map[string]float32),
Points: make(map[string][]float32),
Counters: make(map[string]*AggregateSample),
Samples: make(map[string]*AggregateSample),
}
}
// AggregateSample is used to hold aggregate metrics
// about a sample
type AggregateSample struct {
Count int // The count of emitted pairs
Sum float64 // The sum of values
SumSq float64 // The sum of squared values
Min float64 // Minimum value
Max float64 // Maximum value
LastUpdated time.Time // When value was last updated
}
// Computes a Stddev of the values
func (a *AggregateSample) Stddev() float64 {
num := (float64(a.Count) * a.SumSq) - math.Pow(a.Sum, 2)
div := float64(a.Count * (a.Count - 1))
if div == 0 {
return 0
}
return math.Sqrt(num / div)
}
// Computes a mean of the values
func (a *AggregateSample) Mean() float64 {
if a.Count == 0 {
return 0
}
return a.Sum / float64(a.Count)
}
// Ingest is used to update a sample
func (a *AggregateSample) Ingest(v float64) {
a.Count++
a.Sum += v
a.SumSq += (v * v)
if v < a.Min || a.Count == 1 {
a.Min = v
}
if v > a.Max || a.Count == 1 {
a.Max = v
}
a.LastUpdated = time.Now()
}
func (a *AggregateSample) String() string {
if a.Count == 0 {
return "Count: 0"
} else if a.Stddev() == 0 {
return fmt.Sprintf("Count: %d Sum: %0.3f LastUpdated: %s", a.Count, a.Sum, a.LastUpdated)
} else {
return fmt.Sprintf("Count: %d Min: %0.3f Mean: %0.3f Max: %0.3f Stddev: %0.3f Sum: %0.3f LastUpdated: %s",
a.Count, a.Min, a.Mean(), a.Max, a.Stddev(), a.Sum, a.LastUpdated)
}
}
// NewInmemSink is used to construct a new in-memory sink.
// Uses an aggregation interval and maximum retention period.
func NewInmemSink(interval, retain time.Duration) *InmemSink {
i := &InmemSink{
interval: interval,
retain: retain,
maxIntervals: int(retain / interval),
}
i.intervals = make([]*IntervalMetrics, 0, i.maxIntervals)
return i
}
func (i *InmemSink) SetGauge(key []string, val float32) {
k := i.flattenKey(key)
intv := i.getInterval()
intv.Lock()
defer intv.Unlock()
intv.Gauges[k] = val
}
func (i *InmemSink) EmitKey(key []string, val float32) {
k := i.flattenKey(key)
intv := i.getInterval()
intv.Lock()
defer intv.Unlock()
vals := intv.Points[k]
intv.Points[k] = append(vals, val)
}
func (i *InmemSink) IncrCounter(key []string, val float32) {
k := i.flattenKey(key)
intv := i.getInterval()
intv.Lock()
defer intv.Unlock()
agg := intv.Counters[k]
if agg == nil {
agg = &AggregateSample{}
intv.Counters[k] = agg
}
agg.Ingest(float64(val))
}
func (i *InmemSink) AddSample(key []string, val float32) {
k := i.flattenKey(key)
intv := i.getInterval()
intv.Lock()
defer intv.Unlock()
agg := intv.Samples[k]
if agg == nil {
agg = &AggregateSample{}
intv.Samples[k] = agg
}
agg.Ingest(float64(val))
}
// Data is used to retrieve all the aggregated metrics
// Intervals may be in use, and a read lock should be acquired
func (i *InmemSink) Data() []*IntervalMetrics {
// Get the current interval, forces creation
i.getInterval()
i.intervalLock.RLock()
defer i.intervalLock.RUnlock()
intervals := make([]*IntervalMetrics, len(i.intervals))
copy(intervals, i.intervals)
return intervals
}
func (i *InmemSink) getExistingInterval(intv time.Time) *IntervalMetrics {
i.intervalLock.RLock()
defer i.intervalLock.RUnlock()
n := len(i.intervals)
if n > 0 && i.intervals[n-1].Interval == intv {
return i.intervals[n-1]
}
return nil
}
func (i *InmemSink) createInterval(intv time.Time) *IntervalMetrics {
i.intervalLock.Lock()
defer i.intervalLock.Unlock()
// Check for an existing interval
n := len(i.intervals)
if n > 0 && i.intervals[n-1].Interval == intv {
return i.intervals[n-1]
}
// Add the current interval
current := NewIntervalMetrics(intv)
i.intervals = append(i.intervals, current)
n++
// Truncate the intervals if they are too long
if n >= i.maxIntervals {
copy(i.intervals[0:], i.intervals[n-i.maxIntervals:])
i.intervals = i.intervals[:i.maxIntervals]
}
return current
}
// getInterval returns the current interval to write to
func (i *InmemSink) getInterval() *IntervalMetrics {
intv := time.Now().Truncate(i.interval)
if m := i.getExistingInterval(intv); m != nil {
return m
}
return i.createInterval(intv)
}
// Flattens the key for formatting, removes spaces
func (i *InmemSink) flattenKey(parts []string) string {
joined := strings.Join(parts, ".")
return strings.Replace(joined, " ", "_", -1)
}
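
A brief sketch of how the in-memory sink deleted above is typically read back; interval lengths and keys are illustrative:

```go
package main

import (
	"fmt"
	"time"

	"github.com/armon/go-metrics"
)

func main() {
	// One-second aggregation intervals, retained for ten seconds.
	inm := metrics.NewInmemSink(time.Second, 10*time.Second)

	inm.IncrCounter([]string{"jobs", "done"}, 1)
	inm.IncrCounter([]string{"jobs", "done"}, 2)
	inm.AddSample([]string{"jobs", "latency_ms"}, 37)

	// Data returns a copy of the retained intervals; each interval holds the
	// rolled-up gauges, points, counters and samples for its window.
	for _, intv := range inm.Data() {
		for name, agg := range intv.Counters {
			fmt.Printf("%s => %s\n", name, agg)
		}
	}
}
```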


@@ -1,100 +0,0 @@
package metrics
import (
"bytes"
"fmt"
"io"
"os"
"os/signal"
"sync"
"syscall"
)
// InmemSignal is used to listen for a given signal, and when received,
// to dump the current metrics from the InmemSink to an io.Writer
type InmemSignal struct {
signal syscall.Signal
inm *InmemSink
w io.Writer
sigCh chan os.Signal
stop bool
stopCh chan struct{}
stopLock sync.Mutex
}
// NewInmemSignal creates a new InmemSignal which listens for a given signal,
// and dumps the current metrics out to a writer
func NewInmemSignal(inmem *InmemSink, sig syscall.Signal, w io.Writer) *InmemSignal {
i := &InmemSignal{
signal: sig,
inm: inmem,
w: w,
sigCh: make(chan os.Signal, 1),
stopCh: make(chan struct{}),
}
signal.Notify(i.sigCh, sig)
go i.run()
return i
}
// DefaultInmemSignal returns a new InmemSignal that responds to SIGUSR1
// and writes output to stderr. Windows uses SIGBREAK
func DefaultInmemSignal(inmem *InmemSink) *InmemSignal {
return NewInmemSignal(inmem, DefaultSignal, os.Stderr)
}
// Stop is used to stop the InmemSignal from listening
func (i *InmemSignal) Stop() {
i.stopLock.Lock()
defer i.stopLock.Unlock()
if i.stop {
return
}
i.stop = true
close(i.stopCh)
signal.Stop(i.sigCh)
}
// run is a long running routine that handles signals
func (i *InmemSignal) run() {
for {
select {
case <-i.sigCh:
i.dumpStats()
case <-i.stopCh:
return
}
}
}
// dumpStats is used to dump the data to output writer
func (i *InmemSignal) dumpStats() {
buf := bytes.NewBuffer(nil)
data := i.inm.Data()
// Skip the last period which is still being aggregated
for i := 0; i < len(data)-1; i++ {
intv := data[i]
intv.RLock()
for name, val := range intv.Gauges {
fmt.Fprintf(buf, "[%v][G] '%s': %0.3f\n", intv.Interval, name, val)
}
for name, vals := range intv.Points {
for _, val := range vals {
fmt.Fprintf(buf, "[%v][P] '%s': %0.3f\n", intv.Interval, name, val)
}
}
for name, agg := range intv.Counters {
fmt.Fprintf(buf, "[%v][C] '%s': %s\n", intv.Interval, name, agg)
}
for name, agg := range intv.Samples {
fmt.Fprintf(buf, "[%v][S] '%s': %s\n", intv.Interval, name, agg)
}
intv.RUnlock()
}
// Write out the bytes
i.w.Write(buf.Bytes())
}


@@ -1,115 +0,0 @@
package metrics
import (
"runtime"
"time"
)
func (m *Metrics) SetGauge(key []string, val float32) {
if m.HostName != "" && m.EnableHostname {
key = insert(0, m.HostName, key)
}
if m.EnableTypePrefix {
key = insert(0, "gauge", key)
}
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
}
m.sink.SetGauge(key, val)
}
func (m *Metrics) EmitKey(key []string, val float32) {
if m.EnableTypePrefix {
key = insert(0, "kv", key)
}
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
}
m.sink.EmitKey(key, val)
}
func (m *Metrics) IncrCounter(key []string, val float32) {
if m.EnableTypePrefix {
key = insert(0, "counter", key)
}
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
}
m.sink.IncrCounter(key, val)
}
func (m *Metrics) AddSample(key []string, val float32) {
if m.EnableTypePrefix {
key = insert(0, "sample", key)
}
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
}
m.sink.AddSample(key, val)
}
func (m *Metrics) MeasureSince(key []string, start time.Time) {
if m.EnableTypePrefix {
key = insert(0, "timer", key)
}
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
}
now := time.Now()
elapsed := now.Sub(start)
msec := float32(elapsed.Nanoseconds()) / float32(m.TimerGranularity)
m.sink.AddSample(key, msec)
}
// Periodically collects runtime stats to publish
func (m *Metrics) collectStats() {
for {
time.Sleep(m.ProfileInterval)
m.emitRuntimeStats()
}
}
// Emits various runtime statistics
func (m *Metrics) emitRuntimeStats() {
// Export number of Goroutines
numRoutines := runtime.NumGoroutine()
m.SetGauge([]string{"runtime", "num_goroutines"}, float32(numRoutines))
// Export memory stats
var stats runtime.MemStats
runtime.ReadMemStats(&stats)
m.SetGauge([]string{"runtime", "alloc_bytes"}, float32(stats.Alloc))
m.SetGauge([]string{"runtime", "sys_bytes"}, float32(stats.Sys))
m.SetGauge([]string{"runtime", "malloc_count"}, float32(stats.Mallocs))
m.SetGauge([]string{"runtime", "free_count"}, float32(stats.Frees))
m.SetGauge([]string{"runtime", "heap_objects"}, float32(stats.HeapObjects))
m.SetGauge([]string{"runtime", "total_gc_pause_ns"}, float32(stats.PauseTotalNs))
m.SetGauge([]string{"runtime", "total_gc_runs"}, float32(stats.NumGC))
// Export info about the last few GC runs
num := stats.NumGC
// Handle wrap around
if num < m.lastNumGC {
m.lastNumGC = 0
}
// Ensure we don't scan more than 256
if num-m.lastNumGC >= 256 {
m.lastNumGC = num - 255
}
for i := m.lastNumGC; i < num; i++ {
pause := stats.PauseNs[i%256]
m.AddSample([]string{"runtime", "gc_pause_ns"}, float32(pause))
}
m.lastNumGC = num
}
// Inserts a string value at an index into the slice
func insert(i int, v string, s []string) []string {
s = append(s, "")
copy(s[i+1:], s[i:])
s[i] = v
return s
}


@@ -1,88 +0,0 @@
// +build go1.3
package prometheus
import (
"strings"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
type PrometheusSink struct {
mu sync.Mutex
gauges map[string]prometheus.Gauge
summaries map[string]prometheus.Summary
counters map[string]prometheus.Counter
}
func NewPrometheusSink() (*PrometheusSink, error) {
return &PrometheusSink{
gauges: make(map[string]prometheus.Gauge),
summaries: make(map[string]prometheus.Summary),
counters: make(map[string]prometheus.Counter),
}, nil
}
func (p *PrometheusSink) flattenKey(parts []string) string {
joined := strings.Join(parts, "_")
joined = strings.Replace(joined, " ", "_", -1)
joined = strings.Replace(joined, ".", "_", -1)
joined = strings.Replace(joined, "-", "_", -1)
return joined
}
func (p *PrometheusSink) SetGauge(parts []string, val float32) {
p.mu.Lock()
defer p.mu.Unlock()
key := p.flattenKey(parts)
g, ok := p.gauges[key]
if !ok {
g = prometheus.NewGauge(prometheus.GaugeOpts{
Name: key,
Help: key,
})
prometheus.MustRegister(g)
p.gauges[key] = g
}
g.Set(float64(val))
}
func (p *PrometheusSink) AddSample(parts []string, val float32) {
p.mu.Lock()
defer p.mu.Unlock()
key := p.flattenKey(parts)
g, ok := p.summaries[key]
if !ok {
g = prometheus.NewSummary(prometheus.SummaryOpts{
Name: key,
Help: key,
MaxAge: 10 * time.Second,
})
prometheus.MustRegister(g)
p.summaries[key] = g
}
g.Observe(float64(val))
}
// EmitKey is not implemented. Prometheus doesn't offer a type for which an
// arbitrary number of values is retained, as Prometheus works with a pull
// model, rather than a push model.
func (p *PrometheusSink) EmitKey(key []string, val float32) {
}
func (p *PrometheusSink) IncrCounter(parts []string, val float32) {
p.mu.Lock()
defer p.mu.Unlock()
key := p.flattenKey(parts)
g, ok := p.counters[key]
if !ok {
g = prometheus.NewCounter(prometheus.CounterOpts{
Name: key,
Help: key,
})
prometheus.MustRegister(g)
p.counters[key] = g
}
g.Add(float64(val))
}
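
A sketch of wiring the Prometheus sink above into the go-metrics globals; the `github.com/armon/go-metrics/prometheus` import path is assumed from the package declaration, and exposition of the default registry is left to whatever HTTP handler the application already uses:

```go
package main

import (
	"time"

	"github.com/armon/go-metrics"
	promsink "github.com/armon/go-metrics/prometheus"
)

func main() {
	// The sink lazily creates Prometheus Gauges/Counters/Summaries and
	// registers them on the default registry via prometheus.MustRegister,
	// so any standard /metrics exposition of that registry will serve them.
	sink, err := promsink.NewPrometheusSink()
	if err != nil {
		panic(err)
	}
	metrics.NewGlobal(metrics.DefaultConfig("example-service"), sink)

	metrics.SetGauge([]string{"build", "info"}, 1)
	metrics.MeasureSince([]string{"startup"}, time.Now())
}
```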


@@ -1,52 +0,0 @@
package metrics
// The MetricSink interface is used to transmit metrics information
// to an external system
type MetricSink interface {
// A Gauge should retain the last value it is set to
SetGauge(key []string, val float32)
// Should emit a Key/Value pair for each call
EmitKey(key []string, val float32)
// Counters should accumulate values
IncrCounter(key []string, val float32)
// Samples are for timing information, where quantiles are used
AddSample(key []string, val float32)
}
// BlackholeSink is used to just blackhole messages
type BlackholeSink struct{}
func (*BlackholeSink) SetGauge(key []string, val float32) {}
func (*BlackholeSink) EmitKey(key []string, val float32) {}
func (*BlackholeSink) IncrCounter(key []string, val float32) {}
func (*BlackholeSink) AddSample(key []string, val float32) {}
// FanoutSink is used to sink to fanout values to multiple sinks
type FanoutSink []MetricSink
func (fh FanoutSink) SetGauge(key []string, val float32) {
for _, s := range fh {
s.SetGauge(key, val)
}
}
func (fh FanoutSink) EmitKey(key []string, val float32) {
for _, s := range fh {
s.EmitKey(key, val)
}
}
func (fh FanoutSink) IncrCounter(key []string, val float32) {
for _, s := range fh {
s.IncrCounter(key, val)
}
}
func (fh FanoutSink) AddSample(key []string, val float32) {
for _, s := range fh {
s.AddSample(key, val)
}
}
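
A short sketch of the fan-out pattern the file above enables, combining the statsd and in-memory sinks defined elsewhere in this dependency; the address is a placeholder:

```go
package main

import (
	"log"
	"time"

	"github.com/armon/go-metrics"
)

func main() {
	// Send every metric both to a statsd server (UDP) and to an in-memory
	// sink that can be dumped or inspected locally.
	statsd, err := metrics.NewStatsdSink("127.0.0.1:8125")
	if err != nil {
		log.Fatal(err)
	}
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)

	// FanoutSink is just a slice of MetricSinks; every call is forwarded to each.
	fanout := metrics.FanoutSink{statsd, inm}
	metrics.NewGlobal(metrics.DefaultConfig("example-service"), fanout)

	metrics.IncrCounter([]string{"requests"}, 1)
}
```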


@@ -1,95 +0,0 @@
package metrics
import (
"os"
"time"
)
// Config is used to configure metrics settings
type Config struct {
ServiceName string // Prefixed with keys to separate services
HostName string // Hostname to use. If not provided and EnableHostname, it will be os.Hostname
EnableHostname bool // Enable prefixing gauge values with hostname
EnableRuntimeMetrics bool // Enables profiling of runtime metrics (GC, Goroutines, Memory)
EnableTypePrefix bool // Prefixes key with a type ("counter", "gauge", "timer")
TimerGranularity time.Duration // Granularity of timers.
ProfileInterval time.Duration // Interval to profile runtime metrics
}
// Metrics represents an instance of a metrics sink that can
// be used to emit
type Metrics struct {
Config
lastNumGC uint32
sink MetricSink
}
// Shared global metrics instance
var globalMetrics *Metrics
func init() {
// Initialize to a blackhole sink to avoid errors
globalMetrics = &Metrics{sink: &BlackholeSink{}}
}
// DefaultConfig provides a sane default configuration
func DefaultConfig(serviceName string) *Config {
c := &Config{
ServiceName: serviceName, // Use client provided service
HostName: "",
EnableHostname: true, // Enable hostname prefix
EnableRuntimeMetrics: true, // Enable runtime profiling
EnableTypePrefix: false, // Disable type prefix
TimerGranularity: time.Millisecond, // Timers are in milliseconds
ProfileInterval: time.Second, // Poll runtime every second
}
// Try to get the hostname
name, _ := os.Hostname()
c.HostName = name
return c
}
// New is used to create a new instance of Metrics
func New(conf *Config, sink MetricSink) (*Metrics, error) {
met := &Metrics{}
met.Config = *conf
met.sink = sink
// Start the runtime collector
if conf.EnableRuntimeMetrics {
go met.collectStats()
}
return met, nil
}
// NewGlobal is the same as New, but it assigns the metrics object to be
// used globally as well as returning it.
func NewGlobal(conf *Config, sink MetricSink) (*Metrics, error) {
metrics, err := New(conf, sink)
if err == nil {
globalMetrics = metrics
}
return metrics, err
}
// Proxy all the methods to the globalMetrics instance
func SetGauge(key []string, val float32) {
globalMetrics.SetGauge(key, val)
}
func EmitKey(key []string, val float32) {
globalMetrics.EmitKey(key, val)
}
func IncrCounter(key []string, val float32) {
globalMetrics.IncrCounter(key, val)
}
func AddSample(key []string, val float32) {
globalMetrics.AddSample(key, val)
}
func MeasureSince(key []string, start time.Time) {
globalMetrics.MeasureSince(key, start)
}
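
A minimal sketch of constructing a non-global `Metrics` instance from the config above, using the `BlackholeSink` as a stand-in sink; the tweaked options are illustrative:

```go
package main

import (
	"time"

	"github.com/armon/go-metrics"
)

func main() {
	// Start from the defaults and opt into type prefixes
	// ("counter.", "gauge.", "timer.", "kv." on every key).
	conf := metrics.DefaultConfig("example-service")
	conf.EnableTypePrefix = true
	conf.EnableHostname = false
	conf.ProfileInterval = 5 * time.Second

	// BlackholeSink satisfies MetricSink and discards everything, which is
	// handy while wiring instrumentation up before a real sink exists.
	m, err := metrics.New(conf, &metrics.BlackholeSink{})
	if err != nil {
		panic(err)
	}
	m.IncrCounter([]string{"boot"}, 1)
}
```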


@@ -1,154 +0,0 @@
package metrics
import (
"bytes"
"fmt"
"log"
"net"
"strings"
"time"
)
const (
// statsdMaxLen is the maximum size of a packet
// to send to statsd
statsdMaxLen = 1400
)
// StatsdSink provides a MetricSink that can be used
// with a statsite or statsd metrics server. It uses
// only UDP packets, while StatsiteSink uses TCP.
type StatsdSink struct {
addr string
metricQueue chan string
}
// NewStatsdSink is used to create a new StatsdSink
func NewStatsdSink(addr string) (*StatsdSink, error) {
s := &StatsdSink{
addr: addr,
metricQueue: make(chan string, 4096),
}
go s.flushMetrics()
return s, nil
}
// Close is used to stop flushing to statsd
func (s *StatsdSink) Shutdown() {
close(s.metricQueue)
}
func (s *StatsdSink) SetGauge(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
}
func (s *StatsdSink) EmitKey(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|kv\n", flatKey, val))
}
func (s *StatsdSink) IncrCounter(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
}
func (s *StatsdSink) AddSample(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
}
// Flattens the key for formatting, removes spaces
func (s *StatsdSink) flattenKey(parts []string) string {
joined := strings.Join(parts, ".")
return strings.Map(func(r rune) rune {
switch r {
case ':':
fallthrough
case ' ':
return '_'
default:
return r
}
}, joined)
}
// Does a non-blocking push to the metrics queue
func (s *StatsdSink) pushMetric(m string) {
select {
case s.metricQueue <- m:
default:
}
}
// Flushes metrics
func (s *StatsdSink) flushMetrics() {
var sock net.Conn
var err error
var wait <-chan time.Time
ticker := time.NewTicker(flushInterval)
defer ticker.Stop()
CONNECT:
// Create a buffer
buf := bytes.NewBuffer(nil)
// Attempt to connect
sock, err = net.Dial("udp", s.addr)
if err != nil {
log.Printf("[ERR] Error connecting to statsd! Err: %s", err)
goto WAIT
}
for {
select {
case metric, ok := <-s.metricQueue:
// Get a metric from the queue
if !ok {
goto QUIT
}
// Check if this would overflow the packet size
if len(metric)+buf.Len() > statsdMaxLen {
_, err := sock.Write(buf.Bytes())
buf.Reset()
if err != nil {
log.Printf("[ERR] Error writing to statsd! Err: %s", err)
goto WAIT
}
}
// Append to the buffer
buf.WriteString(metric)
case <-ticker.C:
if buf.Len() == 0 {
continue
}
_, err := sock.Write(buf.Bytes())
buf.Reset()
if err != nil {
log.Printf("[ERR] Error flushing to statsd! Err: %s", err)
goto WAIT
}
}
}
WAIT:
// Wait for a while
wait = time.After(time.Duration(5) * time.Second)
for {
select {
// Dequeue the messages to avoid backlog
case _, ok := <-s.metricQueue:
if !ok {
goto QUIT
}
case <-wait:
goto CONNECT
}
}
QUIT:
s.metricQueue = nil
}


@@ -1,142 +0,0 @@
package metrics
import (
"bufio"
"fmt"
"log"
"net"
"strings"
"time"
)
const (
// We force flush the statsite metrics after this period of
// inactivity. Prevents stats from getting stuck in a buffer
// forever.
flushInterval = 100 * time.Millisecond
)
// StatsiteSink provides a MetricSink that can be used with a
// statsite metrics server
type StatsiteSink struct {
addr string
metricQueue chan string
}
// NewStatsiteSink is used to create a new StatsiteSink
func NewStatsiteSink(addr string) (*StatsiteSink, error) {
s := &StatsiteSink{
addr: addr,
metricQueue: make(chan string, 4096),
}
go s.flushMetrics()
return s, nil
}
// Close is used to stop flushing to statsite
func (s *StatsiteSink) Shutdown() {
close(s.metricQueue)
}
func (s *StatsiteSink) SetGauge(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
}
func (s *StatsiteSink) EmitKey(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|kv\n", flatKey, val))
}
func (s *StatsiteSink) IncrCounter(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
}
func (s *StatsiteSink) AddSample(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
}
// Flattens the key for formatting, removes spaces
func (s *StatsiteSink) flattenKey(parts []string) string {
joined := strings.Join(parts, ".")
return strings.Map(func(r rune) rune {
switch r {
case ':':
fallthrough
case ' ':
return '_'
default:
return r
}
}, joined)
}
// Does a non-blocking push to the metrics queue
func (s *StatsiteSink) pushMetric(m string) {
select {
case s.metricQueue <- m:
default:
}
}
// Flushes metrics
func (s *StatsiteSink) flushMetrics() {
var sock net.Conn
var err error
var wait <-chan time.Time
var buffered *bufio.Writer
ticker := time.NewTicker(flushInterval)
defer ticker.Stop()
CONNECT:
// Attempt to connect
sock, err = net.Dial("tcp", s.addr)
if err != nil {
log.Printf("[ERR] Error connecting to statsite! Err: %s", err)
goto WAIT
}
// Create a buffered writer
buffered = bufio.NewWriter(sock)
for {
select {
case metric, ok := <-s.metricQueue:
// Get a metric from the queue
if !ok {
goto QUIT
}
// Try to send to statsite
_, err := buffered.Write([]byte(metric))
if err != nil {
log.Printf("[ERR] Error writing to statsite! Err: %s", err)
goto WAIT
}
case <-ticker.C:
if err := buffered.Flush(); err != nil {
log.Printf("[ERR] Error flushing to statsite! Err: %s", err)
goto WAIT
}
}
}
WAIT:
// Wait for a while
wait = time.After(time.Duration(5) * time.Second)
for {
select {
// Dequeue the messages to avoid backlog
case _, ok := <-s.metricQueue:
if !ok {
goto QUIT
}
case <-wait:
goto CONNECT
}
}
QUIT:
s.metricQueue = nil
}


@@ -1,25 +0,0 @@
Copyright (c) 2012, 2013 Ugorji Nwoke.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its contributors may be used
to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -1,143 +0,0 @@
// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a BSD-style license found in the LICENSE file.
/*
High Performance, Feature-Rich Idiomatic Go encoding library for msgpack and binc .
Supported Serialization formats are:
- msgpack: [https://github.com/msgpack/msgpack]
- binc: [http://github.com/ugorji/binc]
To install:
go get github.com/ugorji/go/codec
The idiomatic Go support is as seen in other encoding packages in
the standard library (ie json, xml, gob, etc).
Rich Feature Set includes:
- Simple but extremely powerful and feature-rich API
- Very High Performance.
Our extensive benchmarks show us outperforming Gob, Json and Bson by 2-4X.
This was achieved by taking extreme care on:
- managing allocation
- function frame size (important due to Go's use of split stacks),
- reflection use (and by-passing reflection for common types)
- recursion implications
- zero-copy mode (encoding/decoding to byte slice without using temp buffers)
- Correct.
Care was taken to precisely handle corner cases like:
overflows, nil maps and slices, nil value in stream, etc.
- Efficient zero-copying into temporary byte buffers
when encoding into or decoding from a byte slice.
- Standard field renaming via tags
- Encoding from any value
(struct, slice, map, primitives, pointers, interface{}, etc)
- Decoding into pointer to any non-nil typed value
(struct, slice, map, int, float32, bool, string, reflect.Value, etc)
- Supports extension functions to handle the encode/decode of custom types
- Support Go 1.2 encoding.BinaryMarshaler/BinaryUnmarshaler
- Schema-less decoding
(decode into a pointer to a nil interface{} as opposed to a typed non-nil value).
Includes Options to configure what specific map or slice type to use
when decoding an encoded list or map into a nil interface{}
- Provides a RPC Server and Client Codec for net/rpc communication protocol.
- Msgpack Specific:
- Provides extension functions to handle spec-defined extensions (binary, timestamp)
- Options to resolve ambiguities in handling raw bytes (as string or []byte)
during schema-less decoding (decoding into a nil interface{})
- RPC Server/Client Codec for msgpack-rpc protocol defined at:
https://github.com/msgpack-rpc/msgpack-rpc/blob/master/spec.md
- Fast Paths for some container types:
For some container types, we circumvent reflection and its associated overhead
and allocation costs, and encode/decode directly. These types are:
[]interface{}
[]int
[]string
map[interface{}]interface{}
map[int]interface{}
map[string]interface{}
Extension Support
Users can register a function to handle the encoding or decoding of
their custom types.
There are no restrictions on what the custom type can be. Some examples:
type BisSet []int
type BitSet64 uint64
type UUID string
type MyStructWithUnexportedFields struct { a int; b bool; c []int; }
type GifImage struct { ... }
As an illustration, MyStructWithUnexportedFields would normally be
encoded as an empty map because it has no exported fields, while UUID
would be encoded as a string. However, with extension support, you can
encode any of these however you like.
RPC
RPC Client and Server Codecs are implemented, so the codecs can be used
with the standard net/rpc package.
Usage
Typical usage model:
// create and configure Handle
var (
bh codec.BincHandle
mh codec.MsgpackHandle
)
mh.MapType = reflect.TypeOf(map[string]interface{}(nil))
// configure extensions
// e.g. for msgpack, define functions and enable Time support for tag 1
// mh.AddExt(reflect.TypeOf(time.Time{}), 1, myMsgpackTimeEncodeExtFn, myMsgpackTimeDecodeExtFn)
// create and use decoder/encoder
var (
r io.Reader
w io.Writer
b []byte
h = &bh // or mh to use msgpack
)
dec = codec.NewDecoder(r, h)
dec = codec.NewDecoderBytes(b, h)
err = dec.Decode(&v)
enc = codec.NewEncoder(w, h)
enc = codec.NewEncoderBytes(&b, h)
err = enc.Encode(v)
//RPC Server
go func() {
for {
conn, err := listener.Accept()
rpcCodec := codec.GoRpc.ServerCodec(conn, h)
//OR rpcCodec := codec.MsgpackSpecRpc.ServerCodec(conn, h)
rpc.ServeCodec(rpcCodec)
}
}()
//RPC Communication (client side)
conn, err = net.Dial("tcp", "localhost:5555")
rpcCodec := codec.GoRpc.ClientCodec(conn, h)
//OR rpcCodec := codec.MsgpackSpecRpc.ClientCodec(conn, h)
client := rpc.NewClientWithCodec(rpcCodec)
Representative Benchmark Results
Run the benchmark suite using:
go test -bi -bench=. -benchmem
To run full benchmark suite (including against vmsgpack and bson),
see notes in ext_dep_test.go
*/
package codec
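
To illustrate the codec API this revert removes, a minimal encode/decode round trip against the vendored import path from the Godeps entry above; the struct is a made-up example, not taken from this repository:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/go-msgpack/codec"
)

// logEntry is an arbitrary example type; any struct with exported fields works.
type logEntry struct {
	Index uint64
	Term  uint64
	Data  []byte
}

func main() {
	var mh codec.MsgpackHandle

	// Encode into a byte slice...
	in := logEntry{Index: 1, Term: 7, Data: []byte("hello")}
	var buf []byte
	enc := codec.NewEncoderBytes(&buf, &mh)
	if err := enc.Encode(&in); err != nil {
		panic(err)
	}

	// ...and decode back into a typed value.
	var out logEntry
	dec := codec.NewDecoderBytes(buf, &mh)
	if err := dec.Decode(&out); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", out)
}
```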


@@ -1,174 +0,0 @@
# Codec
High Performance and Feature-Rich Idiomatic Go Library providing
encode/decode support for different serialization formats.
Supported Serialization formats are:
- msgpack: [https://github.com/msgpack/msgpack]
- binc: [http://github.com/ugorji/binc]
To install:
go get github.com/ugorji/go/codec
Online documentation: [http://godoc.org/github.com/ugorji/go/codec]
The idiomatic Go support is as seen in other encoding packages in
the standard library (ie json, xml, gob, etc).
Rich Feature Set includes:
- Simple but extremely powerful and feature-rich API
- Very High Performance.
Our extensive benchmarks show us outperforming Gob, Json and Bson by 2-4X.
This was achieved by taking extreme care on:
- managing allocation
- function frame size (important due to Go's use of split stacks),
- reflection use (and by-passing reflection for common types)
- recursion implications
- zero-copy mode (encoding/decoding to byte slice without using temp buffers)
- Correct.
Care was taken to precisely handle corner cases like:
overflows, nil maps and slices, nil value in stream, etc.
- Efficient zero-copying into temporary byte buffers
when encoding into or decoding from a byte slice.
- Standard field renaming via tags
- Encoding from any value
(struct, slice, map, primitives, pointers, interface{}, etc)
- Decoding into pointer to any non-nil typed value
(struct, slice, map, int, float32, bool, string, reflect.Value, etc)
- Supports extension functions to handle the encode/decode of custom types
- Support Go 1.2 encoding.BinaryMarshaler/BinaryUnmarshaler
- Schema-less decoding
(decode into a pointer to a nil interface{} as opposed to a typed non-nil value).
Includes Options to configure what specific map or slice type to use
when decoding an encoded list or map into a nil interface{}
- Provides a RPC Server and Client Codec for net/rpc communication protocol.
- Msgpack Specific:
- Provides extension functions to handle spec-defined extensions (binary, timestamp)
- Options to resolve ambiguities in handling raw bytes (as string or []byte)
during schema-less decoding (decoding into a nil interface{})
- RPC Server/Client Codec for msgpack-rpc protocol defined at:
https://github.com/msgpack-rpc/msgpack-rpc/blob/master/spec.md
- Fast Paths for some container types:
For some container types, we circumvent reflection and its associated overhead
and allocation costs, and encode/decode directly. These types are:
[]interface{}
[]int
[]string
map[interface{}]interface{}
map[int]interface{}
map[string]interface{}
## Extension Support
Users can register a function to handle the encoding or decoding of
their custom types.
There are no restrictions on what the custom type can be. Some examples:
type BisSet []int
type BitSet64 uint64
type UUID string
type MyStructWithUnexportedFields struct { a int; b bool; c []int; }
type GifImage struct { ... }
As an illustration, MyStructWithUnexportedFields would normally be
encoded as an empty map because it has no exported fields, while UUID
would be encoded as a string. However, with extension support, you can
encode any of these however you like.
## RPC
RPC Client and Server Codecs are implemented, so the codecs can be used
with the standard net/rpc package.
## Usage
Typical usage model:
// create and configure Handle
var (
bh codec.BincHandle
mh codec.MsgpackHandle
)
mh.MapType = reflect.TypeOf(map[string]interface{}(nil))
// configure extensions
// e.g. for msgpack, define functions and enable Time support for tag 1
// mh.AddExt(reflect.TypeOf(time.Time{}), 1, myMsgpackTimeEncodeExtFn, myMsgpackTimeDecodeExtFn)
// create and use decoder/encoder
var (
r io.Reader
w io.Writer
b []byte
h = &bh // or mh to use msgpack
)
dec = codec.NewDecoder(r, h)
dec = codec.NewDecoderBytes(b, h)
err = dec.Decode(&v)
enc = codec.NewEncoder(w, h)
enc = codec.NewEncoderBytes(&b, h)
err = enc.Encode(v)
//RPC Server
go func() {
for {
conn, err := listener.Accept()
rpcCodec := codec.GoRpc.ServerCodec(conn, h)
//OR rpcCodec := codec.MsgpackSpecRpc.ServerCodec(conn, h)
rpc.ServeCodec(rpcCodec)
}
}()
//RPC Communication (client side)
conn, err = net.Dial("tcp", "localhost:5555")
rpcCodec := codec.GoRpc.ClientCodec(conn, h)
//OR rpcCodec := codec.MsgpackSpecRpc.ClientCodec(conn, h)
client := rpc.NewClientWithCodec(rpcCodec)
## Representative Benchmark Results
A sample run of benchmark using "go test -bi -bench=. -benchmem":
/proc/cpuinfo: Intel(R) Core(TM) i7-2630QM CPU @ 2.00GHz (HT)
..............................................
BENCHMARK INIT: 2013-10-16 11:02:50.345970786 -0400 EDT
To run full benchmark comparing encodings (MsgPack, Binc, JSON, GOB, etc), use: "go test -bench=."
Benchmark:
Struct recursive Depth: 1
ApproxDeepSize Of benchmark Struct: 4694 bytes
Benchmark One-Pass Run:
v-msgpack: len: 1600 bytes
bson: len: 3025 bytes
msgpack: len: 1560 bytes
binc: len: 1187 bytes
gob: len: 1972 bytes
json: len: 2538 bytes
..............................................
PASS
Benchmark__Msgpack____Encode 50000 54359 ns/op 14953 B/op 83 allocs/op
Benchmark__Msgpack____Decode 10000 106531 ns/op 14990 B/op 410 allocs/op
Benchmark__Binc_NoSym_Encode 50000 53956 ns/op 14966 B/op 83 allocs/op
Benchmark__Binc_NoSym_Decode 10000 103751 ns/op 14529 B/op 386 allocs/op
Benchmark__Binc_Sym___Encode 50000 65961 ns/op 17130 B/op 88 allocs/op
Benchmark__Binc_Sym___Decode 10000 106310 ns/op 15857 B/op 287 allocs/op
Benchmark__Gob________Encode 10000 135944 ns/op 21189 B/op 237 allocs/op
Benchmark__Gob________Decode 5000 405390 ns/op 83460 B/op 1841 allocs/op
Benchmark__Json_______Encode 20000 79412 ns/op 13874 B/op 102 allocs/op
Benchmark__Json_______Decode 10000 247979 ns/op 14202 B/op 493 allocs/op
Benchmark__Bson_______Encode 10000 121762 ns/op 27814 B/op 514 allocs/op
Benchmark__Bson_______Decode 10000 162126 ns/op 16514 B/op 789 allocs/op
Benchmark__VMsgpack___Encode 50000 69155 ns/op 12370 B/op 344 allocs/op
Benchmark__VMsgpack___Decode 10000 151609 ns/op 20307 B/op 571 allocs/op
ok ugorji.net/codec 30.827s
To run full benchmark suite (including against vmsgpack and bson),
see notes in ext\_dep\_test.go
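
And a sketch of the net/rpc integration the README describes, using the `GoRpc` codec pair over a loopback TCP connection; the `Echo` service and addresses are made-up examples:

```go
package main

import (
	"fmt"
	"net"
	"net/rpc"

	"github.com/hashicorp/go-msgpack/codec"
)

// Echo is a trivial net/rpc service used to exercise the codec.
type Echo struct{}

func (Echo) Hello(name string, reply *string) error {
	*reply = "hello, " + name
	return nil
}

func main() {
	var mh codec.MsgpackHandle

	srv := rpc.NewServer()
	if err := srv.Register(Echo{}); err != nil {
		panic(err)
	}

	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	// Serve each connection with the msgpack codec instead of gob.
	go func() {
		for {
			conn, err := ln.Accept()
			if err != nil {
				return
			}
			go srv.ServeCodec(codec.GoRpc.ServerCodec(conn, &mh))
		}
	}()

	conn, err := net.Dial("tcp", ln.Addr().String())
	if err != nil {
		panic(err)
	}
	client := rpc.NewClientWithCodec(codec.GoRpc.ClientCodec(conn, &mh))

	var reply string
	if err := client.Call("Echo.Hello", "raft", &reply); err != nil {
		panic(err)
	}
	fmt.Println(reply)
}
```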


@@ -1,786 +0,0 @@
// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a BSD-style license found in the LICENSE file.
package codec
import (
"math"
// "reflect"
// "sync/atomic"
"time"
//"fmt"
)
const bincDoPrune = true // No longer needed. Needed before as C lib did not support pruning.
//var _ = fmt.Printf
// vd as low 4 bits (there are 16 slots)
const (
bincVdSpecial byte = iota
bincVdPosInt
bincVdNegInt
bincVdFloat
bincVdString
bincVdByteArray
bincVdArray
bincVdMap
bincVdTimestamp
bincVdSmallInt
bincVdUnicodeOther
bincVdSymbol
bincVdDecimal
_ // open slot
_ // open slot
bincVdCustomExt = 0x0f
)
const (
bincSpNil byte = iota
bincSpFalse
bincSpTrue
bincSpNan
bincSpPosInf
bincSpNegInf
bincSpZeroFloat
bincSpZero
bincSpNegOne
)
const (
bincFlBin16 byte = iota
bincFlBin32
_ // bincFlBin32e
bincFlBin64
_ // bincFlBin64e
// others not currently supported
)
type bincEncDriver struct {
w encWriter
m map[string]uint16 // symbols
s uint32 // symbols sequencer
b [8]byte
}
func (e *bincEncDriver) isBuiltinType(rt uintptr) bool {
return rt == timeTypId
}
func (e *bincEncDriver) encodeBuiltin(rt uintptr, v interface{}) {
switch rt {
case timeTypId:
bs := encodeTime(v.(time.Time))
e.w.writen1(bincVdTimestamp<<4 | uint8(len(bs)))
e.w.writeb(bs)
}
}
func (e *bincEncDriver) encodeNil() {
e.w.writen1(bincVdSpecial<<4 | bincSpNil)
}
func (e *bincEncDriver) encodeBool(b bool) {
if b {
e.w.writen1(bincVdSpecial<<4 | bincSpTrue)
} else {
e.w.writen1(bincVdSpecial<<4 | bincSpFalse)
}
}
func (e *bincEncDriver) encodeFloat32(f float32) {
if f == 0 {
e.w.writen1(bincVdSpecial<<4 | bincSpZeroFloat)
return
}
e.w.writen1(bincVdFloat<<4 | bincFlBin32)
e.w.writeUint32(math.Float32bits(f))
}
func (e *bincEncDriver) encodeFloat64(f float64) {
if f == 0 {
e.w.writen1(bincVdSpecial<<4 | bincSpZeroFloat)
return
}
bigen.PutUint64(e.b[:], math.Float64bits(f))
if bincDoPrune {
i := 7
for ; i >= 0 && (e.b[i] == 0); i-- {
}
i++
if i <= 6 {
e.w.writen1(bincVdFloat<<4 | 0x8 | bincFlBin64)
e.w.writen1(byte(i))
e.w.writeb(e.b[:i])
return
}
}
e.w.writen1(bincVdFloat<<4 | bincFlBin64)
e.w.writeb(e.b[:])
}
func (e *bincEncDriver) encIntegerPrune(bd byte, pos bool, v uint64, lim uint8) {
if lim == 4 {
bigen.PutUint32(e.b[:lim], uint32(v))
} else {
bigen.PutUint64(e.b[:lim], v)
}
if bincDoPrune {
i := pruneSignExt(e.b[:lim], pos)
e.w.writen1(bd | lim - 1 - byte(i))
e.w.writeb(e.b[i:lim])
} else {
e.w.writen1(bd | lim - 1)
e.w.writeb(e.b[:lim])
}
}
func (e *bincEncDriver) encodeInt(v int64) {
const nbd byte = bincVdNegInt << 4
switch {
case v >= 0:
e.encUint(bincVdPosInt<<4, true, uint64(v))
case v == -1:
e.w.writen1(bincVdSpecial<<4 | bincSpNegOne)
default:
e.encUint(bincVdNegInt<<4, false, uint64(-v))
}
}
func (e *bincEncDriver) encodeUint(v uint64) {
e.encUint(bincVdPosInt<<4, true, v)
}
func (e *bincEncDriver) encUint(bd byte, pos bool, v uint64) {
switch {
case v == 0:
e.w.writen1(bincVdSpecial<<4 | bincSpZero)
case pos && v >= 1 && v <= 16:
e.w.writen1(bincVdSmallInt<<4 | byte(v-1))
case v <= math.MaxUint8:
e.w.writen2(bd|0x0, byte(v))
case v <= math.MaxUint16:
e.w.writen1(bd | 0x01)
e.w.writeUint16(uint16(v))
case v <= math.MaxUint32:
e.encIntegerPrune(bd, pos, v, 4)
default:
e.encIntegerPrune(bd, pos, v, 8)
}
}
func (e *bincEncDriver) encodeExtPreamble(xtag byte, length int) {
e.encLen(bincVdCustomExt<<4, uint64(length))
e.w.writen1(xtag)
}
func (e *bincEncDriver) encodeArrayPreamble(length int) {
e.encLen(bincVdArray<<4, uint64(length))
}
func (e *bincEncDriver) encodeMapPreamble(length int) {
e.encLen(bincVdMap<<4, uint64(length))
}
func (e *bincEncDriver) encodeString(c charEncoding, v string) {
l := uint64(len(v))
e.encBytesLen(c, l)
if l > 0 {
e.w.writestr(v)
}
}
func (e *bincEncDriver) encodeSymbol(v string) {
// if WriteSymbolsNoRefs {
// e.encodeString(c_UTF8, v)
// return
// }
//symbols only offer benefit when string length > 1.
//This is because strings with length 1 take only 2 bytes to store
//(bd with embedded length, and single byte for string val).
l := len(v)
switch l {
case 0:
e.encBytesLen(c_UTF8, 0)
return
case 1:
e.encBytesLen(c_UTF8, 1)
e.w.writen1(v[0])
return
}
if e.m == nil {
e.m = make(map[string]uint16, 16)
}
ui, ok := e.m[v]
if ok {
if ui <= math.MaxUint8 {
e.w.writen2(bincVdSymbol<<4, byte(ui))
} else {
e.w.writen1(bincVdSymbol<<4 | 0x8)
e.w.writeUint16(ui)
}
} else {
e.s++
ui = uint16(e.s)
//ui = uint16(atomic.AddUint32(&e.s, 1))
e.m[v] = ui
var lenprec uint8
switch {
case l <= math.MaxUint8:
// lenprec = 0
case l <= math.MaxUint16:
lenprec = 1
case int64(l) <= math.MaxUint32:
lenprec = 2
default:
lenprec = 3
}
if ui <= math.MaxUint8 {
e.w.writen2(bincVdSymbol<<4|0x0|0x4|lenprec, byte(ui))
} else {
e.w.writen1(bincVdSymbol<<4 | 0x8 | 0x4 | lenprec)
e.w.writeUint16(ui)
}
switch lenprec {
case 0:
e.w.writen1(byte(l))
case 1:
e.w.writeUint16(uint16(l))
case 2:
e.w.writeUint32(uint32(l))
default:
e.w.writeUint64(uint64(l))
}
e.w.writestr(v)
}
}
func (e *bincEncDriver) encodeStringBytes(c charEncoding, v []byte) {
l := uint64(len(v))
e.encBytesLen(c, l)
if l > 0 {
e.w.writeb(v)
}
}
func (e *bincEncDriver) encBytesLen(c charEncoding, length uint64) {
//TODO: support bincUnicodeOther (for now, just use string or bytearray)
if c == c_RAW {
e.encLen(bincVdByteArray<<4, length)
} else {
e.encLen(bincVdString<<4, length)
}
}
func (e *bincEncDriver) encLen(bd byte, l uint64) {
if l < 12 {
e.w.writen1(bd | uint8(l+4))
} else {
e.encLenNumber(bd, l)
}
}
func (e *bincEncDriver) encLenNumber(bd byte, v uint64) {
switch {
case v <= math.MaxUint8:
e.w.writen2(bd, byte(v))
case v <= math.MaxUint16:
e.w.writen1(bd | 0x01)
e.w.writeUint16(uint16(v))
case v <= math.MaxUint32:
e.w.writen1(bd | 0x02)
e.w.writeUint32(uint32(v))
default:
e.w.writen1(bd | 0x03)
e.w.writeUint64(uint64(v))
}
}
//------------------------------------
type bincDecDriver struct {
r decReader
bdRead bool
bdType valueType
bd byte
vd byte
vs byte
b [8]byte
m map[uint32]string // symbols (use uint32 as key, as map optimizes for it)
}
func (d *bincDecDriver) initReadNext() {
if d.bdRead {
return
}
d.bd = d.r.readn1()
d.vd = d.bd >> 4
d.vs = d.bd & 0x0f
d.bdRead = true
d.bdType = valueTypeUnset
}
func (d *bincDecDriver) currentEncodedType() valueType {
if d.bdType == valueTypeUnset {
switch d.vd {
case bincVdSpecial:
switch d.vs {
case bincSpNil:
d.bdType = valueTypeNil
case bincSpFalse, bincSpTrue:
d.bdType = valueTypeBool
case bincSpNan, bincSpNegInf, bincSpPosInf, bincSpZeroFloat:
d.bdType = valueTypeFloat
case bincSpZero:
d.bdType = valueTypeUint
case bincSpNegOne:
d.bdType = valueTypeInt
default:
decErr("currentEncodedType: Unrecognized special value 0x%x", d.vs)
}
case bincVdSmallInt:
d.bdType = valueTypeUint
case bincVdPosInt:
d.bdType = valueTypeUint
case bincVdNegInt:
d.bdType = valueTypeInt
case bincVdFloat:
d.bdType = valueTypeFloat
case bincVdString:
d.bdType = valueTypeString
case bincVdSymbol:
d.bdType = valueTypeSymbol
case bincVdByteArray:
d.bdType = valueTypeBytes
case bincVdTimestamp:
d.bdType = valueTypeTimestamp
case bincVdCustomExt:
d.bdType = valueTypeExt
case bincVdArray:
d.bdType = valueTypeArray
case bincVdMap:
d.bdType = valueTypeMap
default:
decErr("currentEncodedType: Unrecognized d.vd: 0x%x", d.vd)
}
}
return d.bdType
}
func (d *bincDecDriver) tryDecodeAsNil() bool {
if d.bd == bincVdSpecial<<4|bincSpNil {
d.bdRead = false
return true
}
return false
}
func (d *bincDecDriver) isBuiltinType(rt uintptr) bool {
return rt == timeTypId
}
func (d *bincDecDriver) decodeBuiltin(rt uintptr, v interface{}) {
switch rt {
case timeTypId:
if d.vd != bincVdTimestamp {
decErr("Invalid d.vd. Expecting 0x%x. Received: 0x%x", bincVdTimestamp, d.vd)
}
tt, err := decodeTime(d.r.readn(int(d.vs)))
if err != nil {
panic(err)
}
var vt *time.Time = v.(*time.Time)
*vt = tt
d.bdRead = false
}
}
func (d *bincDecDriver) decFloatPre(vs, defaultLen byte) {
if vs&0x8 == 0 {
d.r.readb(d.b[0:defaultLen])
} else {
l := d.r.readn1()
if l > 8 {
decErr("At most 8 bytes used to represent float. Received: %v bytes", l)
}
for i := l; i < 8; i++ {
d.b[i] = 0
}
d.r.readb(d.b[0:l])
}
}
func (d *bincDecDriver) decFloat() (f float64) {
//if true { f = math.Float64frombits(d.r.readUint64()); break; }
switch vs := d.vs; vs & 0x7 {
case bincFlBin32:
d.decFloatPre(vs, 4)
f = float64(math.Float32frombits(bigen.Uint32(d.b[0:4])))
case bincFlBin64:
d.decFloatPre(vs, 8)
f = math.Float64frombits(bigen.Uint64(d.b[0:8]))
default:
decErr("only float32 and float64 are supported. d.vd: 0x%x, d.vs: 0x%x", d.vd, d.vs)
}
return
}
func (d *bincDecDriver) decUint() (v uint64) {
// need to inline the code (interface conversion and type assertion expensive)
switch d.vs {
case 0:
v = uint64(d.r.readn1())
case 1:
d.r.readb(d.b[6:])
v = uint64(bigen.Uint16(d.b[6:]))
case 2:
d.b[4] = 0
d.r.readb(d.b[5:])
v = uint64(bigen.Uint32(d.b[4:]))
case 3:
d.r.readb(d.b[4:])
v = uint64(bigen.Uint32(d.b[4:]))
case 4, 5, 6:
lim := int(7 - d.vs)
d.r.readb(d.b[lim:])
for i := 0; i < lim; i++ {
d.b[i] = 0
}
v = uint64(bigen.Uint64(d.b[:]))
case 7:
d.r.readb(d.b[:])
v = uint64(bigen.Uint64(d.b[:]))
default:
decErr("unsigned integers with greater than 64 bits of precision not supported")
}
return
}
func (d *bincDecDriver) decIntAny() (ui uint64, i int64, neg bool) {
switch d.vd {
case bincVdPosInt:
ui = d.decUint()
i = int64(ui)
case bincVdNegInt:
ui = d.decUint()
i = -(int64(ui))
neg = true
case bincVdSmallInt:
i = int64(d.vs) + 1
ui = uint64(d.vs) + 1
case bincVdSpecial:
switch d.vs {
case bincSpZero:
//i = 0
case bincSpNegOne:
neg = true
ui = 1
i = -1
default:
decErr("numeric decode fails for special value: d.vs: 0x%x", d.vs)
}
default:
decErr("number can only be decoded from uint or int values. d.bd: 0x%x, d.vd: 0x%x", d.bd, d.vd)
}
return
}
func (d *bincDecDriver) decodeInt(bitsize uint8) (i int64) {
_, i, _ = d.decIntAny()
checkOverflow(0, i, bitsize)
d.bdRead = false
return
}
func (d *bincDecDriver) decodeUint(bitsize uint8) (ui uint64) {
ui, i, neg := d.decIntAny()
if neg {
decErr("Assigning negative signed value: %v, to unsigned type", i)
}
checkOverflow(ui, 0, bitsize)
d.bdRead = false
return
}
func (d *bincDecDriver) decodeFloat(chkOverflow32 bool) (f float64) {
switch d.vd {
case bincVdSpecial:
d.bdRead = false
switch d.vs {
case bincSpNan:
return math.NaN()
case bincSpPosInf:
return math.Inf(1)
case bincSpZeroFloat, bincSpZero:
return
case bincSpNegInf:
return math.Inf(-1)
default:
decErr("Invalid d.vs decoding float where d.vd=bincVdSpecial: %v", d.vs)
}
case bincVdFloat:
f = d.decFloat()
default:
_, i, _ := d.decIntAny()
f = float64(i)
}
checkOverflowFloat32(f, chkOverflow32)
d.bdRead = false
return
}
// bool can be decoded from bool only (single byte).
func (d *bincDecDriver) decodeBool() (b bool) {
switch d.bd {
case (bincVdSpecial | bincSpFalse):
// b = false
case (bincVdSpecial | bincSpTrue):
b = true
default:
decErr("Invalid single-byte value for bool: %s: %x", msgBadDesc, d.bd)
}
d.bdRead = false
return
}
func (d *bincDecDriver) readMapLen() (length int) {
if d.vd != bincVdMap {
decErr("Invalid d.vd for map. Expecting 0x%x. Got: 0x%x", bincVdMap, d.vd)
}
length = d.decLen()
d.bdRead = false
return
}
func (d *bincDecDriver) readArrayLen() (length int) {
if d.vd != bincVdArray {
decErr("Invalid d.vd for array. Expecting 0x%x. Got: 0x%x", bincVdArray, d.vd)
}
length = d.decLen()
d.bdRead = false
return
}
func (d *bincDecDriver) decLen() int {
if d.vs <= 3 {
return int(d.decUint())
}
return int(d.vs - 4)
}
func (d *bincDecDriver) decodeString() (s string) {
switch d.vd {
case bincVdString, bincVdByteArray:
if length := d.decLen(); length > 0 {
s = string(d.r.readn(length))
}
case bincVdSymbol:
//from vs: extract numSymbolBytes, containsStringVal, strLenPrecision,
//extract symbol
//if containsStringVal, read it and put in map
//else look in map for string value
var symbol uint32
vs := d.vs
//fmt.Printf(">>>> d.vs: 0b%b, & 0x8: %v, & 0x4: %v\n", d.vs, vs & 0x8, vs & 0x4)
if vs&0x8 == 0 {
symbol = uint32(d.r.readn1())
} else {
symbol = uint32(d.r.readUint16())
}
if d.m == nil {
d.m = make(map[uint32]string, 16)
}
if vs&0x4 == 0 {
s = d.m[symbol]
} else {
var slen int
switch vs & 0x3 {
case 0:
slen = int(d.r.readn1())
case 1:
slen = int(d.r.readUint16())
case 2:
slen = int(d.r.readUint32())
case 3:
slen = int(d.r.readUint64())
}
s = string(d.r.readn(slen))
d.m[symbol] = s
}
default:
decErr("Invalid d.vd for string. Expecting string:0x%x, bytearray:0x%x or symbol: 0x%x. Got: 0x%x",
bincVdString, bincVdByteArray, bincVdSymbol, d.vd)
}
d.bdRead = false
return
}
func (d *bincDecDriver) decodeBytes(bs []byte) (bsOut []byte, changed bool) {
var clen int
switch d.vd {
case bincVdString, bincVdByteArray:
clen = d.decLen()
default:
decErr("Invalid d.vd for bytes. Expecting string:0x%x or bytearray:0x%x. Got: 0x%x",
bincVdString, bincVdByteArray, d.vd)
}
if clen > 0 {
// if no contents in stream, don't update the passed byteslice
if len(bs) != clen {
if len(bs) > clen {
bs = bs[:clen]
} else {
bs = make([]byte, clen)
}
bsOut = bs
changed = true
}
d.r.readb(bs)
}
d.bdRead = false
return
}
func (d *bincDecDriver) decodeExt(verifyTag bool, tag byte) (xtag byte, xbs []byte) {
switch d.vd {
case bincVdCustomExt:
l := d.decLen()
xtag = d.r.readn1()
if verifyTag && xtag != tag {
decErr("Wrong extension tag. Got %b. Expecting: %v", xtag, tag)
}
xbs = d.r.readn(l)
case bincVdByteArray:
xbs, _ = d.decodeBytes(nil)
default:
decErr("Invalid d.vd for extensions (Expecting extensions or byte array). Got: 0x%x", d.vd)
}
d.bdRead = false
return
}
func (d *bincDecDriver) decodeNaked() (v interface{}, vt valueType, decodeFurther bool) {
d.initReadNext()
switch d.vd {
case bincVdSpecial:
switch d.vs {
case bincSpNil:
vt = valueTypeNil
case bincSpFalse:
vt = valueTypeBool
v = false
case bincSpTrue:
vt = valueTypeBool
v = true
case bincSpNan:
vt = valueTypeFloat
v = math.NaN()
case bincSpPosInf:
vt = valueTypeFloat
v = math.Inf(1)
case bincSpNegInf:
vt = valueTypeFloat
v = math.Inf(-1)
case bincSpZeroFloat:
vt = valueTypeFloat
v = float64(0)
case bincSpZero:
vt = valueTypeUint
v = int64(0) // int8(0)
case bincSpNegOne:
vt = valueTypeInt
v = int64(-1) // int8(-1)
default:
decErr("decodeNaked: Unrecognized special value 0x%x", d.vs)
}
case bincVdSmallInt:
vt = valueTypeUint
v = uint64(int8(d.vs)) + 1 // int8(d.vs) + 1
case bincVdPosInt:
vt = valueTypeUint
v = d.decUint()
case bincVdNegInt:
vt = valueTypeInt
v = -(int64(d.decUint()))
case bincVdFloat:
vt = valueTypeFloat
v = d.decFloat()
case bincVdSymbol:
vt = valueTypeSymbol
v = d.decodeString()
case bincVdString:
vt = valueTypeString
v = d.decodeString()
case bincVdByteArray:
vt = valueTypeBytes
v, _ = d.decodeBytes(nil)
case bincVdTimestamp:
vt = valueTypeTimestamp
tt, err := decodeTime(d.r.readn(int(d.vs)))
if err != nil {
panic(err)
}
v = tt
case bincVdCustomExt:
vt = valueTypeExt
l := d.decLen()
var re RawExt
re.Tag = d.r.readn1()
re.Data = d.r.readn(l)
v = &re
vt = valueTypeExt
case bincVdArray:
vt = valueTypeArray
decodeFurther = true
case bincVdMap:
vt = valueTypeMap
decodeFurther = true
default:
decErr("decodeNaked: Unrecognized d.vd: 0x%x", d.vd)
}
if !decodeFurther {
d.bdRead = false
}
return
}
//------------------------------------
//BincHandle is a Handle for the Binc Schema-Free Encoding Format
//defined at https://github.com/ugorji/binc .
//
//BincHandle currently supports all Binc features with the following EXCEPTIONS:
// - only integers up to 64 bits of precision are supported.
// big integers are unsupported.
// - Only IEEE 754 binary32 and binary64 floats are supported (i.e. Go float32 and float64 types).
// extended precision and decimal IEEE 754 floats are unsupported.
// - Only UTF-8 strings are supported.
// Unicode_Other Binc types (UTF16, UTF32) are currently unsupported.
//Note that these EXCEPTIONS are temporary and full support is possible and may happen soon.
type BincHandle struct {
BasicHandle
}
func (h *BincHandle) newEncDriver(w encWriter) encDriver {
return &bincEncDriver{w: w}
}
func (h *BincHandle) newDecDriver(r decReader) decDriver {
return &bincDecDriver{r: r}
}
func (_ *BincHandle) writeExt() bool {
return true
}
func (h *BincHandle) getBasicHandle() *BasicHandle {
return &h.BasicHandle
}
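// Illustration only (not part of the original file): a minimal round-trip
// through BincHandle, assuming the package-level NewEncoder/NewDecoder
// constructors used in rpc.go later in this diff accept an io.Writer/io.Reader,
// and that "bytes" is in the import set.
func exampleBincRoundTrip() (out map[string]int, err error) {
	var bh BincHandle
	var buf bytes.Buffer
	if err = NewEncoder(&buf, &bh).Encode(map[string]int{"a": 1}); err != nil {
		return
	}
	err = NewDecoder(&buf, &bh).Decode(&out)
	return
}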

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,589 +0,0 @@
// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a BSD-style license found in the LICENSE file.
package codec
// Contains code shared by both encode and decode.
import (
"encoding/binary"
"fmt"
"math"
"reflect"
"sort"
"strings"
"sync"
"time"
"unicode"
"unicode/utf8"
)
const (
structTagName = "codec"
// Support
// encoding.BinaryMarshaler: MarshalBinary() (data []byte, err error)
// encoding.BinaryUnmarshaler: UnmarshalBinary(data []byte) error
// This constant flag will enable or disable it.
supportBinaryMarshal = true
// Each Encoder or Decoder uses a cache of functions based on conditionals,
// so that the conditionals are not run every time.
//
// Either a map or a slice is used to keep track of the functions.
// The map is more natural, but has a higher cost than a slice/array.
// This flag (useMapForCodecCache) controls which is used.
useMapForCodecCache = false
// For some common container types, we can short-circuit an elaborate
// reflection dance and call encode/decode directly.
// The currently supported types are:
// - slices of strings, or id's (int64,uint64) or interfaces.
// - maps of str->str, str->intf, id(int64,uint64)->intf, intf->intf
shortCircuitReflectToFastPath = true
// for debugging, set this to false, to catch panic traces.
// Note that this will always cause rpc tests to fail, since they need io.EOF sent via panic.
recoverPanicToErr = true
)
type charEncoding uint8
const (
c_RAW charEncoding = iota
c_UTF8
c_UTF16LE
c_UTF16BE
c_UTF32LE
c_UTF32BE
)
// valueType is the stream type
type valueType uint8
const (
valueTypeUnset valueType = iota
valueTypeNil
valueTypeInt
valueTypeUint
valueTypeFloat
valueTypeBool
valueTypeString
valueTypeSymbol
valueTypeBytes
valueTypeMap
valueTypeArray
valueTypeTimestamp
valueTypeExt
valueTypeInvalid = 0xff
)
var (
bigen = binary.BigEndian
structInfoFieldName = "_struct"
cachedTypeInfo = make(map[uintptr]*typeInfo, 4)
cachedTypeInfoMutex sync.RWMutex
intfSliceTyp = reflect.TypeOf([]interface{}(nil))
intfTyp = intfSliceTyp.Elem()
strSliceTyp = reflect.TypeOf([]string(nil))
boolSliceTyp = reflect.TypeOf([]bool(nil))
uintSliceTyp = reflect.TypeOf([]uint(nil))
uint8SliceTyp = reflect.TypeOf([]uint8(nil))
uint16SliceTyp = reflect.TypeOf([]uint16(nil))
uint32SliceTyp = reflect.TypeOf([]uint32(nil))
uint64SliceTyp = reflect.TypeOf([]uint64(nil))
intSliceTyp = reflect.TypeOf([]int(nil))
int8SliceTyp = reflect.TypeOf([]int8(nil))
int16SliceTyp = reflect.TypeOf([]int16(nil))
int32SliceTyp = reflect.TypeOf([]int32(nil))
int64SliceTyp = reflect.TypeOf([]int64(nil))
float32SliceTyp = reflect.TypeOf([]float32(nil))
float64SliceTyp = reflect.TypeOf([]float64(nil))
mapIntfIntfTyp = reflect.TypeOf(map[interface{}]interface{}(nil))
mapStrIntfTyp = reflect.TypeOf(map[string]interface{}(nil))
mapStrStrTyp = reflect.TypeOf(map[string]string(nil))
mapIntIntfTyp = reflect.TypeOf(map[int]interface{}(nil))
mapInt64IntfTyp = reflect.TypeOf(map[int64]interface{}(nil))
mapUintIntfTyp = reflect.TypeOf(map[uint]interface{}(nil))
mapUint64IntfTyp = reflect.TypeOf(map[uint64]interface{}(nil))
stringTyp = reflect.TypeOf("")
timeTyp = reflect.TypeOf(time.Time{})
rawExtTyp = reflect.TypeOf(RawExt{})
mapBySliceTyp = reflect.TypeOf((*MapBySlice)(nil)).Elem()
binaryMarshalerTyp = reflect.TypeOf((*binaryMarshaler)(nil)).Elem()
binaryUnmarshalerTyp = reflect.TypeOf((*binaryUnmarshaler)(nil)).Elem()
rawExtTypId = reflect.ValueOf(rawExtTyp).Pointer()
intfTypId = reflect.ValueOf(intfTyp).Pointer()
timeTypId = reflect.ValueOf(timeTyp).Pointer()
intfSliceTypId = reflect.ValueOf(intfSliceTyp).Pointer()
strSliceTypId = reflect.ValueOf(strSliceTyp).Pointer()
boolSliceTypId = reflect.ValueOf(boolSliceTyp).Pointer()
uintSliceTypId = reflect.ValueOf(uintSliceTyp).Pointer()
uint8SliceTypId = reflect.ValueOf(uint8SliceTyp).Pointer()
uint16SliceTypId = reflect.ValueOf(uint16SliceTyp).Pointer()
uint32SliceTypId = reflect.ValueOf(uint32SliceTyp).Pointer()
uint64SliceTypId = reflect.ValueOf(uint64SliceTyp).Pointer()
intSliceTypId = reflect.ValueOf(intSliceTyp).Pointer()
int8SliceTypId = reflect.ValueOf(int8SliceTyp).Pointer()
int16SliceTypId = reflect.ValueOf(int16SliceTyp).Pointer()
int32SliceTypId = reflect.ValueOf(int32SliceTyp).Pointer()
int64SliceTypId = reflect.ValueOf(int64SliceTyp).Pointer()
float32SliceTypId = reflect.ValueOf(float32SliceTyp).Pointer()
float64SliceTypId = reflect.ValueOf(float64SliceTyp).Pointer()
mapStrStrTypId = reflect.ValueOf(mapStrStrTyp).Pointer()
mapIntfIntfTypId = reflect.ValueOf(mapIntfIntfTyp).Pointer()
mapStrIntfTypId = reflect.ValueOf(mapStrIntfTyp).Pointer()
mapIntIntfTypId = reflect.ValueOf(mapIntIntfTyp).Pointer()
mapInt64IntfTypId = reflect.ValueOf(mapInt64IntfTyp).Pointer()
mapUintIntfTypId = reflect.ValueOf(mapUintIntfTyp).Pointer()
mapUint64IntfTypId = reflect.ValueOf(mapUint64IntfTyp).Pointer()
// Id = reflect.ValueOf().Pointer()
// mapBySliceTypId = reflect.ValueOf(mapBySliceTyp).Pointer()
binaryMarshalerTypId = reflect.ValueOf(binaryMarshalerTyp).Pointer()
binaryUnmarshalerTypId = reflect.ValueOf(binaryUnmarshalerTyp).Pointer()
intBitsize uint8 = uint8(reflect.TypeOf(int(0)).Bits())
uintBitsize uint8 = uint8(reflect.TypeOf(uint(0)).Bits())
bsAll0x00 = []byte{0, 0, 0, 0, 0, 0, 0, 0}
bsAll0xff = []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
)
type binaryUnmarshaler interface {
UnmarshalBinary(data []byte) error
}
type binaryMarshaler interface {
MarshalBinary() (data []byte, err error)
}
// MapBySlice represents a slice which should be encoded as a map in the stream.
// The slice contains a sequence of key-value pairs.
type MapBySlice interface {
MapBySlice()
}
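// Illustration only (not part of the original file): a named slice type opts
// into map encoding by implementing MapBySlice; its elements are read pairwise
// as key1, value1, key2, value2, ...
type exampleKVPairs []interface{}

func (exampleKVPairs) MapBySlice() {}

// exampleKVPairs{"a", 1, "b", 2} is then encoded like the map {"a": 1, "b": 2}.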
// WARNING: DO NOT USE DIRECTLY. EXPORTED FOR GODOC BENEFIT. WILL BE REMOVED.
//
// BasicHandle encapsulates the common options and extension functions.
type BasicHandle struct {
extHandle
EncodeOptions
DecodeOptions
}
// Handle is the interface for a specific encoding format.
//
// Typically, a Handle is pre-configured before first time use,
// and not modified while in use. Such a pre-configured Handle
// is safe for concurrent access.
type Handle interface {
writeExt() bool
getBasicHandle() *BasicHandle
newEncDriver(w encWriter) encDriver
newDecDriver(r decReader) decDriver
}
// RawExt represents raw unprocessed extension data.
type RawExt struct {
Tag byte
Data []byte
}
type extTypeTagFn struct {
rtid uintptr
rt reflect.Type
tag byte
encFn func(reflect.Value) ([]byte, error)
decFn func(reflect.Value, []byte) error
}
type extHandle []*extTypeTagFn
// AddExt registers an encode and decode function for a reflect.Type.
// Note that the type must be a named type, and specifically not
// a pointer or Interface. An error is returned if that is not honored.
//
// To Deregister an ext, call AddExt with 0 tag, nil encfn and nil decfn.
func (o *extHandle) AddExt(
rt reflect.Type,
tag byte,
encfn func(reflect.Value) ([]byte, error),
decfn func(reflect.Value, []byte) error,
) (err error) {
// o is a pointer, because we may need to initialize it
if rt.PkgPath() == "" || rt.Kind() == reflect.Interface {
err = fmt.Errorf("codec.Handle.AddExt: Takes a named type, specifically not a pointer or interface: %T",
reflect.Zero(rt).Interface())
return
}
// o cannot be nil, since it is always embedded in a Handle.
// if nil, let it panic.
// if o == nil {
// err = errors.New("codec.Handle.AddExt: extHandle cannot be a nil pointer.")
// return
// }
rtid := reflect.ValueOf(rt).Pointer()
for _, v := range *o {
if v.rtid == rtid {
v.tag, v.encFn, v.decFn = tag, encfn, decfn
return
}
}
*o = append(*o, &extTypeTagFn{rtid, rt, tag, encfn, decfn})
return
}
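// Illustration only (not part of the original file): registering extension
// functions for a hypothetical named type wrapping time.Time. AddExt is
// promoted from the embedded extHandle, so it can be called on a concrete
// handle such as *BincHandle; the tag byte 0x7f is arbitrary.
type exampleTimestamp time.Time

func registerExampleTimestampExt(h *BincHandle) error {
	return h.AddExt(reflect.TypeOf(exampleTimestamp{}), 0x7f,
		func(rv reflect.Value) ([]byte, error) {
			return time.Time(rv.Interface().(exampleTimestamp)).MarshalBinary()
		},
		func(rv reflect.Value, bs []byte) error {
			var t time.Time
			if err := t.UnmarshalBinary(bs); err != nil {
				return err
			}
			rv.Set(reflect.ValueOf(exampleTimestamp(t)))
			return nil
		})
}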
func (o extHandle) getExt(rtid uintptr) *extTypeTagFn {
for _, v := range o {
if v.rtid == rtid {
return v
}
}
return nil
}
func (o extHandle) getExtForTag(tag byte) *extTypeTagFn {
for _, v := range o {
if v.tag == tag {
return v
}
}
return nil
}
func (o extHandle) getDecodeExtForTag(tag byte) (
rv reflect.Value, fn func(reflect.Value, []byte) error) {
if x := o.getExtForTag(tag); x != nil {
// ext is only registered for base
rv = reflect.New(x.rt).Elem()
fn = x.decFn
}
return
}
func (o extHandle) getDecodeExt(rtid uintptr) (tag byte, fn func(reflect.Value, []byte) error) {
if x := o.getExt(rtid); x != nil {
tag = x.tag
fn = x.decFn
}
return
}
func (o extHandle) getEncodeExt(rtid uintptr) (tag byte, fn func(reflect.Value) ([]byte, error)) {
if x := o.getExt(rtid); x != nil {
tag = x.tag
fn = x.encFn
}
return
}
type structFieldInfo struct {
encName string // encode name
// only one of 'i' or 'is' can be set. If 'i' is -1, then 'is' has been set.
is []int // (recursive/embedded) field index in struct
i int16 // field index in struct
omitEmpty bool
toArray bool // if field is _struct, is the toArray set?
// tag string // tag
// name string // field name
// encNameBs []byte // encoded name as byte stream
// ikind int // kind of the field as an int i.e. int(reflect.Kind)
}
func parseStructFieldInfo(fname string, stag string) *structFieldInfo {
if fname == "" {
panic("parseStructFieldInfo: No Field Name")
}
si := structFieldInfo{
// name: fname,
encName: fname,
// tag: stag,
}
if stag != "" {
for i, s := range strings.Split(stag, ",") {
if i == 0 {
if s != "" {
si.encName = s
}
} else {
switch s {
case "omitempty":
si.omitEmpty = true
case "toarray":
si.toArray = true
}
}
}
}
// si.encNameBs = []byte(si.encName)
return &si
}
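// Illustration only (not part of the original file): how the "codec" struct tag
// is interpreted by parseStructFieldInfo above and rgetTypeInfo below. The
// special _struct field (structInfoFieldName) carries struct-wide options.
type exampleItem struct {
	_struct bool     `codec:",omitempty"` // struct-wide: omit empty fields
	Name    string   `codec:"name"`       // encoded name becomes "name"
	Secret  string   `codec:"-"`          // skipped entirely
	Tags    []string `codec:",omitempty"` // field name kept, omitted when empty
}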
type sfiSortedByEncName []*structFieldInfo
func (p sfiSortedByEncName) Len() int {
return len(p)
}
func (p sfiSortedByEncName) Less(i, j int) bool {
return p[i].encName < p[j].encName
}
func (p sfiSortedByEncName) Swap(i, j int) {
p[i], p[j] = p[j], p[i]
}
// typeInfo keeps information about each type referenced in the encode/decode sequence.
//
// During an encode/decode sequence, we work as below:
// - If base is a built in type, en/decode base value
// - If base is registered as an extension, en/decode base value
// - If type is binary(M/Unm)arshaler, call Binary(M/Unm)arshal method
// - Else decode appropriately based on the reflect.Kind
type typeInfo struct {
sfi []*structFieldInfo // sorted. Used when enc/dec struct to map.
sfip []*structFieldInfo // unsorted. Used when enc/dec struct to array.
rt reflect.Type
rtid uintptr
// baseId gives pointer to the base reflect.Type, after dereferencing
// the pointers. E.g. base type of ***time.Time is time.Time.
base reflect.Type
baseId uintptr
baseIndir int8 // number of indirections to get to base
mbs bool // base type (T or *T) is a MapBySlice
m bool // base type (T or *T) is a binaryMarshaler
unm bool // base type (T or *T) is a binaryUnmarshaler
mIndir int8 // number of indirections to get to binaryMarshaler type
unmIndir int8 // number of indirections to get to binaryUnmarshaler type
toArray bool // whether this (struct) type should be encoded as an array
}
func (ti *typeInfo) indexForEncName(name string) int {
//tisfi := ti.sfi
const binarySearchThreshold = 16
if sfilen := len(ti.sfi); sfilen < binarySearchThreshold {
// linear search. faster than binary search in my testing up to 16-field structs.
for i, si := range ti.sfi {
if si.encName == name {
return i
}
}
} else {
// binary search. adapted from sort/search.go.
h, i, j := 0, 0, sfilen
for i < j {
h = i + (j-i)/2
if ti.sfi[h].encName < name {
i = h + 1
} else {
j = h
}
}
if i < sfilen && ti.sfi[i].encName == name {
return i
}
}
return -1
}
func getTypeInfo(rtid uintptr, rt reflect.Type) (pti *typeInfo) {
var ok bool
cachedTypeInfoMutex.RLock()
pti, ok = cachedTypeInfo[rtid]
cachedTypeInfoMutex.RUnlock()
if ok {
return
}
cachedTypeInfoMutex.Lock()
defer cachedTypeInfoMutex.Unlock()
if pti, ok = cachedTypeInfo[rtid]; ok {
return
}
ti := typeInfo{rt: rt, rtid: rtid}
pti = &ti
var indir int8
if ok, indir = implementsIntf(rt, binaryMarshalerTyp); ok {
ti.m, ti.mIndir = true, indir
}
if ok, indir = implementsIntf(rt, binaryUnmarshalerTyp); ok {
ti.unm, ti.unmIndir = true, indir
}
if ok, _ = implementsIntf(rt, mapBySliceTyp); ok {
ti.mbs = true
}
pt := rt
var ptIndir int8
// for ; pt.Kind() == reflect.Ptr; pt, ptIndir = pt.Elem(), ptIndir+1 { }
for pt.Kind() == reflect.Ptr {
pt = pt.Elem()
ptIndir++
}
if ptIndir == 0 {
ti.base = rt
ti.baseId = rtid
} else {
ti.base = pt
ti.baseId = reflect.ValueOf(pt).Pointer()
ti.baseIndir = ptIndir
}
if rt.Kind() == reflect.Struct {
var siInfo *structFieldInfo
if f, ok := rt.FieldByName(structInfoFieldName); ok {
siInfo = parseStructFieldInfo(structInfoFieldName, f.Tag.Get(structTagName))
ti.toArray = siInfo.toArray
}
sfip := make([]*structFieldInfo, 0, rt.NumField())
rgetTypeInfo(rt, nil, make(map[string]bool), &sfip, siInfo)
// // try to put all si close together
// const tryToPutAllStructFieldInfoTogether = true
// if tryToPutAllStructFieldInfoTogether {
// sfip2 := make([]structFieldInfo, len(sfip))
// for i, si := range sfip {
// sfip2[i] = *si
// }
// for i := range sfip {
// sfip[i] = &sfip2[i]
// }
// }
ti.sfip = make([]*structFieldInfo, len(sfip))
ti.sfi = make([]*structFieldInfo, len(sfip))
copy(ti.sfip, sfip)
sort.Sort(sfiSortedByEncName(sfip))
copy(ti.sfi, sfip)
}
// sfi = sfip
cachedTypeInfo[rtid] = pti
return
}
func rgetTypeInfo(rt reflect.Type, indexstack []int, fnameToHastag map[string]bool,
sfi *[]*structFieldInfo, siInfo *structFieldInfo,
) {
// for rt.Kind() == reflect.Ptr {
// // indexstack = append(indexstack, 0)
// rt = rt.Elem()
// }
for j := 0; j < rt.NumField(); j++ {
f := rt.Field(j)
stag := f.Tag.Get(structTagName)
if stag == "-" {
continue
}
if r1, _ := utf8.DecodeRuneInString(f.Name); r1 == utf8.RuneError || !unicode.IsUpper(r1) {
continue
}
// if anonymous and there is no struct tag and it's a struct (or pointer to struct), inline it.
if f.Anonymous && stag == "" {
ft := f.Type
for ft.Kind() == reflect.Ptr {
ft = ft.Elem()
}
if ft.Kind() == reflect.Struct {
indexstack2 := append(append(make([]int, 0, len(indexstack)+4), indexstack...), j)
rgetTypeInfo(ft, indexstack2, fnameToHastag, sfi, siInfo)
continue
}
}
// do not let fields with the same name in embedded structs override a field at a higher level.
// this must be done after the anonymous check, so that anonymous fields
// can still include their child fields
if _, ok := fnameToHastag[f.Name]; ok {
continue
}
si := parseStructFieldInfo(f.Name, stag)
// si.ikind = int(f.Type.Kind())
if len(indexstack) == 0 {
si.i = int16(j)
} else {
si.i = -1
si.is = append(append(make([]int, 0, len(indexstack)+4), indexstack...), j)
}
if siInfo != nil {
if siInfo.omitEmpty {
si.omitEmpty = true
}
}
*sfi = append(*sfi, si)
fnameToHastag[f.Name] = stag != ""
}
}
func panicToErr(err *error) {
if recoverPanicToErr {
if x := recover(); x != nil {
//debug.PrintStack()
panicValToErr(x, err)
}
}
}
func doPanic(tag string, format string, params ...interface{}) {
params2 := make([]interface{}, len(params)+1)
params2[0] = tag
copy(params2[1:], params)
panic(fmt.Errorf("%s: "+format, params2...))
}
func checkOverflowFloat32(f float64, doCheck bool) {
if !doCheck {
return
}
// check overflow (logic adapted from std pkg reflect/value.go OverflowFloat())
f2 := f
if f2 < 0 {
f2 = -f
}
if math.MaxFloat32 < f2 && f2 <= math.MaxFloat64 {
decErr("Overflow float32 value: %v", f2)
}
}
func checkOverflow(ui uint64, i int64, bitsize uint8) {
// check overflow (logic adapted from std pkg reflect/value.go OverflowUint())
if bitsize == 0 {
return
}
if i != 0 {
if trunc := (i << (64 - bitsize)) >> (64 - bitsize); i != trunc {
decErr("Overflow int value: %v", i)
}
}
if ui != 0 {
if trunc := (ui << (64 - bitsize)) >> (64 - bitsize); ui != trunc {
decErr("Overflow uint value: %v", ui)
}
}
}
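// Worked instance of the truncation test above (illustration only), for bitsize = 8:
//   signed:   i  = 200 -> trunc = (200<<56)>>56 = -56, trunc != i, so 200 overflows int8
//   unsigned: ui = 200 -> trunc = (200<<56)>>56 = 200, trunc == ui, so 200 fits in uint8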

View File

@ -1,127 +0,0 @@
// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a BSD-style license found in the LICENSE file.
package codec
// All non-std package dependencies live in this file,
// so porting to a different environment is easy (just update functions).
import (
"errors"
"fmt"
"math"
"reflect"
)
var (
raisePanicAfterRecover = false
debugging = true
)
func panicValToErr(panicVal interface{}, err *error) {
switch xerr := panicVal.(type) {
case error:
*err = xerr
case string:
*err = errors.New(xerr)
default:
*err = fmt.Errorf("%v", panicVal)
}
if raisePanicAfterRecover {
panic(panicVal)
}
return
}
func isEmptyValueDeref(v reflect.Value, deref bool) bool {
switch v.Kind() {
case reflect.Array, reflect.Map, reflect.Slice, reflect.String:
return v.Len() == 0
case reflect.Bool:
return !v.Bool()
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return v.Int() == 0
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
return v.Uint() == 0
case reflect.Float32, reflect.Float64:
return v.Float() == 0
case reflect.Interface, reflect.Ptr:
if deref {
if v.IsNil() {
return true
}
return isEmptyValueDeref(v.Elem(), deref)
} else {
return v.IsNil()
}
case reflect.Struct:
// return true if all fields are empty. else return false.
// we cannot use equality check, because some fields may be maps/slices/etc
// and consequently the structs are not comparable.
// return v.Interface() == reflect.Zero(v.Type()).Interface()
for i, n := 0, v.NumField(); i < n; i++ {
if !isEmptyValueDeref(v.Field(i), deref) {
return false
}
}
return true
}
return false
}
func isEmptyValue(v reflect.Value) bool {
return isEmptyValueDeref(v, true)
}
func debugf(format string, args ...interface{}) {
if debugging {
if len(format) == 0 || format[len(format)-1] != '\n' {
format = format + "\n"
}
fmt.Printf(format, args...)
}
}
func pruneSignExt(v []byte, pos bool) (n int) {
if len(v) < 2 {
} else if pos && v[0] == 0 {
for ; v[n] == 0 && n+1 < len(v) && (v[n+1]&(1<<7) == 0); n++ {
}
} else if !pos && v[0] == 0xff {
for ; v[n] == 0xff && n+1 < len(v) && (v[n+1]&(1<<7) != 0); n++ {
}
}
return
}
func implementsIntf(typ, iTyp reflect.Type) (success bool, indir int8) {
if typ == nil {
return
}
rt := typ
// The type might be a pointer and we need to keep
// dereferencing to the base type until we find an implementation.
for {
if rt.Implements(iTyp) {
return true, indir
}
if p := rt; p.Kind() == reflect.Ptr {
indir++
if indir >= math.MaxInt8 { // insane number of indirections
return false, 0
}
rt = p.Elem()
continue
}
break
}
// No luck yet, but if this is a base type (non-pointer), the pointer might satisfy.
if typ.Kind() != reflect.Ptr {
// Not a pointer, but does the pointer work?
if reflect.PtrTo(typ).Implements(iTyp) {
return true, -1
}
}
return false, 0
}

View File

@ -1,816 +0,0 @@
// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a BSD-style license found in the LICENSE file.
/*
MSGPACK
The msgpack-c implementation powers the C, C++, Python, Ruby, etc. libraries.
We need to maintain compatibility with it and how it encodes integer values
without caring about the type.
For compatibility with behaviour of msgpack-c reference implementation:
- Go intX (>0) and uintX
IS ENCODED AS
msgpack +ve fixnum, unsigned
- Go intX (<0)
IS ENCODED AS
msgpack -ve fixnum, signed
*/
package codec
import (
"fmt"
"io"
"math"
"net/rpc"
)
const (
mpPosFixNumMin byte = 0x00
mpPosFixNumMax = 0x7f
mpFixMapMin = 0x80
mpFixMapMax = 0x8f
mpFixArrayMin = 0x90
mpFixArrayMax = 0x9f
mpFixStrMin = 0xa0
mpFixStrMax = 0xbf
mpNil = 0xc0
_ = 0xc1
mpFalse = 0xc2
mpTrue = 0xc3
mpFloat = 0xca
mpDouble = 0xcb
mpUint8 = 0xcc
mpUint16 = 0xcd
mpUint32 = 0xce
mpUint64 = 0xcf
mpInt8 = 0xd0
mpInt16 = 0xd1
mpInt32 = 0xd2
mpInt64 = 0xd3
// extensions below
mpBin8 = 0xc4
mpBin16 = 0xc5
mpBin32 = 0xc6
mpExt8 = 0xc7
mpExt16 = 0xc8
mpExt32 = 0xc9
mpFixExt1 = 0xd4
mpFixExt2 = 0xd5
mpFixExt4 = 0xd6
mpFixExt8 = 0xd7
mpFixExt16 = 0xd8
mpStr8 = 0xd9 // new
mpStr16 = 0xda
mpStr32 = 0xdb
mpArray16 = 0xdc
mpArray32 = 0xdd
mpMap16 = 0xde
mpMap32 = 0xdf
mpNegFixNumMin = 0xe0
mpNegFixNumMax = 0xff
)
// MsgpackSpecRpcMultiArgs is a special type which signifies to the MsgpackSpecRpcCodec
// that the backend RPC service takes multiple arguments, which have been arranged
// in sequence in the slice.
//
// The Codec then passes it AS-IS to the rpc service (without wrapping it in an
// array of 1 element).
type MsgpackSpecRpcMultiArgs []interface{}
// A msgpackContainerType describes the descriptor bytes used for one kind of msgpack container.
type msgpackContainerType struct {
fixCutoff int
bFixMin, b8, b16, b32 byte
hasFixMin, has8, has8Always bool
}
var (
msgpackContainerStr = msgpackContainerType{32, mpFixStrMin, mpStr8, mpStr16, mpStr32, true, true, false}
msgpackContainerBin = msgpackContainerType{0, 0, mpBin8, mpBin16, mpBin32, false, true, true}
msgpackContainerList = msgpackContainerType{16, mpFixArrayMin, 0, mpArray16, mpArray32, true, false, false}
msgpackContainerMap = msgpackContainerType{16, mpFixMapMin, 0, mpMap16, mpMap32, true, false, false}
)
//---------------------------------------------
type msgpackEncDriver struct {
w encWriter
h *MsgpackHandle
}
func (e *msgpackEncDriver) isBuiltinType(rt uintptr) bool {
//no builtin types. All encodings are based on kinds. Types supported as extensions.
return false
}
func (e *msgpackEncDriver) encodeBuiltin(rt uintptr, v interface{}) {}
func (e *msgpackEncDriver) encodeNil() {
e.w.writen1(mpNil)
}
func (e *msgpackEncDriver) encodeInt(i int64) {
switch {
case i >= 0:
e.encodeUint(uint64(i))
case i >= -32:
e.w.writen1(byte(i))
case i >= math.MinInt8:
e.w.writen2(mpInt8, byte(i))
case i >= math.MinInt16:
e.w.writen1(mpInt16)
e.w.writeUint16(uint16(i))
case i >= math.MinInt32:
e.w.writen1(mpInt32)
e.w.writeUint32(uint32(i))
default:
e.w.writen1(mpInt64)
e.w.writeUint64(uint64(i))
}
}
func (e *msgpackEncDriver) encodeUint(i uint64) {
switch {
case i <= math.MaxInt8:
e.w.writen1(byte(i))
case i <= math.MaxUint8:
e.w.writen2(mpUint8, byte(i))
case i <= math.MaxUint16:
e.w.writen1(mpUint16)
e.w.writeUint16(uint16(i))
case i <= math.MaxUint32:
e.w.writen1(mpUint32)
e.w.writeUint32(uint32(i))
default:
e.w.writen1(mpUint64)
e.w.writeUint64(uint64(i))
}
}
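// Worked examples of the encodings chosen above (illustration only, derived from
// the switch statements in encodeInt/encodeUint and the mp* descriptor constants):
//   encodeInt(5)    -> 0x05            (positive fixnum)
//   encodeInt(-5)   -> 0xfb            (negative fixnum)
//   encodeInt(-200) -> 0xd1 0xff 0x38  (mpInt16, big-endian)
//   encodeUint(300) -> 0xcd 0x01 0x2c  (mpUint16, big-endian)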
func (e *msgpackEncDriver) encodeBool(b bool) {
if b {
e.w.writen1(mpTrue)
} else {
e.w.writen1(mpFalse)
}
}
func (e *msgpackEncDriver) encodeFloat32(f float32) {
e.w.writen1(mpFloat)
e.w.writeUint32(math.Float32bits(f))
}
func (e *msgpackEncDriver) encodeFloat64(f float64) {
e.w.writen1(mpDouble)
e.w.writeUint64(math.Float64bits(f))
}
func (e *msgpackEncDriver) encodeExtPreamble(xtag byte, l int) {
switch {
case l == 1:
e.w.writen2(mpFixExt1, xtag)
case l == 2:
e.w.writen2(mpFixExt2, xtag)
case l == 4:
e.w.writen2(mpFixExt4, xtag)
case l == 8:
e.w.writen2(mpFixExt8, xtag)
case l == 16:
e.w.writen2(mpFixExt16, xtag)
case l < 256:
e.w.writen2(mpExt8, byte(l))
e.w.writen1(xtag)
case l < 65536:
e.w.writen1(mpExt16)
e.w.writeUint16(uint16(l))
e.w.writen1(xtag)
default:
e.w.writen1(mpExt32)
e.w.writeUint32(uint32(l))
e.w.writen1(xtag)
}
}
func (e *msgpackEncDriver) encodeArrayPreamble(length int) {
e.writeContainerLen(msgpackContainerList, length)
}
func (e *msgpackEncDriver) encodeMapPreamble(length int) {
e.writeContainerLen(msgpackContainerMap, length)
}
func (e *msgpackEncDriver) encodeString(c charEncoding, s string) {
if c == c_RAW && e.h.WriteExt {
e.writeContainerLen(msgpackContainerBin, len(s))
} else {
e.writeContainerLen(msgpackContainerStr, len(s))
}
if len(s) > 0 {
e.w.writestr(s)
}
}
func (e *msgpackEncDriver) encodeSymbol(v string) {
e.encodeString(c_UTF8, v)
}
func (e *msgpackEncDriver) encodeStringBytes(c charEncoding, bs []byte) {
if c == c_RAW && e.h.WriteExt {
e.writeContainerLen(msgpackContainerBin, len(bs))
} else {
e.writeContainerLen(msgpackContainerStr, len(bs))
}
if len(bs) > 0 {
e.w.writeb(bs)
}
}
func (e *msgpackEncDriver) writeContainerLen(ct msgpackContainerType, l int) {
switch {
case ct.hasFixMin && l < ct.fixCutoff:
e.w.writen1(ct.bFixMin | byte(l))
case ct.has8 && l < 256 && (ct.has8Always || e.h.WriteExt):
e.w.writen2(ct.b8, uint8(l))
case l < 65536:
e.w.writen1(ct.b16)
e.w.writeUint16(uint16(l))
default:
e.w.writen1(ct.b32)
e.w.writeUint32(uint32(l))
}
}
//---------------------------------------------
type msgpackDecDriver struct {
r decReader
h *MsgpackHandle
bd byte
bdRead bool
bdType valueType
}
func (d *msgpackDecDriver) isBuiltinType(rt uintptr) bool {
//no builtin types. All encodings are based on kinds. Types supported as extensions.
return false
}
func (d *msgpackDecDriver) decodeBuiltin(rt uintptr, v interface{}) {}
// Note: This returns either a primitive (int, bool, etc) for non-containers,
// or a containerType, or a specific type denoting nil or extension.
// It is called when a nil interface{} is passed, leaving it up to the DecDriver
// to introspect the stream and decide how best to decode.
// It deciphers the value by looking at the stream first.
func (d *msgpackDecDriver) decodeNaked() (v interface{}, vt valueType, decodeFurther bool) {
d.initReadNext()
bd := d.bd
switch bd {
case mpNil:
vt = valueTypeNil
d.bdRead = false
case mpFalse:
vt = valueTypeBool
v = false
case mpTrue:
vt = valueTypeBool
v = true
case mpFloat:
vt = valueTypeFloat
v = float64(math.Float32frombits(d.r.readUint32()))
case mpDouble:
vt = valueTypeFloat
v = math.Float64frombits(d.r.readUint64())
case mpUint8:
vt = valueTypeUint
v = uint64(d.r.readn1())
case mpUint16:
vt = valueTypeUint
v = uint64(d.r.readUint16())
case mpUint32:
vt = valueTypeUint
v = uint64(d.r.readUint32())
case mpUint64:
vt = valueTypeUint
v = uint64(d.r.readUint64())
case mpInt8:
vt = valueTypeInt
v = int64(int8(d.r.readn1()))
case mpInt16:
vt = valueTypeInt
v = int64(int16(d.r.readUint16()))
case mpInt32:
vt = valueTypeInt
v = int64(int32(d.r.readUint32()))
case mpInt64:
vt = valueTypeInt
v = int64(int64(d.r.readUint64()))
default:
switch {
case bd >= mpPosFixNumMin && bd <= mpPosFixNumMax:
// positive fixnum (always signed)
vt = valueTypeInt
v = int64(int8(bd))
case bd >= mpNegFixNumMin && bd <= mpNegFixNumMax:
// negative fixnum
vt = valueTypeInt
v = int64(int8(bd))
case bd == mpStr8, bd == mpStr16, bd == mpStr32, bd >= mpFixStrMin && bd <= mpFixStrMax:
if d.h.RawToString {
var rvm string
vt = valueTypeString
v = &rvm
} else {
var rvm = []byte{}
vt = valueTypeBytes
v = &rvm
}
decodeFurther = true
case bd == mpBin8, bd == mpBin16, bd == mpBin32:
var rvm = []byte{}
vt = valueTypeBytes
v = &rvm
decodeFurther = true
case bd == mpArray16, bd == mpArray32, bd >= mpFixArrayMin && bd <= mpFixArrayMax:
vt = valueTypeArray
decodeFurther = true
case bd == mpMap16, bd == mpMap32, bd >= mpFixMapMin && bd <= mpFixMapMax:
vt = valueTypeMap
decodeFurther = true
case bd >= mpFixExt1 && bd <= mpFixExt16, bd >= mpExt8 && bd <= mpExt32:
clen := d.readExtLen()
var re RawExt
re.Tag = d.r.readn1()
re.Data = d.r.readn(clen)
v = &re
vt = valueTypeExt
default:
decErr("Nil-Deciphered DecodeValue: %s: hex: %x, dec: %d", msgBadDesc, bd, bd)
}
}
if !decodeFurther {
d.bdRead = false
}
return
}
// int can be decoded from msgpack type: intXXX or uintXXX
func (d *msgpackDecDriver) decodeInt(bitsize uint8) (i int64) {
switch d.bd {
case mpUint8:
i = int64(uint64(d.r.readn1()))
case mpUint16:
i = int64(uint64(d.r.readUint16()))
case mpUint32:
i = int64(uint64(d.r.readUint32()))
case mpUint64:
i = int64(d.r.readUint64())
case mpInt8:
i = int64(int8(d.r.readn1()))
case mpInt16:
i = int64(int16(d.r.readUint16()))
case mpInt32:
i = int64(int32(d.r.readUint32()))
case mpInt64:
i = int64(d.r.readUint64())
default:
switch {
case d.bd >= mpPosFixNumMin && d.bd <= mpPosFixNumMax:
i = int64(int8(d.bd))
case d.bd >= mpNegFixNumMin && d.bd <= mpNegFixNumMax:
i = int64(int8(d.bd))
default:
decErr("Unhandled single-byte unsigned integer value: %s: %x", msgBadDesc, d.bd)
}
}
// check overflow (logic adapted from std pkg reflect/value.go OverflowUint())
if bitsize > 0 {
if trunc := (i << (64 - bitsize)) >> (64 - bitsize); i != trunc {
decErr("Overflow int value: %v", i)
}
}
d.bdRead = false
return
}
// uint can be decoded from msgpack type: intXXX or uintXXX
func (d *msgpackDecDriver) decodeUint(bitsize uint8) (ui uint64) {
switch d.bd {
case mpUint8:
ui = uint64(d.r.readn1())
case mpUint16:
ui = uint64(d.r.readUint16())
case mpUint32:
ui = uint64(d.r.readUint32())
case mpUint64:
ui = d.r.readUint64()
case mpInt8:
if i := int64(int8(d.r.readn1())); i >= 0 {
ui = uint64(i)
} else {
decErr("Assigning negative signed value: %v, to unsigned type", i)
}
case mpInt16:
if i := int64(int16(d.r.readUint16())); i >= 0 {
ui = uint64(i)
} else {
decErr("Assigning negative signed value: %v, to unsigned type", i)
}
case mpInt32:
if i := int64(int32(d.r.readUint32())); i >= 0 {
ui = uint64(i)
} else {
decErr("Assigning negative signed value: %v, to unsigned type", i)
}
case mpInt64:
if i := int64(d.r.readUint64()); i >= 0 {
ui = uint64(i)
} else {
decErr("Assigning negative signed value: %v, to unsigned type", i)
}
default:
switch {
case d.bd >= mpPosFixNumMin && d.bd <= mpPosFixNumMax:
ui = uint64(d.bd)
case d.bd >= mpNegFixNumMin && d.bd <= mpNegFixNumMax:
decErr("Assigning negative signed value: %v, to unsigned type", int(d.bd))
default:
decErr("Unhandled single-byte unsigned integer value: %s: %x", msgBadDesc, d.bd)
}
}
// check overflow (logic adapted from std pkg reflect/value.go OverflowUint())
if bitsize > 0 {
if trunc := (ui << (64 - bitsize)) >> (64 - bitsize); ui != trunc {
decErr("Overflow uint value: %v", ui)
}
}
d.bdRead = false
return
}
// float can either be decoded from msgpack type: float, double or intX
func (d *msgpackDecDriver) decodeFloat(chkOverflow32 bool) (f float64) {
switch d.bd {
case mpFloat:
f = float64(math.Float32frombits(d.r.readUint32()))
case mpDouble:
f = math.Float64frombits(d.r.readUint64())
default:
f = float64(d.decodeInt(0))
}
checkOverflowFloat32(f, chkOverflow32)
d.bdRead = false
return
}
// bool can be decoded from bool, fixnum 0 or 1.
func (d *msgpackDecDriver) decodeBool() (b bool) {
switch d.bd {
case mpFalse, 0:
// b = false
case mpTrue, 1:
b = true
default:
decErr("Invalid single-byte value for bool: %s: %x", msgBadDesc, d.bd)
}
d.bdRead = false
return
}
func (d *msgpackDecDriver) decodeString() (s string) {
clen := d.readContainerLen(msgpackContainerStr)
if clen > 0 {
s = string(d.r.readn(clen))
}
d.bdRead = false
return
}
// Callers must check if changed=true (to decide whether to replace the one they have)
func (d *msgpackDecDriver) decodeBytes(bs []byte) (bsOut []byte, changed bool) {
// bytes can be decoded from msgpackContainerStr or msgpackContainerBin
var clen int
switch d.bd {
case mpBin8, mpBin16, mpBin32:
clen = d.readContainerLen(msgpackContainerBin)
default:
clen = d.readContainerLen(msgpackContainerStr)
}
// if clen < 0 {
// changed = true
// panic("length cannot be zero. this cannot be nil.")
// }
if clen > 0 {
// if no contents in stream, don't update the passed byteslice
if len(bs) != clen {
// Return changed=true if length of passed slice diff from length of bytes in stream
if len(bs) > clen {
bs = bs[:clen]
} else {
bs = make([]byte, clen)
}
bsOut = bs
changed = true
}
d.r.readb(bs)
}
d.bdRead = false
return
}
// Every top-level decode func (i.e. decodeValue, decode) must call this first.
func (d *msgpackDecDriver) initReadNext() {
if d.bdRead {
return
}
d.bd = d.r.readn1()
d.bdRead = true
d.bdType = valueTypeUnset
}
func (d *msgpackDecDriver) currentEncodedType() valueType {
if d.bdType == valueTypeUnset {
bd := d.bd
switch bd {
case mpNil:
d.bdType = valueTypeNil
case mpFalse, mpTrue:
d.bdType = valueTypeBool
case mpFloat, mpDouble:
d.bdType = valueTypeFloat
case mpUint8, mpUint16, mpUint32, mpUint64:
d.bdType = valueTypeUint
case mpInt8, mpInt16, mpInt32, mpInt64:
d.bdType = valueTypeInt
default:
switch {
case bd >= mpPosFixNumMin && bd <= mpPosFixNumMax:
d.bdType = valueTypeInt
case bd >= mpNegFixNumMin && bd <= mpNegFixNumMax:
d.bdType = valueTypeInt
case bd == mpStr8, bd == mpStr16, bd == mpStr32, bd >= mpFixStrMin && bd <= mpFixStrMax:
if d.h.RawToString {
d.bdType = valueTypeString
} else {
d.bdType = valueTypeBytes
}
case bd == mpBin8, bd == mpBin16, bd == mpBin32:
d.bdType = valueTypeBytes
case bd == mpArray16, bd == mpArray32, bd >= mpFixArrayMin && bd <= mpFixArrayMax:
d.bdType = valueTypeArray
case bd == mpMap16, bd == mpMap32, bd >= mpFixMapMin && bd <= mpFixMapMax:
d.bdType = valueTypeMap
case bd >= mpFixExt1 && bd <= mpFixExt16, bd >= mpExt8 && bd <= mpExt32:
d.bdType = valueTypeExt
default:
decErr("currentEncodedType: Undeciphered descriptor: %s: hex: %x, dec: %d", msgBadDesc, bd, bd)
}
}
}
return d.bdType
}
func (d *msgpackDecDriver) tryDecodeAsNil() bool {
if d.bd == mpNil {
d.bdRead = false
return true
}
return false
}
func (d *msgpackDecDriver) readContainerLen(ct msgpackContainerType) (clen int) {
bd := d.bd
switch {
case bd == mpNil:
clen = -1 // to represent nil
case bd == ct.b8:
clen = int(d.r.readn1())
case bd == ct.b16:
clen = int(d.r.readUint16())
case bd == ct.b32:
clen = int(d.r.readUint32())
case (ct.bFixMin & bd) == ct.bFixMin:
clen = int(ct.bFixMin ^ bd)
default:
decErr("readContainerLen: %s: hex: %x, dec: %d", msgBadDesc, bd, bd)
}
d.bdRead = false
return
}
func (d *msgpackDecDriver) readMapLen() int {
return d.readContainerLen(msgpackContainerMap)
}
func (d *msgpackDecDriver) readArrayLen() int {
return d.readContainerLen(msgpackContainerList)
}
func (d *msgpackDecDriver) readExtLen() (clen int) {
switch d.bd {
case mpNil:
clen = -1 // to represent nil
case mpFixExt1:
clen = 1
case mpFixExt2:
clen = 2
case mpFixExt4:
clen = 4
case mpFixExt8:
clen = 8
case mpFixExt16:
clen = 16
case mpExt8:
clen = int(d.r.readn1())
case mpExt16:
clen = int(d.r.readUint16())
case mpExt32:
clen = int(d.r.readUint32())
default:
decErr("decoding ext bytes: found unexpected byte: %x", d.bd)
}
return
}
func (d *msgpackDecDriver) decodeExt(verifyTag bool, tag byte) (xtag byte, xbs []byte) {
xbd := d.bd
switch {
case xbd == mpBin8, xbd == mpBin16, xbd == mpBin32:
xbs, _ = d.decodeBytes(nil)
case xbd == mpStr8, xbd == mpStr16, xbd == mpStr32,
xbd >= mpFixStrMin && xbd <= mpFixStrMax:
xbs = []byte(d.decodeString())
default:
clen := d.readExtLen()
xtag = d.r.readn1()
if verifyTag && xtag != tag {
decErr("Wrong extension tag. Got %b. Expecting: %v", xtag, tag)
}
xbs = d.r.readn(clen)
}
d.bdRead = false
return
}
//--------------------------------------------------
//MsgpackHandle is a Handle for the Msgpack Schema-Free Encoding Format.
type MsgpackHandle struct {
BasicHandle
// RawToString controls how raw bytes are decoded into a nil interface{}.
RawToString bool
// WriteExt flag supports encoding configured extensions with extension tags.
// It also controls whether other elements of the new spec are encoded (ie Str8).
//
// With WriteExt=false, configured extensions are serialized as raw bytes
// and Str8 is not encoded.
//
// A stream can still be decoded into a typed value if an appropriate value
// is supplied, but the type cannot be inferred from the stream. If no appropriate
// type is supplied (e.g. decoding into a nil interface{}), you get back
// a []byte or string based on the setting of RawToString.
WriteExt bool
}
func (h *MsgpackHandle) newEncDriver(w encWriter) encDriver {
return &msgpackEncDriver{w: w, h: h}
}
func (h *MsgpackHandle) newDecDriver(r decReader) decDriver {
return &msgpackDecDriver{r: r, h: h}
}
func (h *MsgpackHandle) writeExt() bool {
return h.WriteExt
}
func (h *MsgpackHandle) getBasicHandle() *BasicHandle {
return &h.BasicHandle
}
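// Illustration only (not part of the original file): a handle configured so that
// raw bytes decode into Go strings and the newer spec elements (bin/str8/ext)
// are written, per the RawToString/WriteExt documentation above.
func newExampleMsgpackHandle() *MsgpackHandle {
	h := &MsgpackHandle{}
	h.RawToString = true
	h.WriteExt = true
	return h
}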
//--------------------------------------------------
type msgpackSpecRpcCodec struct {
rpcCodec
}
// /////////////// Spec RPC Codec ///////////////////
func (c *msgpackSpecRpcCodec) WriteRequest(r *rpc.Request, body interface{}) error {
// WriteRequest can write to both a Go service, and other services that do
// not abide by the 1 argument rule of a Go service.
// We discriminate based on if the body is a MsgpackSpecRpcMultiArgs
var bodyArr []interface{}
if m, ok := body.(MsgpackSpecRpcMultiArgs); ok {
bodyArr = ([]interface{})(m)
} else {
bodyArr = []interface{}{body}
}
r2 := []interface{}{0, uint32(r.Seq), r.ServiceMethod, bodyArr}
return c.write(r2, nil, false, true)
}
func (c *msgpackSpecRpcCodec) WriteResponse(r *rpc.Response, body interface{}) error {
var moe interface{}
if r.Error != "" {
moe = r.Error
}
if moe != nil && body != nil {
body = nil
}
r2 := []interface{}{1, uint32(r.Seq), moe, body}
return c.write(r2, nil, false, true)
}
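// For reference (derived from the r2 slices built in WriteRequest/WriteResponse
// above), the wire framing follows the msgpack-rpc spec:
//   request:  [0, seq, "Service.Method", [args...]]
//   response: [1, seq, error-or-nil, result]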
func (c *msgpackSpecRpcCodec) ReadResponseHeader(r *rpc.Response) error {
return c.parseCustomHeader(1, &r.Seq, &r.Error)
}
func (c *msgpackSpecRpcCodec) ReadRequestHeader(r *rpc.Request) error {
return c.parseCustomHeader(0, &r.Seq, &r.ServiceMethod)
}
func (c *msgpackSpecRpcCodec) ReadRequestBody(body interface{}) error {
if body == nil { // read and discard
return c.read(nil)
}
bodyArr := []interface{}{body}
return c.read(&bodyArr)
}
func (c *msgpackSpecRpcCodec) parseCustomHeader(expectTypeByte byte, msgid *uint64, methodOrError *string) (err error) {
if c.cls {
return io.EOF
}
// We read the response header by hand
// so that the body can be decoded on its own from the stream at a later time.
const fia byte = 0x94 //four item array descriptor value
// Not sure why the panic of EOF is swallowed above.
// if bs1 := c.dec.r.readn1(); bs1 != fia {
// err = fmt.Errorf("Unexpected value for array descriptor: Expecting %v. Received %v", fia, bs1)
// return
// }
var b byte
b, err = c.br.ReadByte()
if err != nil {
return
}
if b != fia {
err = fmt.Errorf("Unexpected value for array descriptor: Expecting %v. Received %v", fia, b)
return
}
if err = c.read(&b); err != nil {
return
}
if b != expectTypeByte {
err = fmt.Errorf("Unexpected byte descriptor in header. Expecting %v. Received %v", expectTypeByte, b)
return
}
if err = c.read(msgid); err != nil {
return
}
if err = c.read(methodOrError); err != nil {
return
}
return
}
//--------------------------------------------------
// msgpackSpecRpc is the implementation of Rpc that uses custom communication protocol
// as defined in the msgpack spec at https://github.com/msgpack-rpc/msgpack-rpc/blob/master/spec.md
type msgpackSpecRpc struct{}
// MsgpackSpecRpc implements Rpc using the communication protocol defined in
// the msgpack spec at https://github.com/msgpack-rpc/msgpack-rpc/blob/master/spec.md .
// Its methods (ServerCodec and ClientCodec) return values that implement RpcCodecBuffered.
var MsgpackSpecRpc msgpackSpecRpc
func (x msgpackSpecRpc) ServerCodec(conn io.ReadWriteCloser, h Handle) rpc.ServerCodec {
return &msgpackSpecRpcCodec{newRPCCodec(conn, h)}
}
func (x msgpackSpecRpc) ClientCodec(conn io.ReadWriteCloser, h Handle) rpc.ClientCodec {
return &msgpackSpecRpcCodec{newRPCCodec(conn, h)}
}
var _ decDriver = (*msgpackDecDriver)(nil)
var _ encDriver = (*msgpackEncDriver)(nil)
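// Illustration only (not part of the original file): dialing a msgpack-rpc
// server with the standard library's net/rpc client; assumes "net" is added to
// the import set.
func dialExampleMsgpackRpc(addr string) (*rpc.Client, error) {
	conn, err := net.Dial("tcp", addr)
	if err != nil {
		return nil, err
	}
	return rpc.NewClientWithCodec(MsgpackSpecRpc.ClientCodec(conn, &MsgpackHandle{})), nil
}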

View File

@ -1,110 +0,0 @@
#!/usr/bin/env python
# This will create golden files in a directory passed to it.
# A Test calls this internally to create the golden files
# So it can process them (so we don't have to checkin the files).
import msgpack, msgpackrpc, sys, os, threading
def get_test_data_list():
# get list with all primitive types, and a combo type
l0 = [
-8,
-1616,
-32323232,
-6464646464646464,
192,
1616,
32323232,
6464646464646464,
192,
-3232.0,
-6464646464.0,
3232.0,
6464646464.0,
False,
True,
None,
"someday",
"",
"bytestring",
1328176922000002000,
-2206187877999998000,
0,
-6795364578871345152
]
l1 = [
{ "true": True,
"false": False },
{ "true": "True",
"false": False,
"uint16(1616)": 1616 },
{ "list": [1616, 32323232, True, -3232.0, {"TRUE":True, "FALSE":False}, [True, False] ],
"int32":32323232, "bool": True,
"LONG STRING": "123456789012345678901234567890123456789012345678901234567890",
"SHORT STRING": "1234567890" },
{ True: "true", 8: False, "false": 0 }
]
l = []
l.extend(l0)
l.append(l0)
l.extend(l1)
return l
def build_test_data(destdir):
l = get_test_data_list()
for i in range(len(l)):
packer = msgpack.Packer()
serialized = packer.pack(l[i])
f = open(os.path.join(destdir, str(i) + '.golden'), 'wb')
f.write(serialized)
f.close()
def doRpcServer(port, stopTimeSec):
class EchoHandler(object):
def Echo123(self, msg1, msg2, msg3):
return ("1:%s 2:%s 3:%s" % (msg1, msg2, msg3))
def EchoStruct(self, msg):
return ("%s" % msg)
addr = msgpackrpc.Address('localhost', port)
server = msgpackrpc.Server(EchoHandler())
server.listen(addr)
# run thread to stop it after stopTimeSec seconds if > 0
if stopTimeSec > 0:
def myStopRpcServer():
server.stop()
t = threading.Timer(stopTimeSec, myStopRpcServer)
t.start()
server.start()
def doRpcClientToPythonSvc(port):
address = msgpackrpc.Address('localhost', port)
client = msgpackrpc.Client(address, unpack_encoding='utf-8')
print client.call("Echo123", "A1", "B2", "C3")
print client.call("EchoStruct", {"A" :"Aa", "B":"Bb", "C":"Cc"})
def doRpcClientToGoSvc(port):
# print ">>>> port: ", port, " <<<<<"
address = msgpackrpc.Address('localhost', port)
client = msgpackrpc.Client(address, unpack_encoding='utf-8')
print client.call("TestRpcInt.Echo123", ["A1", "B2", "C3"])
print client.call("TestRpcInt.EchoStruct", {"A" :"Aa", "B":"Bb", "C":"Cc"})
def doMain(args):
if len(args) == 2 and args[0] == "testdata":
build_test_data(args[1])
elif len(args) == 3 and args[0] == "rpc-server":
doRpcServer(int(args[1]), int(args[2]))
elif len(args) == 2 and args[0] == "rpc-client-python-service":
doRpcClientToPythonSvc(int(args[1]))
elif len(args) == 2 and args[0] == "rpc-client-go-service":
doRpcClientToGoSvc(int(args[1]))
else:
print("Usage: msgpack_test.py " +
"[testdata|rpc-server|rpc-client-python-service|rpc-client-go-service] ...")
if __name__ == "__main__":
doMain(sys.argv[1:])

View File

@ -1,152 +0,0 @@
// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a BSD-style license found in the LICENSE file.
package codec
import (
"bufio"
"io"
"net/rpc"
"sync"
)
// Rpc provides a rpc Server or Client Codec for rpc communication.
type Rpc interface {
ServerCodec(conn io.ReadWriteCloser, h Handle) rpc.ServerCodec
ClientCodec(conn io.ReadWriteCloser, h Handle) rpc.ClientCodec
}
// RpcCodecBuffered allows access to the underlying bufio.Reader/Writer
// used by the rpc connection. It accommodates use-cases where the connection
// should be used by rpc and non-rpc functions, e.g. streaming a file after
// sending an rpc response.
type RpcCodecBuffered interface {
BufferedReader() *bufio.Reader
BufferedWriter() *bufio.Writer
}
// -------------------------------------
// rpcCodec defines the struct members and common methods.
type rpcCodec struct {
rwc io.ReadWriteCloser
dec *Decoder
enc *Encoder
bw *bufio.Writer
br *bufio.Reader
mu sync.Mutex
cls bool
}
func newRPCCodec(conn io.ReadWriteCloser, h Handle) rpcCodec {
bw := bufio.NewWriter(conn)
br := bufio.NewReader(conn)
return rpcCodec{
rwc: conn,
bw: bw,
br: br,
enc: NewEncoder(bw, h),
dec: NewDecoder(br, h),
}
}
func (c *rpcCodec) BufferedReader() *bufio.Reader {
return c.br
}
func (c *rpcCodec) BufferedWriter() *bufio.Writer {
return c.bw
}
func (c *rpcCodec) write(obj1, obj2 interface{}, writeObj2, doFlush bool) (err error) {
if c.cls {
return io.EOF
}
if err = c.enc.Encode(obj1); err != nil {
return
}
if writeObj2 {
if err = c.enc.Encode(obj2); err != nil {
return
}
}
if doFlush && c.bw != nil {
return c.bw.Flush()
}
return
}
func (c *rpcCodec) read(obj interface{}) (err error) {
if c.cls {
return io.EOF
}
//If nil is passed in, we should still attempt to read content to nowhere.
if obj == nil {
var obj2 interface{}
return c.dec.Decode(&obj2)
}
return c.dec.Decode(obj)
}
func (c *rpcCodec) Close() error {
if c.cls {
return io.EOF
}
c.cls = true
return c.rwc.Close()
}
func (c *rpcCodec) ReadResponseBody(body interface{}) error {
return c.read(body)
}
// -------------------------------------
type goRpcCodec struct {
rpcCodec
}
func (c *goRpcCodec) WriteRequest(r *rpc.Request, body interface{}) error {
// Must protect for concurrent access as per API
c.mu.Lock()
defer c.mu.Unlock()
return c.write(r, body, true, true)
}
func (c *goRpcCodec) WriteResponse(r *rpc.Response, body interface{}) error {
c.mu.Lock()
defer c.mu.Unlock()
return c.write(r, body, true, true)
}
func (c *goRpcCodec) ReadResponseHeader(r *rpc.Response) error {
return c.read(r)
}
func (c *goRpcCodec) ReadRequestHeader(r *rpc.Request) error {
return c.read(r)
}
func (c *goRpcCodec) ReadRequestBody(body interface{}) error {
return c.read(body)
}
// -------------------------------------
// goRpc is the implementation of Rpc that uses the communication protocol
// as defined in net/rpc package.
type goRpc struct{}
// GoRpc implements Rpc using the communication protocol defined in net/rpc package.
// Its methods (ServerCodec and ClientCodec) return values that implement RpcCodecBuffered.
var GoRpc goRpc
func (x goRpc) ServerCodec(conn io.ReadWriteCloser, h Handle) rpc.ServerCodec {
return &goRpcCodec{newRPCCodec(conn, h)}
}
func (x goRpc) ClientCodec(conn io.ReadWriteCloser, h Handle) rpc.ClientCodec {
return &goRpcCodec{newRPCCodec(conn, h)}
}
var _ RpcCodecBuffered = (*rpcCodec)(nil) // ensure *rpcCodec implements RpcCodecBuffered
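// Illustration only (not part of the original file): serving net/rpc over this
// codec; assumes a net.Listener from elsewhere and that "net" is added to the
// import set.
func serveExampleConn(ln net.Listener, h Handle) error {
	conn, err := ln.Accept()
	if err != nil {
		return err
	}
	go rpc.ServeCodec(GoRpc.ServerCodec(conn, h))
	return nil
}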

View File

@ -1,461 +0,0 @@
// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a BSD-style license found in the LICENSE file.
package codec
import "math"
const (
_ uint8 = iota
simpleVdNil = 1
simpleVdFalse = 2
simpleVdTrue = 3
simpleVdFloat32 = 4
simpleVdFloat64 = 5
// each lasts for 4 (ie n, n+1, n+2, n+3)
simpleVdPosInt = 8
simpleVdNegInt = 12
// containers: each is assigned a block of 8 descriptor values (n .. n+7); only n .. n+4 are currently used (see decLen)
simpleVdString = 216
simpleVdByteArray = 224
simpleVdArray = 232
simpleVdMap = 240
simpleVdExt = 248
)
type simpleEncDriver struct {
h *SimpleHandle
w encWriter
//b [8]byte
}
func (e *simpleEncDriver) isBuiltinType(rt uintptr) bool {
return false
}
func (e *simpleEncDriver) encodeBuiltin(rt uintptr, v interface{}) {
}
func (e *simpleEncDriver) encodeNil() {
e.w.writen1(simpleVdNil)
}
func (e *simpleEncDriver) encodeBool(b bool) {
if b {
e.w.writen1(simpleVdTrue)
} else {
e.w.writen1(simpleVdFalse)
}
}
func (e *simpleEncDriver) encodeFloat32(f float32) {
e.w.writen1(simpleVdFloat32)
e.w.writeUint32(math.Float32bits(f))
}
func (e *simpleEncDriver) encodeFloat64(f float64) {
e.w.writen1(simpleVdFloat64)
e.w.writeUint64(math.Float64bits(f))
}
func (e *simpleEncDriver) encodeInt(v int64) {
if v < 0 {
e.encUint(uint64(-v), simpleVdNegInt)
} else {
e.encUint(uint64(v), simpleVdPosInt)
}
}
func (e *simpleEncDriver) encodeUint(v uint64) {
e.encUint(v, simpleVdPosInt)
}
func (e *simpleEncDriver) encUint(v uint64, bd uint8) {
switch {
case v <= math.MaxUint8:
e.w.writen2(bd, uint8(v))
case v <= math.MaxUint16:
e.w.writen1(bd + 1)
e.w.writeUint16(uint16(v))
case v <= math.MaxUint32:
e.w.writen1(bd + 2)
e.w.writeUint32(uint32(v))
case v <= math.MaxUint64:
e.w.writen1(bd + 3)
e.w.writeUint64(v)
}
}
func (e *simpleEncDriver) encLen(bd byte, length int) {
switch {
case length == 0:
e.w.writen1(bd)
case length <= math.MaxUint8:
e.w.writen1(bd + 1)
e.w.writen1(uint8(length))
case length <= math.MaxUint16:
e.w.writen1(bd + 2)
e.w.writeUint16(uint16(length))
case int64(length) <= math.MaxUint32:
e.w.writen1(bd + 3)
e.w.writeUint32(uint32(length))
default:
e.w.writen1(bd + 4)
e.w.writeUint64(uint64(length))
}
}
func (e *simpleEncDriver) encodeExtPreamble(xtag byte, length int) {
e.encLen(simpleVdExt, length)
e.w.writen1(xtag)
}
func (e *simpleEncDriver) encodeArrayPreamble(length int) {
e.encLen(simpleVdArray, length)
}
func (e *simpleEncDriver) encodeMapPreamble(length int) {
e.encLen(simpleVdMap, length)
}
func (e *simpleEncDriver) encodeString(c charEncoding, v string) {
e.encLen(simpleVdString, len(v))
e.w.writestr(v)
}
func (e *simpleEncDriver) encodeSymbol(v string) {
e.encodeString(c_UTF8, v)
}
func (e *simpleEncDriver) encodeStringBytes(c charEncoding, v []byte) {
e.encLen(simpleVdByteArray, len(v))
e.w.writeb(v)
}
//------------------------------------
type simpleDecDriver struct {
h *SimpleHandle
r decReader
bdRead bool
bdType valueType
bd byte
//b [8]byte
}
func (d *simpleDecDriver) initReadNext() {
if d.bdRead {
return
}
d.bd = d.r.readn1()
d.bdRead = true
d.bdType = valueTypeUnset
}
func (d *simpleDecDriver) currentEncodedType() valueType {
if d.bdType == valueTypeUnset {
switch d.bd {
case simpleVdNil:
d.bdType = valueTypeNil
case simpleVdTrue, simpleVdFalse:
d.bdType = valueTypeBool
case simpleVdPosInt, simpleVdPosInt + 1, simpleVdPosInt + 2, simpleVdPosInt + 3:
d.bdType = valueTypeUint
case simpleVdNegInt, simpleVdNegInt + 1, simpleVdNegInt + 2, simpleVdNegInt + 3:
d.bdType = valueTypeInt
case simpleVdFloat32, simpleVdFloat64:
d.bdType = valueTypeFloat
case simpleVdString, simpleVdString + 1, simpleVdString + 2, simpleVdString + 3, simpleVdString + 4:
d.bdType = valueTypeString
case simpleVdByteArray, simpleVdByteArray + 1, simpleVdByteArray + 2, simpleVdByteArray + 3, simpleVdByteArray + 4:
d.bdType = valueTypeBytes
case simpleVdExt, simpleVdExt + 1, simpleVdExt + 2, simpleVdExt + 3, simpleVdExt + 4:
d.bdType = valueTypeExt
case simpleVdArray, simpleVdArray + 1, simpleVdArray + 2, simpleVdArray + 3, simpleVdArray + 4:
d.bdType = valueTypeArray
case simpleVdMap, simpleVdMap + 1, simpleVdMap + 2, simpleVdMap + 3, simpleVdMap + 4:
d.bdType = valueTypeMap
default:
decErr("currentEncodedType: Unrecognized d.vd: 0x%x", d.bd)
}
}
return d.bdType
}
func (d *simpleDecDriver) tryDecodeAsNil() bool {
if d.bd == simpleVdNil {
d.bdRead = false
return true
}
return false
}
func (d *simpleDecDriver) isBuiltinType(rt uintptr) bool {
return false
}
func (d *simpleDecDriver) decodeBuiltin(rt uintptr, v interface{}) {
}
func (d *simpleDecDriver) decIntAny() (ui uint64, i int64, neg bool) {
switch d.bd {
case simpleVdPosInt:
ui = uint64(d.r.readn1())
i = int64(ui)
case simpleVdPosInt + 1:
ui = uint64(d.r.readUint16())
i = int64(ui)
case simpleVdPosInt + 2:
ui = uint64(d.r.readUint32())
i = int64(ui)
case simpleVdPosInt + 3:
ui = uint64(d.r.readUint64())
i = int64(ui)
case simpleVdNegInt:
ui = uint64(d.r.readn1())
i = -(int64(ui))
neg = true
case simpleVdNegInt + 1:
ui = uint64(d.r.readUint16())
i = -(int64(ui))
neg = true
case simpleVdNegInt + 2:
ui = uint64(d.r.readUint32())
i = -(int64(ui))
neg = true
case simpleVdNegInt + 3:
ui = uint64(d.r.readUint64())
i = -(int64(ui))
neg = true
default:
decErr("decIntAny: Integer only valid from pos/neg integer1..8. Invalid descriptor: %v", d.bd)
}
// don't do this check, because callers may only want the unsigned value.
// if ui > math.MaxInt64 {
// decErr("decIntAny: Integer out of range for signed int64: %v", ui)
// }
return
}
func (d *simpleDecDriver) decodeInt(bitsize uint8) (i int64) {
_, i, _ = d.decIntAny()
checkOverflow(0, i, bitsize)
d.bdRead = false
return
}
func (d *simpleDecDriver) decodeUint(bitsize uint8) (ui uint64) {
ui, i, neg := d.decIntAny()
if neg {
decErr("Assigning negative signed value: %v, to unsigned type", i)
}
checkOverflow(ui, 0, bitsize)
d.bdRead = false
return
}
func (d *simpleDecDriver) decodeFloat(chkOverflow32 bool) (f float64) {
switch d.bd {
case simpleVdFloat32:
f = float64(math.Float32frombits(d.r.readUint32()))
case simpleVdFloat64:
f = math.Float64frombits(d.r.readUint64())
default:
if d.bd >= simpleVdPosInt && d.bd <= simpleVdNegInt+3 {
_, i, _ := d.decIntAny()
f = float64(i)
} else {
decErr("Float only valid from float32/64: Invalid descriptor: %v", d.bd)
}
}
checkOverflowFloat32(f, chkOverflow32)
d.bdRead = false
return
}
// bool can be decoded from bool only (single byte).
func (d *simpleDecDriver) decodeBool() (b bool) {
switch d.bd {
case simpleVdTrue:
b = true
case simpleVdFalse:
default:
decErr("Invalid single-byte value for bool: %s: %x", msgBadDesc, d.bd)
}
d.bdRead = false
return
}
func (d *simpleDecDriver) readMapLen() (length int) {
d.bdRead = false
return d.decLen()
}
func (d *simpleDecDriver) readArrayLen() (length int) {
d.bdRead = false
return d.decLen()
}
func (d *simpleDecDriver) decLen() int {
switch d.bd % 8 {
case 0:
return 0
case 1:
return int(d.r.readn1())
case 2:
return int(d.r.readUint16())
case 3:
ui := uint64(d.r.readUint32())
checkOverflow(ui, 0, intBitsize)
return int(ui)
case 4:
ui := d.r.readUint64()
checkOverflow(ui, 0, intBitsize)
return int(ui)
}
decErr("decLen: Cannot read length: bd%8 must be in range 0..4. Got: %d", d.bd%8)
return -1
}
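// Worked examples (illustration only, derived from encLen above and the bd%8
// dispatch in decLen), using the string descriptor simpleVdString (216):
//   length 0   -> 0xd8            (216, no length bytes)
//   length 5   -> 0xd9 0x05       (217, one length byte)
//   length 300 -> 0xda 0x01 0x2c  (218, two length bytes, big-endian)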
func (d *simpleDecDriver) decodeString() (s string) {
s = string(d.r.readn(d.decLen()))
d.bdRead = false
return
}
func (d *simpleDecDriver) decodeBytes(bs []byte) (bsOut []byte, changed bool) {
if clen := d.decLen(); clen > 0 {
// if no contents in stream, don't update the passed byteslice
if len(bs) != clen {
if len(bs) > clen {
bs = bs[:clen]
} else {
bs = make([]byte, clen)
}
bsOut = bs
changed = true
}
d.r.readb(bs)
}
d.bdRead = false
return
}
func (d *simpleDecDriver) decodeExt(verifyTag bool, tag byte) (xtag byte, xbs []byte) {
switch d.bd {
case simpleVdExt, simpleVdExt + 1, simpleVdExt + 2, simpleVdExt + 3, simpleVdExt + 4:
l := d.decLen()
xtag = d.r.readn1()
if verifyTag && xtag != tag {
decErr("Wrong extension tag. Got %b. Expecting: %v", xtag, tag)
}
xbs = d.r.readn(l)
case simpleVdByteArray, simpleVdByteArray + 1, simpleVdByteArray + 2, simpleVdByteArray + 3, simpleVdByteArray + 4:
xbs, _ = d.decodeBytes(nil)
default:
decErr("Invalid d.vd for extensions (Expecting extensions or byte array). Got: 0x%x", d.bd)
}
d.bdRead = false
return
}
func (d *simpleDecDriver) decodeNaked() (v interface{}, vt valueType, decodeFurther bool) {
d.initReadNext()
switch d.bd {
case simpleVdNil:
vt = valueTypeNil
case simpleVdFalse:
vt = valueTypeBool
v = false
case simpleVdTrue:
vt = valueTypeBool
v = true
case simpleVdPosInt, simpleVdPosInt + 1, simpleVdPosInt + 2, simpleVdPosInt + 3:
vt = valueTypeUint
ui, _, _ := d.decIntAny()
v = ui
case simpleVdNegInt, simpleVdNegInt + 1, simpleVdNegInt + 2, simpleVdNegInt + 3:
vt = valueTypeInt
_, i, _ := d.decIntAny()
v = i
case simpleVdFloat32:
vt = valueTypeFloat
v = d.decodeFloat(true)
case simpleVdFloat64:
vt = valueTypeFloat
v = d.decodeFloat(false)
case simpleVdString, simpleVdString + 1, simpleVdString + 2, simpleVdString + 3, simpleVdString + 4:
vt = valueTypeString
v = d.decodeString()
case simpleVdByteArray, simpleVdByteArray + 1, simpleVdByteArray + 2, simpleVdByteArray + 3, simpleVdByteArray + 4:
vt = valueTypeBytes
v, _ = d.decodeBytes(nil)
case simpleVdExt, simpleVdExt + 1, simpleVdExt + 2, simpleVdExt + 3, simpleVdExt + 4:
vt = valueTypeExt
l := d.decLen()
var re RawExt
re.Tag = d.r.readn1()
re.Data = d.r.readn(l)
v = &re
vt = valueTypeExt
case simpleVdArray, simpleVdArray + 1, simpleVdArray + 2, simpleVdArray + 3, simpleVdArray + 4:
vt = valueTypeArray
decodeFurther = true
case simpleVdMap, simpleVdMap + 1, simpleVdMap + 2, simpleVdMap + 3, simpleVdMap + 4:
vt = valueTypeMap
decodeFurther = true
default:
decErr("decodeNaked: Unrecognized d.vd: 0x%x", d.bd)
}
if !decodeFurther {
d.bdRead = false
}
return
}
//------------------------------------
// SimpleHandle is a Handle for a very simple encoding format.
//
// simple is a simplistic codec similar to binc, but not as compact.
// - Encoding of a value is always preceded by the descriptor byte (bd)
// - True, false, nil are encoded fully in 1 byte (the descriptor)
// - Integers (intXXX, uintXXX) are encoded in 1, 2, 4 or 8 bytes (plus a descriptor byte).
//   There are positive (uintXXX and intXXX >= 0) and negative (intXXX < 0) integers.
// - Floats are encoded in 4 or 8 bytes (plus a descriptor byte)
// - Length of containers (strings, bytes, array, map, extensions)
//   are encoded in 0, 1, 2, 4 or 8 bytes.
//   Zero-length containers have no length encoded.
//   For others, the number of bytes is given by pow(2, (bd%8)-1).
// - maps are encoded as [bd] [length] [[key][value]]...
// - arrays are encoded as [bd] [length] [value]...
// - extensions are encoded as [bd] [length] [tag] [byte]...
// - strings/bytearrays are encoded as [bd] [length] [byte]...
//
// The full spec will be published soon.
type SimpleHandle struct {
BasicHandle
}
func (h *SimpleHandle) newEncDriver(w encWriter) encDriver {
return &simpleEncDriver{w: w, h: h}
}
func (h *SimpleHandle) newDecDriver(r decReader) decDriver {
return &simpleDecDriver{r: r, h: h}
}
func (_ *SimpleHandle) writeExt() bool {
return true
}
func (h *SimpleHandle) getBasicHandle() *BasicHandle {
return &h.BasicHandle
}
var _ decDriver = (*simpleDecDriver)(nil)
var _ encDriver = (*simpleEncDriver)(nil)
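The SimpleHandle above is exercised through the package's public Encoder/Decoder API rather than by calling the driver directly. A minimal round-trip sketch, assuming the NewEncoderBytes/NewDecoderBytes constructors exported by this codec package (the import path matches the vendored location in this commit):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/go-msgpack/codec"
)

func main() {
	// Encode a value using the Simple format described above; the first
	// byte of buf is the descriptor byte (bd) for the outer container.
	in := map[string]int{"a": 1, "b": 2}
	var buf []byte
	h := new(codec.SimpleHandle)
	if err := codec.NewEncoderBytes(&buf, h).Encode(in); err != nil {
		panic(err)
	}

	// Decode it back with the same handle.
	out := map[string]int{}
	if err := codec.NewDecoderBytes(buf, h).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out)
}
```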

View File

@ -1,193 +0,0 @@
// Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a BSD-style license found in the LICENSE file.
package codec
import (
"time"
)
var (
timeDigits = [...]byte{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
)
// EncodeTime encodes a time.Time as a []byte, including
// information on the instant in time and UTC offset.
//
// Format Description
//
// A timestamp is composed of 3 components:
//
// - secs: signed integer representing seconds since unix epoch
// - nsecs: unsigned integer representing fractional seconds as a
// nanosecond offset within secs, in the range 0 <= nsecs < 1e9
// - tz: signed integer representing timezone offset in minutes east of UTC,
// and a dst (daylight savings time) flag
//
// When encoding a timestamp, the first byte is the descriptor, which
// defines which components are encoded and how many bytes are used to
// encode secs and nsecs components. *If secs/nsecs is 0 or tz is UTC, it
// is not encoded in the byte array explicitly*.
//
// Descriptor 8 bits are of the form `A B C DDD EE`:
// A: Is secs component encoded? 1 = true
// B: Is nsecs component encoded? 1 = true
// C: Is tz component encoded? 1 = true
// DDD: Number of extra bytes for secs (range 0-7).
// If A = 1, secs encoded in DDD+1 bytes.
// If A = 0, secs is not encoded, and is assumed to be 0.
// If A = 1, then we need at least 1 byte to encode secs.
// DDD says the number of extra bytes beyond that 1.
// E.g. if DDD=0, then secs is represented in 1 byte.
// if DDD=2, then secs is represented in 3 bytes.
// EE: Number of extra bytes for nsecs (range 0-3).
// If B = 1, nsecs encoded in EE+1 bytes (similar to secs/DDD above)
//
// Following the descriptor bytes, subsequent bytes are:
//
// secs component encoded in `DDD + 1` bytes (if A == 1)
// nsecs component encoded in `EE + 1` bytes (if B == 1)
// tz component encoded in 2 bytes (if C == 1)
//
// secs and nsecs components are integers encoded in a BigEndian
// two's-complement encoding format.
//
// tz component is encoded as 2 bytes (16 bits). Most significant bit 15 to
// Least significant bit 0 are described below:
//
// Timezone offset has a range of -12:00 to +14:00 (ie -720 to +840 minutes).
// Bit 15 = have_dst: set to 1 if we set the dst flag.
// Bit 14 = dst_on: set to 1 if dst is in effect at the time, or 0 if not.
// Bits 13..0 = timezone offset in minutes. It is a signed integer in Big Endian format.
//
func encodeTime(t time.Time) []byte {
//t := rv.Interface().(time.Time)
tsecs, tnsecs := t.Unix(), t.Nanosecond()
var (
bd byte
btmp [8]byte
bs [16]byte
i int = 1
)
l := t.Location()
if l == time.UTC {
l = nil
}
if tsecs != 0 {
bd = bd | 0x80
bigen.PutUint64(btmp[:], uint64(tsecs))
f := pruneSignExt(btmp[:], tsecs >= 0)
bd = bd | (byte(7-f) << 2)
copy(bs[i:], btmp[f:])
i = i + (8 - f)
}
if tnsecs != 0 {
bd = bd | 0x40
bigen.PutUint32(btmp[:4], uint32(tnsecs))
f := pruneSignExt(btmp[:4], true)
bd = bd | byte(3-f)
copy(bs[i:], btmp[f:4])
i = i + (4 - f)
}
if l != nil {
bd = bd | 0x20
// Note that Go Libs do not give access to dst flag.
_, zoneOffset := t.Zone()
//zoneName, zoneOffset := t.Zone()
zoneOffset /= 60
z := uint16(zoneOffset)
bigen.PutUint16(btmp[:2], z)
// clear dst flags
bs[i] = btmp[0] & 0x3f
bs[i+1] = btmp[1]
i = i + 2
}
bs[0] = bd
return bs[0:i]
}
// DecodeTime decodes a []byte into a time.Time.
func decodeTime(bs []byte) (tt time.Time, err error) {
bd := bs[0]
var (
tsec int64
tnsec uint32
tz uint16
i byte = 1
i2 byte
n byte
)
if bd&(1<<7) != 0 {
var btmp [8]byte
n = ((bd >> 2) & 0x7) + 1
i2 = i + n
copy(btmp[8-n:], bs[i:i2])
//if first bit of bs[i] is set, then fill btmp[0..8-n] with 0xff (ie sign extend it)
if bs[i]&(1<<7) != 0 {
copy(btmp[0:8-n], bsAll0xff)
//for j,k := byte(0), 8-n; j < k; j++ { btmp[j] = 0xff }
}
i = i2
tsec = int64(bigen.Uint64(btmp[:]))
}
if bd&(1<<6) != 0 {
var btmp [4]byte
n = (bd & 0x3) + 1
i2 = i + n
copy(btmp[4-n:], bs[i:i2])
i = i2
tnsec = bigen.Uint32(btmp[:])
}
if bd&(1<<5) == 0 {
tt = time.Unix(tsec, int64(tnsec)).UTC()
return
}
// In stdlib time.Parse, when a date is parsed without a zone name, it uses "" as zone name.
// However, we need name here, so it can be shown when time is printed.
// Zone name is in form: UTC-08:00.
// Note that Go Libs do not give access to dst flag, so we ignore dst bits
i2 = i + 2
tz = bigen.Uint16(bs[i:i2])
i = i2
// sign extend sign bit into top 2 MSB (which were dst bits):
if tz&(1<<13) == 0 { // positive
tz = tz & 0x3fff //clear 2 MSBs: dst bits
} else { // negative
tz = tz | 0xc000 //set 2 MSBs: dst bits
//tzname[3] = '-' (TODO: verify. this works here)
}
tzint := int16(tz)
if tzint == 0 {
tt = time.Unix(tsec, int64(tnsec)).UTC()
} else {
// For Go Time, do not use a descriptive timezone.
// It's unnecessary, and makes it harder to do a reflect.DeepEqual.
// The Offset already tells what the offset should be, if not on UTC and unknown zone name.
// var zoneName = timeLocUTCName(tzint)
tt = time.Unix(tsec, int64(tnsec)).In(time.FixedZone("", int(tzint)*60))
}
return
}
func timeLocUTCName(tzint int16) string {
if tzint == 0 {
return "UTC"
}
var tzname = []byte("UTC+00:00")
//tzname := fmt.Sprintf("UTC%s%02d:%02d", tzsign, tz/60, tz%60) //perf issue using Sprintf. inline below.
//tzhr, tzmin := tz/60, tz%60 //faster if u convert to int first
var tzhr, tzmin int16
if tzint < 0 {
tzname[3] = '-' // (TODO: verify. this works here)
tzhr, tzmin = -tzint/60, (-tzint)%60
} else {
tzhr, tzmin = tzint/60, tzint%60
}
tzname[4] = timeDigits[tzhr/10]
tzname[5] = timeDigits[tzhr%10]
tzname[7] = timeDigits[tzmin/10]
tzname[8] = timeDigits[tzmin%10]
return string(tzname)
//return time.FixedZone(string(tzname), int(tzint)*60)
}

View File

@ -1,362 +0,0 @@
Mozilla Public License, version 2.0
1. Definitions
1.1. "Contributor"
means each individual or legal entity that creates, contributes to the
creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used by a
Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached the
notice in Exhibit A, the Executable Form of such Source Code Form, and
Modifications of such Source Code Form, in each case including portions
thereof.
1.5. "Incompatible With Secondary Licenses"
means
a. that the initial Contributor has attached the notice described in
Exhibit B to the Covered Software; or
b. that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the terms of
a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in a
separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible, whether
at the time of the initial grant or subsequently, any and all of the
rights conveyed by this License.
1.10. "Modifications"
means any of the following:
a. any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered Software; or
b. any new file in Source Code Form that contains any Covered Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the License,
by the making, using, selling, offering for sale, having made, import,
or transfer of either its Contributions or its Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU Lesser
General Public License, Version 2.1, the GNU Affero General Public
License, Version 3.0, or any later versions of those licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that controls, is
controlled by, or is under common control with You. For purposes of this
definition, "control" means (a) the power, direct or indirect, to cause
the direction or management of such entity, whether by contract or
otherwise, or (b) ownership of more than fifty percent (50%) of the
outstanding shares or beneficial ownership of such entity.
2. License Grants and Conditions
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
a. under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
b. under Patent Claims of such Contributor to make, use, sell, offer for
sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
a. for any code that a Contributor has removed from Covered Software; or
b. for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
c. under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights to
grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
Section 2.1.
3. Responsibilities
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
a. such Covered Software must also be made available in Source Code Form,
as described in Section 3.1, and You must inform recipients of the
Executable Form how they can obtain a copy of such Source Code Form by
reasonable means in a timely manner, at a charge no more than the cost
of distribution to the recipient; and
b. You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter the
recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty, or
limitations of liability) contained within the Source Code Form of the
Covered Software, except that You may alter any license notices to the
extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
If it is impossible for You to comply with any of the terms of this License
with respect to some or all of the Covered Software due to statute,
judicial order, or regulation then You must: (a) comply with the terms of
this License to the maximum extent possible; and (b) describe the
limitations and the code they affect. Such description must be placed in a
text file included with all distributions of the Covered Software under
this License. Except to the extent prohibited by statute or regulation,
such description must be sufficiently detailed for a recipient of ordinary
skill to be able to understand it.
5. Termination
5.1. The rights granted under this License will terminate automatically if You
fail to comply with any of its terms. However, if You become compliant,
then the rights granted under this License from a particular Contributor
are reinstated (a) provisionally, unless and until such Contributor
explicitly and finally terminates Your grants, and (b) on an ongoing
basis, if such Contributor fails to notify You of the non-compliance by
some reasonable means prior to 60 days after You have come back into
compliance. Moreover, Your grants from a particular Contributor are
reinstated on an ongoing basis if such Contributor notifies You of the
non-compliance by some reasonable means, this is the first time You have
received notice of non-compliance with this License from such
Contributor, and You become compliant prior to 30 days after Your receipt
of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
license agreements (excluding distributors and resellers) which have been
validly granted by You or Your distributors under this License prior to
termination shall survive termination.
6. Disclaimer of Warranty
Covered Software is provided under this License on an "as is" basis,
without warranty of any kind, either expressed, implied, or statutory,
including, without limitation, warranties that the Covered Software is free
of defects, merchantable, fit for a particular purpose or non-infringing.
The entire risk as to the quality and performance of the Covered Software
is with You. Should any Covered Software prove defective in any respect,
You (not any Contributor) assume the cost of any necessary servicing,
repair, or correction. This disclaimer of warranty constitutes an essential
part of this License. No use of any Covered Software is authorized under
this License except under this disclaimer.
7. Limitation of Liability
Under no circumstances and under no legal theory, whether tort (including
negligence), contract, or otherwise, shall any Contributor, or anyone who
distributes Covered Software as permitted above, be liable to You for any
direct, indirect, special, incidental, or consequential damages of any
character including, without limitation, damages for lost profits, loss of
goodwill, work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses, even if such party shall have been
informed of the possibility of such damages. This limitation of liability
shall not apply to liability for death or personal injury resulting from
such party's negligence to the extent applicable law prohibits such
limitation. Some jurisdictions do not allow the exclusion or limitation of
incidental or consequential damages, so this exclusion and limitation may
not apply to You.
8. Litigation
Any litigation relating to this License may be brought only in the courts
of a jurisdiction where the defendant maintains its principal place of
business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions. Nothing
in this Section shall prevent a party's ability to bring cross-claims or
counter-claims.
9. Miscellaneous
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides that
the language of a contract shall be construed against the drafter shall not
be used to construe this License against a Contributor.
10. Versions of the License
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses If You choose to distribute Source Code Form that is
Incompatible With Secondary Licenses under the terms of this version of
the License, the notice described in Exhibit B of this License must be
attached.
Exhibit A - Source Code Form License Notice
This Source Code Form is subject to the
terms of the Mozilla Public License, v.
2.0. If a copy of the MPL was not
distributed with this file, You can
obtain one at
http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular file,
then You may include the notice in a location (such as a LICENSE file in a
relevant directory) where a recipient would be likely to look for such a
notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
This Source Code Form is "Incompatible
With Secondary Licenses", as defined by
the Mozilla Public License, v. 2.0.

View File

@ -1,11 +0,0 @@
raft-boltdb
===========
This repository provides the `raftboltdb` package. The package exports the
`BoltStore` which is an implementation of both a `LogStore` and `StableStore`.
It is meant to be used as a backend for the `raft` [package
here](https://github.com/hashicorp/raft).
This implementation uses [BoltDB](https://github.com/boltdb/bolt). BoltDB is
a simple key/value store implemented in pure Go, and inspired by LMDB.
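A minimal usage sketch (the database path and error handling below are illustrative, not taken from this repository):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/raft"
	raftboltdb "github.com/hashicorp/raft-boltdb"
)

func main() {
	// Open (or create) the Bolt-backed store; "raft.db" is an example path.
	store, err := raftboltdb.NewBoltStore("raft.db")
	if err != nil {
		panic(err)
	}
	defer store.Close()

	// LogStore usage: append a log entry and read it back by index.
	if err := store.StoreLog(&raft.Log{Index: 1, Data: []byte("hello")}); err != nil {
		panic(err)
	}
	var out raft.Log
	if err := store.GetLog(1, &out); err != nil {
		panic(err)
	}

	// StableStore usage: persist key/value state outside the log.
	if err := store.SetUint64([]byte("CurrentTerm"), 1); err != nil {
		panic(err)
	}
	term, _ := store.GetUint64([]byte("CurrentTerm"))
	fmt.Println(string(out.Data), term)
}
```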

View File

@ -1,231 +0,0 @@
package raftboltdb
import (
"errors"
"github.com/boltdb/bolt"
"github.com/hashicorp/raft"
)
const (
// Permissions to use on the db file. This is only used if the
// database file does not exist and needs to be created.
dbFileMode = 0600
)
var (
// Bucket names we perform transactions in
dbLogs = []byte("logs")
dbConf = []byte("conf")
// An error indicating a given key does not exist
ErrKeyNotFound = errors.New("not found")
)
// BoltStore provides access to BoltDB for Raft to store and retrieve
// log entries. It also provides key/value storage, and can be used as
// a LogStore and StableStore.
type BoltStore struct {
// conn is the underlying handle to the db.
conn *bolt.DB
// The path to the Bolt database file
path string
}
// NewBoltStore takes a file path and returns a connected Raft backend.
func NewBoltStore(path string) (*BoltStore, error) {
// Try to connect
handle, err := bolt.Open(path, dbFileMode, nil)
if err != nil {
return nil, err
}
// Create the new store
store := &BoltStore{
conn: handle,
path: path,
}
// Set up our buckets
if err := store.initialize(); err != nil {
store.Close()
return nil, err
}
return store, nil
}
// initialize is used to set up all of the buckets.
func (b *BoltStore) initialize() error {
tx, err := b.conn.Begin(true)
if err != nil {
return err
}
defer tx.Rollback()
// Create all the buckets
if _, err := tx.CreateBucketIfNotExists(dbLogs); err != nil {
return err
}
if _, err := tx.CreateBucketIfNotExists(dbConf); err != nil {
return err
}
return tx.Commit()
}
// Close is used to gracefully close the DB connection.
func (b *BoltStore) Close() error {
return b.conn.Close()
}
// FirstIndex returns the first known index from the Raft log.
func (b *BoltStore) FirstIndex() (uint64, error) {
tx, err := b.conn.Begin(false)
if err != nil {
return 0, err
}
defer tx.Rollback()
curs := tx.Bucket(dbLogs).Cursor()
if first, _ := curs.First(); first == nil {
return 0, nil
} else {
return bytesToUint64(first), nil
}
}
// LastIndex returns the last known index from the Raft log.
func (b *BoltStore) LastIndex() (uint64, error) {
tx, err := b.conn.Begin(false)
if err != nil {
return 0, err
}
defer tx.Rollback()
curs := tx.Bucket(dbLogs).Cursor()
if last, _ := curs.Last(); last == nil {
return 0, nil
} else {
return bytesToUint64(last), nil
}
}
// GetLog is used to retrieve a log from BoltDB at a given index.
func (b *BoltStore) GetLog(idx uint64, log *raft.Log) error {
tx, err := b.conn.Begin(false)
if err != nil {
return err
}
defer tx.Rollback()
bucket := tx.Bucket(dbLogs)
val := bucket.Get(uint64ToBytes(idx))
if val == nil {
return raft.ErrLogNotFound
}
return decodeMsgPack(val, log)
}
// StoreLog is used to store a single raft log
func (b *BoltStore) StoreLog(log *raft.Log) error {
return b.StoreLogs([]*raft.Log{log})
}
// StoreLogs is used to store a set of raft logs
func (b *BoltStore) StoreLogs(logs []*raft.Log) error {
tx, err := b.conn.Begin(true)
if err != nil {
return err
}
defer tx.Rollback()
for _, log := range logs {
key := uint64ToBytes(log.Index)
val, err := encodeMsgPack(log)
if err != nil {
return err
}
bucket := tx.Bucket(dbLogs)
if err := bucket.Put(key, val.Bytes()); err != nil {
return err
}
}
return tx.Commit()
}
// DeleteRange is used to delete logs within a given range inclusively.
func (b *BoltStore) DeleteRange(min, max uint64) error {
minKey := uint64ToBytes(min)
tx, err := b.conn.Begin(true)
if err != nil {
return err
}
defer tx.Rollback()
curs := tx.Bucket(dbLogs).Cursor()
for k, _ := curs.Seek(minKey); k != nil; k, _ = curs.Next() {
// Handle out-of-range log index
if bytesToUint64(k) > max {
break
}
// Delete in-range log index
if err := curs.Delete(); err != nil {
return err
}
}
return tx.Commit()
}
// Set is used to set a key/value pair outside of the raft log
func (b *BoltStore) Set(k, v []byte) error {
tx, err := b.conn.Begin(true)
if err != nil {
return err
}
defer tx.Rollback()
bucket := tx.Bucket(dbConf)
if err := bucket.Put(k, v); err != nil {
return err
}
return tx.Commit()
}
// Get is used to retrieve a value from the k/v store by key
func (b *BoltStore) Get(k []byte) ([]byte, error) {
tx, err := b.conn.Begin(false)
if err != nil {
return nil, err
}
defer tx.Rollback()
bucket := tx.Bucket(dbConf)
val := bucket.Get(k)
if val == nil {
return nil, ErrKeyNotFound
}
return append([]byte{}, val...), nil
}
// SetUint64 is like Set, but handles uint64 values
func (b *BoltStore) SetUint64(key []byte, val uint64) error {
return b.Set(key, uint64ToBytes(val))
}
// GetUint64 is like Get, but handles uint64 values
func (b *BoltStore) GetUint64(key []byte) (uint64, error) {
val, err := b.Get(key)
if err != nil {
return 0, err
}
return bytesToUint64(val), nil
}

View File

@ -1,37 +0,0 @@
package raftboltdb
import (
"bytes"
"encoding/binary"
"github.com/hashicorp/go-msgpack/codec"
)
// Decode reverses the encode operation on a byte slice input
func decodeMsgPack(buf []byte, out interface{}) error {
r := bytes.NewBuffer(buf)
hd := codec.MsgpackHandle{}
dec := codec.NewDecoder(r, &hd)
return dec.Decode(out)
}
// Encode writes an encoded object to a new bytes buffer
func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
buf := bytes.NewBuffer(nil)
hd := codec.MsgpackHandle{}
enc := codec.NewEncoder(buf, &hd)
err := enc.Encode(in)
return buf, err
}
// Converts bytes to an integer
func bytesToUint64(b []byte) uint64 {
return binary.BigEndian.Uint64(b)
}
// Converts a uint to a byte slice
func uint64ToBytes(u uint64) []byte {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, u)
return buf
}
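The helpers above are what BoltStore uses to serialize raft.Log values and to key them by index. A hedged in-package sketch (illustrative, not a test shipped with this commit) of how they round-trip:

```go
package raftboltdb

import (
	"testing"

	"github.com/hashicorp/raft"
)

// TestHelpersRoundTrip is an illustrative sketch: a raft.Log encoded with
// encodeMsgPack should decode back unchanged, and the index-key helpers
// should be inverses of each other.
func TestHelpersRoundTrip(t *testing.T) {
	in := raft.Log{Index: 42, Data: []byte("payload")}

	buf, err := encodeMsgPack(&in)
	if err != nil {
		t.Fatalf("encode: %v", err)
	}
	var out raft.Log
	if err := decodeMsgPack(buf.Bytes(), &out); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if out.Index != in.Index || string(out.Data) != string(in.Data) {
		t.Fatalf("round trip mismatch: %+v vs %+v", in, out)
	}

	// uint64ToBytes/bytesToUint64 are used to build the 8-byte log index keys.
	if got := bytesToUint64(uint64ToBytes(42)); got != 42 {
		t.Fatalf("expected 42, got %d", got)
	}
}
```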

View File

@ -1,23 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test

View File

@ -1,14 +0,0 @@
language: go
go:
- 1.2
- tip
install: make deps
script:
- make integ
notifications:
flowdock:
secure: fZrcf9rlh2IrQrlch1sHkn3YI7SKvjGnAl/zyV5D6NROe1Bbr6d3QRMuCXWWdhJHzjKmXk5rIzbqJhUc0PNF7YjxGNKSzqWMQ56KcvN1k8DzlqxpqkcA3Jbs6fXCWo2fssRtZ7hj/wOP1f5n6cc7kzHDt9dgaYJ6nO2fqNPJiTc=

View File

@ -1,354 +0,0 @@
Mozilla Public License, version 2.0
1. Definitions
1.1. “Contributor”
means each individual or legal entity that creates, contributes to the
creation of, or owns Covered Software.
1.2. “Contributor Version”
means the combination of the Contributions of others (if any) used by a
Contributor and that particular Contributor's Contribution.
1.3. “Contribution”
means Covered Software of a particular Contributor.
1.4. “Covered Software”
means Source Code Form to which the initial Contributor has attached the
notice in Exhibit A, the Executable Form of such Source Code Form, and
Modifications of such Source Code Form, in each case including portions
thereof.
1.5. “Incompatible With Secondary Licenses”
means
a. that the initial Contributor has attached the notice described in
Exhibit B to the Covered Software; or
b. that the Covered Software was made available under the terms of version
1.1 or earlier of the License, but not also under the terms of a
Secondary License.
1.6. “Executable Form”
means any form of the work other than Source Code Form.
1.7. “Larger Work”
means a work that combines Covered Software with other material, in a separate
file or files, that is not Covered Software.
1.8. “License”
means this document.
1.9. “Licensable”
means having the right to grant, to the maximum extent possible, whether at the
time of the initial grant or subsequently, any and all of the rights conveyed by
this License.
1.10. “Modifications”
means any of the following:
a. any file in Source Code Form that results from an addition to, deletion
from, or modification of the contents of Covered Software; or
b. any new file in Source Code Form that contains any Covered Software.
1.11. “Patent Claims” of a Contributor
means any patent claim(s), including without limitation, method, process,
and apparatus claims, in any patent Licensable by such Contributor that
would be infringed, but for the grant of the License, by the making,
using, selling, offering for sale, having made, import, or transfer of
either its Contributions or its Contributor Version.
1.12. “Secondary License”
means either the GNU General Public License, Version 2.0, the GNU Lesser
General Public License, Version 2.1, the GNU Affero General Public
License, Version 3.0, or any later versions of those licenses.
1.13. “Source Code Form”
means the form of the work preferred for making modifications.
1.14. “You” (or “Your”)
means an individual or a legal entity exercising rights under this
License. For legal entities, “You” includes any entity that controls, is
controlled by, or is under common control with You. For purposes of this
definition, “control” means (a) the power, direct or indirect, to cause
the direction or management of such entity, whether by contract or
otherwise, or (b) ownership of more than fifty percent (50%) of the
outstanding shares or beneficial ownership of such entity.
2. License Grants and Conditions
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
a. under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or as
part of a Larger Work; and
b. under Patent Claims of such Contributor to make, use, sell, offer for
sale, have made, import, and otherwise transfer either its Contributions
or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution become
effective for each Contribution on the date the Contributor first distributes
such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under this
License. No additional rights or licenses will be implied from the distribution
or licensing of Covered Software under this License. Notwithstanding Section
2.1(b) above, no patent license is granted by a Contributor:
a. for any code that a Contributor has removed from Covered Software; or
b. for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
c. under Patent Claims infringed by Covered Software in the absence of its
Contributions.
This License does not grant any rights in the trademarks, service marks, or
logos of any Contributor (except as may be necessary to comply with the
notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this License
(see Section 10.2) or under the terms of a Secondary License (if permitted
under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its Contributions
are its original creation(s) or it has sufficient rights to grant the
rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under applicable
copyright doctrines of fair use, fair dealing, or other equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
Section 2.1.
3. Responsibilities
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under the
terms of this License. You must inform recipients that the Source Code Form
of the Covered Software is governed by the terms of this License, and how
they can obtain a copy of this License. You may not attempt to alter or
restrict the recipients' rights in the Source Code Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
a. such Covered Software must also be made available in Source Code Form,
as described in Section 3.1, and You must inform recipients of the
Executable Form how they can obtain a copy of such Source Code Form by
reasonable means in a timely manner, at a charge no more than the cost
of distribution to the recipient; and
b. You may distribute such Executable Form under the terms of this License,
or sublicense it under different terms, provided that the license for
the Executable Form does not attempt to limit or alter the recipients'
rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for the
Covered Software. If the Larger Work is a combination of Covered Software
with a work governed by one or more Secondary Licenses, and the Covered
Software is not Incompatible With Secondary Licenses, this License permits
You to additionally distribute such Covered Software under the terms of
such Secondary License(s), so that the recipient of the Larger Work may, at
their option, further distribute the Covered Software under the terms of
either this License or such Secondary License(s).
3.4. Notices
You may not remove or alter the substance of any license notices (including
copyright notices, patent notices, disclaimers of warranty, or limitations
of liability) contained within the Source Code Form of the Covered
Software, except that You may alter any license notices to the extent
required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on behalf
of any Contributor. You must make it absolutely clear that any such
warranty, support, indemnity, or liability obligation is offered by You
alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
If it is impossible for You to comply with any of the terms of this License
with respect to some or all of the Covered Software due to statute, judicial
order, or regulation then You must: (a) comply with the terms of this License
to the maximum extent possible; and (b) describe the limitations and the code
they affect. Such description must be placed in a text file included with all
distributions of the Covered Software under this License. Except to the
extent prohibited by statute or regulation, such description must be
sufficiently detailed for a recipient of ordinary skill to be able to
understand it.
5. Termination
5.1. The rights granted under this License will terminate automatically if You
fail to comply with any of its terms. However, if You become compliant,
then the rights granted under this License from a particular Contributor
are reinstated (a) provisionally, unless and until such Contributor
explicitly and finally terminates Your grants, and (b) on an ongoing basis,
if such Contributor fails to notify You of the non-compliance by some
reasonable means prior to 60 days after You have come back into compliance.
Moreover, Your grants from a particular Contributor are reinstated on an
ongoing basis if such Contributor notifies You of the non-compliance by
some reasonable means, this is the first time You have received notice of
non-compliance with this License from such Contributor, and You become
compliant prior to 30 days after Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions, counter-claims,
and cross-claims) alleging that a Contributor Version directly or
indirectly infringes any patent, then the rights granted to You by any and
all Contributors for the Covered Software under Section 2.1 of this License
shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
license agreements (excluding distributors and resellers) which have been
validly granted by You or Your distributors under this License prior to
termination shall survive termination.
6. Disclaimer of Warranty
Covered Software is provided under this License on an “as is” basis, without
warranty of any kind, either expressed, implied, or statutory, including,
without limitation, warranties that the Covered Software is free of defects,
merchantable, fit for a particular purpose or non-infringing. The entire
risk as to the quality and performance of the Covered Software is with You.
Should any Covered Software prove defective in any respect, You (not any
Contributor) assume the cost of any necessary servicing, repair, or
correction. This disclaimer of warranty constitutes an essential part of this
License. No use of any Covered Software is authorized under this License
except under this disclaimer.
7. Limitation of Liability
Under no circumstances and under no legal theory, whether tort (including
negligence), contract, or otherwise, shall any Contributor, or anyone who
distributes Covered Software as permitted above, be liable to You for any
direct, indirect, special, incidental, or consequential damages of any
character including, without limitation, damages for lost profits, loss of
goodwill, work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses, even if such party shall have been
informed of the possibility of such damages. This limitation of liability
shall not apply to liability for death or personal injury resulting from such
party's negligence to the extent applicable law prohibits such limitation.
Some jurisdictions do not allow the exclusion or limitation of incidental or
consequential damages, so this exclusion and limitation may not apply to You.
8. Litigation
Any litigation relating to this License may be brought only in the courts of
a jurisdiction where the defendant maintains its principal place of business
and such litigation shall be governed by laws of that jurisdiction, without
reference to its conflict-of-law provisions. Nothing in this Section shall
prevent a party's ability to bring cross-claims or counter-claims.
9. Miscellaneous
This License represents the complete agreement concerning the subject matter
hereof. If any provision of this License is held to be unenforceable, such
provision shall be reformed only to the extent necessary to make it
enforceable. Any law or regulation which provides that the language of a
contract shall be construed against the drafter shall not be used to construe
this License against a Contributor.
10. Versions of the License
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version of
the License under which You originally received the Covered Software, or
under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a modified
version of this License if you rename the license and remove any
references to the name of the license steward (except to note that such
modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
This Source Code Form is subject to the
terms of the Mozilla Public License, v.
2.0. If a copy of the MPL was not
distributed with this file, You can
obtain one at
http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular file, then
You may include the notice in a location (such as a LICENSE file in a relevant
directory) where a recipient would be likely to look for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - “Incompatible With Secondary Licenses” Notice
This Source Code Form is “Incompatible
With Secondary Licenses”, as defined by
the Mozilla Public License, v. 2.0.

View File

@ -1,17 +0,0 @@
DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...)
test:
go test -timeout=5s ./...
integ: test
INTEG_TESTS=yes go test -timeout=3s -run=Integ ./...
deps:
go get -d -v ./...
echo $(DEPS) | xargs -n1 go get -d
cov:
INTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html
open /tmp/coverage.html
.PHONY: test cov integ deps

View File

@ -1,89 +0,0 @@
raft [![Build Status](https://travis-ci.org/hashicorp/raft.png)](https://travis-ci.org/hashicorp/raft)
====
raft is a [Go](http://www.golang.org) library that manages a replicated
log and can be used with an FSM to manage replicated state machines. It
is a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)).
The use cases for such a library are far-reaching as replicated state
machines are a key component of many distributed systems. They enable
building Consistent, Partition Tolerant (CP) systems, with limited
fault tolerance as well.
## Building
If you wish to build raft you'll need Go version 1.2+ installed.
Please check your installation with:
```
go version
```
## Documentation
For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft).
To prevent complications with cgo, the primary backend `MDBStore` is in a separate repository,
called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation
for the `LogStore` and `StableStore`.
A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available called
[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`
and `StableStore`.
## Protocol
raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
A high level overview of the Raft protocol is described below, but for details please read the full
[Raft paper](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
followed by the raft source. Any questions about the raft protocol should be sent to the
[raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev).
### Protocol Description
Raft nodes are always in one of three states: follower, candidate or leader. All
nodes initially start out as a follower. In this state, nodes can accept log entries
from a leader and cast votes. If no entries are received for some time, nodes
self-promote to the candidate state. In the candidate state nodes request votes from
their peers. If a candidate receives a quorum of votes, then it is promoted to a leader.
The leader must accept new log entries and replicate to all the other followers.
In addition, if stale reads are not acceptable, all queries must also be performed on
the leader.
Once a cluster has a leader, it is able to accept new log entries. A client can
request that a leader append a new log entry, which is an opaque binary blob to
Raft. The leader then writes the entry to durable storage and attempts to replicate
to a quorum of followers. Once the log entry is considered *committed*, it can be
*applied* to a finite state machine. The finite state machine is application specific,
and is implemented using an interface.
An obvious question relates to the unbounded nature of a replicated log. Raft provides
a mechanism by which the current state is snapshotted, and the log is compacted. Because
of the FSM abstraction, restoring the state of the FSM must result in the same state
as a replay of old logs. This allows Raft to capture the FSM state at a point in time,
and then remove all the logs that were used to reach that state. This is performed automatically
without user intervention, and prevents unbounded disk usage as well as minimizing
time spent replaying logs.
Lastly, there is the issue of updating the peer set when new servers are joining
or existing servers are leaving. As long as a quorum of nodes is available, this
is not an issue as Raft provides mechanisms to dynamically update the peer set.
If a quorum of nodes is unavailable, then this becomes a very challenging issue.
For example, suppose there are only 2 peers, A and B. The quorum size is also
2, meaning both nodes must agree to commit a log entry. If either A or B fails,
it is now impossible to reach quorum. This means the cluster is unable to add
or remove a node, or commit any additional log entries. This results in *unavailability*.
At this point, manual intervention would be required to remove either A or B,
and to restart the remaining node in bootstrap mode.
A Raft cluster of 3 nodes can tolerate a single node failure, while a cluster
of 5 can tolerate 2 node failures. The recommended configuration is to either
run 3 or 5 raft servers. This maximizes availability without
greatly sacrificing performance.
In terms of performance, Raft is comparable to Paxos. Assuming stable leadership,
committing a log entry requires a single round trip to half of the cluster.
Thus performance is bound by disk I/O and network latency.
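The finite state machine mentioned above is supplied by the application through the library's FSM interface (Apply/Snapshot/Restore, as this version of the package exposes it). The following is a minimal, illustrative counter FSM; the snapshot plumbing is stubbed and is an assumption about typical usage, not code from this repository:

```go
package main

import (
	"fmt"
	"io"
	"sync"

	"github.com/hashicorp/raft"
)

// counterFSM is an illustrative FSM: each committed entry adds the length of
// its payload to a running total.
type counterFSM struct {
	mu    sync.Mutex
	total uint64
}

var _ raft.FSM = (*counterFSM)(nil)

// Apply is invoked once a log entry is committed by a quorum.
func (f *counterFSM) Apply(l *raft.Log) interface{} {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.total += uint64(len(l.Data))
	return f.total
}

// Snapshot returns a point-in-time view used for log compaction.
func (f *counterFSM) Snapshot() (raft.FSMSnapshot, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	return &counterSnapshot{total: f.total}, nil
}

// Restore replaces the FSM state from a previously persisted snapshot.
func (f *counterFSM) Restore(rc io.ReadCloser) error {
	defer rc.Close()
	// A real implementation would decode the snapshot payload here.
	return nil
}

type counterSnapshot struct{ total uint64 }

// Persist would serialize the captured total into the sink; stubbed here.
func (s *counterSnapshot) Persist(sink raft.SnapshotSink) error { return sink.Close() }

// Release is called when the snapshot is no longer needed.
func (s *counterSnapshot) Release() {}

func main() {
	var fsm counterFSM
	// Exercise Apply directly; in a real cluster raft invokes it after commit.
	fmt.Println(fsm.Apply(&raft.Log{Index: 1, Data: []byte("hello")})) // 5
}
```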

View File

@ -1,171 +0,0 @@
package raftbench
// raftbench provides common benchmarking functions which can be used by
// anything which implements the raft.LogStore and raft.StableStore interfaces.
// All functions accept these interfaces and perform benchmarking. This
// makes comparing backend performance easier by sharing the tests.
import (
"github.com/hashicorp/raft"
"testing"
)
func FirstIndex(b *testing.B, store raft.LogStore) {
// Create some fake data
var logs []*raft.Log
for i := 1; i < 10; i++ {
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
}
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
b.ResetTimer()
// Run FirstIndex a number of times
for n := 0; n < b.N; n++ {
store.FirstIndex()
}
}
func LastIndex(b *testing.B, store raft.LogStore) {
// Create some fake data
var logs []*raft.Log
for i := 1; i < 10; i++ {
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
}
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
b.ResetTimer()
// Run LastIndex a number of times
for n := 0; n < b.N; n++ {
store.LastIndex()
}
}
func GetLog(b *testing.B, store raft.LogStore) {
// Create some fake data
var logs []*raft.Log
for i := 1; i < 10; i++ {
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
}
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
b.ResetTimer()
// Run GetLog a number of times
for n := 0; n < b.N; n++ {
if err := store.GetLog(5, new(raft.Log)); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func StoreLog(b *testing.B, store raft.LogStore) {
// Run StoreLog a number of times
for n := 0; n < b.N; n++ {
log := &raft.Log{Index: uint64(n), Data: []byte("data")}
if err := store.StoreLog(log); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func StoreLogs(b *testing.B, store raft.LogStore) {
// Run StoreLogs a number of times. We want to set multiple logs each
// run, so we create 3 logs with incrementing indexes for each iteration.
for n := 0; n < b.N; n++ {
b.StopTimer()
offset := 3 * (n + 1)
logs := []*raft.Log{
&raft.Log{Index: uint64(offset - 2), Data: []byte("data")},
&raft.Log{Index: uint64(offset - 1), Data: []byte("data")},
&raft.Log{Index: uint64(offset), Data: []byte("data")},
}
b.StartTimer()
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func DeleteRange(b *testing.B, store raft.LogStore) {
// Create some fake data. In this case, we create 3 new log entries for each
// test case, and separate them by index in multiples of 10. This allows
// some room so that we can test deleting ranges with "extra" logs
// to ensure we stop going to the database once our max index is hit.
var logs []*raft.Log
for n := 0; n < b.N; n++ {
offset := 10 * n
for i := offset; i < offset+3; i++ {
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
}
}
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
b.ResetTimer()
// Delete a range of the data
for n := 0; n < b.N; n++ {
offset := 10 * n
if err := store.DeleteRange(uint64(offset), uint64(offset+9)); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func Set(b *testing.B, store raft.StableStore) {
// Run Set a number of times
for n := 0; n < b.N; n++ {
if err := store.Set([]byte{byte(n)}, []byte("val")); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func Get(b *testing.B, store raft.StableStore) {
// Create some fake data
for i := 1; i < 10; i++ {
if err := store.Set([]byte{byte(i)}, []byte("val")); err != nil {
b.Fatalf("err: %s", err)
}
}
b.ResetTimer()
// Run Get a number of times
for n := 0; n < b.N; n++ {
if _, err := store.Get([]byte{0x05}); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func SetUint64(b *testing.B, store raft.StableStore) {
// Run SetUint64 a number of times
for n := 0; n < b.N; n++ {
if err := store.SetUint64([]byte{byte(n)}, uint64(n)); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func GetUint64(b *testing.B, store raft.StableStore) {
// Create some fake data
for i := 0; i < 10; i++ {
if err := store.SetUint64([]byte{byte(i)}, uint64(i)); err != nil {
b.Fatalf("err: %s", err)
}
}
b.ResetTimer()
// Run GetUint64 a number of times
for n := 0; n < b.N; n++ {
if _, err := store.Get([]byte{0x05}); err != nil {
b.Fatalf("err: %s", err)
}
}
}
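Because these helpers only depend on the raft.LogStore and raft.StableStore interfaces, any backend can reuse them. A hedged sketch of wiring the BoltDB store from this same commit into one of the benchmarks (the github.com/hashicorp/raft/bench import path and the temp-file handling are assumptions for illustration):

```go
package raftboltdb

import (
	"io/ioutil"
	"os"
	"path/filepath"
	"testing"

	raftbench "github.com/hashicorp/raft/bench"
)

// BenchmarkBoltFirstIndex reuses the shared raftbench helper against the
// BoltDB-backed store defined in this package (illustrative sketch).
func BenchmarkBoltFirstIndex(b *testing.B) {
	dir, err := ioutil.TempDir("", "raftbolt")
	if err != nil {
		b.Fatalf("err: %v", err)
	}
	defer os.RemoveAll(dir)

	store, err := NewBoltStore(filepath.Join(dir, "raft.db"))
	if err != nil {
		b.Fatalf("err: %v", err)
	}
	defer store.Close()

	raftbench.FirstIndex(b, store)
}
```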

View File

@ -1,84 +0,0 @@
package raft
// AppendEntriesRequest is the command used to append entries to the
// replicated log.
type AppendEntriesRequest struct {
// Provide the current term and leader
Term uint64
Leader []byte
// Provide the previous entries for integrity checking
PrevLogEntry uint64
PrevLogTerm uint64
// New entries to commit
Entries []*Log
// Commit index on the leader
LeaderCommitIndex uint64
}
// AppendEntriesResponse is the response returned from an
// AppendEntriesRequest.
type AppendEntriesResponse struct {
// Newer term if leader is out of date
Term uint64
// Last Log is a hint to help accelerate rebuilding slow nodes
LastLog uint64
// We may not succeed if we have a conflicting entry
Success bool
// There are scenarios where this request didn't succeed
// but there's no need to wait/back-off the next attempt.
NoRetryBackoff bool
}
// RequestVoteRequest is the command used by a candidate to ask a Raft peer
// for a vote in an election.
type RequestVoteRequest struct {
// Provide the term and our id
Term uint64
Candidate []byte
// Used to ensure safety
LastLogIndex uint64
LastLogTerm uint64
}
// RequestVoteResponse is the response returned from a RequestVoteRequest.
type RequestVoteResponse struct {
// Newer term if leader is out of date
Term uint64
// Return the peers, so that a node can shutdown on removal
Peers []byte
// Is the vote granted
Granted bool
}
// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its
// log (and state machine) from a snapshot on another peer.
type InstallSnapshotRequest struct {
Term uint64
Leader []byte
// These are the last index/term included in the snapshot
LastLogIndex uint64
LastLogTerm uint64
// Peer Set in the snapshot
Peers []byte
// Size of the snapshot
Size int64
}
// InstallSnapshotResponse is the response returned from an
// InstallSnapshotRequest.
type InstallSnapshotResponse struct {
Term uint64
Success bool
}
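
The network transport later in this diff treats an AppendEntriesRequest that carries only Term and Leader (no previous-log info, no entries, no commit index) as a heartbeat and routes it through a fast path. A minimal in-package sketch of constructing that shape:

// newHeartbeat builds the request shape recognized by the transport's
// heartbeat fast path: Term and Leader set, all other fields left zero.
func newHeartbeat(term uint64, leader []byte) *AppendEntriesRequest {
	return &AppendEntriesRequest{
		Term:   term,
		Leader: leader,
	}
}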

View File

@ -1,134 +0,0 @@
package raft
import (
"fmt"
"io"
"log"
"time"
)
// Config provides any necessary configuration to
// the Raft server
type Config struct {
// Time in follower state without a leader before we attempt an election.
HeartbeatTimeout time.Duration
// Time in candidate state without a leader before we attempt an election.
ElectionTimeout time.Duration
// Time without an Apply() operation before we heartbeat to ensure
// a timely commit. Due to random staggering, may be delayed as much as
// 2x this value.
CommitTimeout time.Duration
// MaxAppendEntries controls the maximum number of append entries
// to send at once. We want to strike a balance between efficiency
// and avoiding waste if the follower is going to reject because of
// an inconsistent log.
MaxAppendEntries int
// If we are a member of a cluster, and RemovePeer is invoked for the
// local node, then we forget all peers and transition into the follower state.
// If ShutdownOnRemove is set, we additionally shut down Raft. Otherwise,
// we can become a leader of a cluster containing only this node.
ShutdownOnRemove bool
// DisableBootstrapAfterElect is used to turn off EnableSingleNode
// after the node is elected. This is used to prevent self-election
// if the node is removed from the Raft cluster via RemovePeer. Setting
// it to false will keep the bootstrap mode, allowing the node to self-elect
// and potentially bootstrap a separate cluster.
DisableBootstrapAfterElect bool
// TrailingLogs controls how many logs we leave after a snapshot. This is
// used so that we can quickly replay logs on a follower instead of being
// forced to send an entire snapshot.
TrailingLogs uint64
// SnapshotInterval controls how often we check if we should perform a snapshot.
// We randomly stagger between this value and 2x this value to prevent the entire
// cluster from performing a snapshot at once.
SnapshotInterval time.Duration
// SnapshotThreshold controls how many outstanding logs there must be before
// we perform a snapshot. This is to prevent excessive snapshots when we can
// just replay a small set of logs.
SnapshotThreshold uint64
// EnableSingleNode allows for a single node mode of operation. This
// is false by default, which prevents a lone node from electing itself
// leader.
EnableSingleNode bool
// LeaderLeaseTimeout is used to control how long the "lease" lasts
// for being the leader without being able to contact a quorum
// of nodes. If we reach this interval without contact, we will
// step down as leader.
LeaderLeaseTimeout time.Duration
// StartAsLeader forces Raft to start in the leader state. This should
// never be used except for testing purposes, as it can cause a split-brain.
StartAsLeader bool
// NotifyCh is used to provide a channel that will be notified of leadership
// changes. Raft will block writing to this channel, so it should either be
// buffered or aggressively consumed.
NotifyCh chan<- bool
// LogOutput is used as a sink for logs, unless Logger is specified.
// Defaults to os.Stderr.
LogOutput io.Writer
// Logger is a user-provided logger. If nil, a logger writing to LogOutput
// is used.
Logger *log.Logger
}
// DefaultConfig returns a Config with usable defaults.
func DefaultConfig() *Config {
return &Config{
HeartbeatTimeout: 1000 * time.Millisecond,
ElectionTimeout: 1000 * time.Millisecond,
CommitTimeout: 50 * time.Millisecond,
MaxAppendEntries: 64,
ShutdownOnRemove: true,
DisableBootstrapAfterElect: true,
TrailingLogs: 10240,
SnapshotInterval: 120 * time.Second,
SnapshotThreshold: 8192,
EnableSingleNode: false,
LeaderLeaseTimeout: 500 * time.Millisecond,
}
}
// ValidateConfig is used to validate a sane configuration
func ValidateConfig(config *Config) error {
if config.HeartbeatTimeout < 5*time.Millisecond {
return fmt.Errorf("Heartbeat timeout is too low")
}
if config.ElectionTimeout < 5*time.Millisecond {
return fmt.Errorf("Election timeout is too low")
}
if config.CommitTimeout < time.Millisecond {
return fmt.Errorf("Commit timeout is too low")
}
if config.MaxAppendEntries <= 0 {
return fmt.Errorf("MaxAppendEntries must be positive")
}
if config.MaxAppendEntries > 1024 {
return fmt.Errorf("MaxAppendEntries is too large")
}
if config.SnapshotInterval < 5*time.Millisecond {
return fmt.Errorf("Snapshot interval is too low")
}
if config.LeaderLeaseTimeout < 5*time.Millisecond {
return fmt.Errorf("Leader lease timeout is too low")
}
if config.LeaderLeaseTimeout > config.HeartbeatTimeout {
return fmt.Errorf("Leader lease timeout cannot be larger than heartbeat timeout")
}
if config.ElectionTimeout < config.HeartbeatTimeout {
return fmt.Errorf("Election timeout must be equal or greater than Heartbeat Timeout")
}
return nil
}
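
A short usage sketch, assuming the upstream import path github.com/hashicorp/raft: start from DefaultConfig, adjust a few knobs, and let ValidateConfig reject inconsistent settings before the node is started.

package main

import (
	"log"

	"github.com/hashicorp/raft"
)

func main() {
	conf := raft.DefaultConfig()
	conf.EnableSingleNode = true  // let a lone node elect itself leader (e.g. for local testing)
	conf.SnapshotThreshold = 1024 // snapshot after fewer outstanding logs than the default
	if err := raft.ValidateConfig(conf); err != nil {
		log.Fatalf("invalid raft config: %v", err)
	}
	log.Printf("heartbeat=%s election=%s", conf.HeartbeatTimeout, conf.ElectionTimeout)
}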

View File

@ -1,48 +0,0 @@
package raft
import (
"fmt"
"io"
)
// DiscardSnapshotStore is used to successfully snapshot while
// always discarding the snapshot. This is useful for when the
// log should be truncated but no snapshot should be retained.
// This should never be used for production use, and is only
// suitable for testing.
type DiscardSnapshotStore struct{}
type DiscardSnapshotSink struct{}
// NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore.
func NewDiscardSnapshotStore() *DiscardSnapshotStore {
return &DiscardSnapshotStore{}
}
func (d *DiscardSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
return &DiscardSnapshotSink{}, nil
}
func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {
return nil, nil
}
func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
return nil, nil, fmt.Errorf("open is not supported")
}
func (d *DiscardSnapshotSink) Write(b []byte) (int, error) {
return len(b), nil
}
func (d *DiscardSnapshotSink) Close() error {
return nil
}
func (d *DiscardSnapshotSink) ID() string {
return "discard"
}
func (d *DiscardSnapshotSink) Cancel() error {
return nil
}

View File

@ -1,470 +0,0 @@
package raft
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"hash"
"hash/crc64"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
const (
testPath = "permTest"
snapPath = "snapshots"
metaFilePath = "meta.json"
stateFilePath = "state.bin"
tmpSuffix = ".tmp"
)
// FileSnapshotStore implements the SnapshotStore interface and allows
// snapshots to be made on the local disk.
type FileSnapshotStore struct {
path string
retain int
logger *log.Logger
}
type snapMetaSlice []*fileSnapshotMeta
// FileSnapshotSink implements SnapshotSink with a file.
type FileSnapshotSink struct {
store *FileSnapshotStore
logger *log.Logger
dir string
meta fileSnapshotMeta
stateFile *os.File
stateHash hash.Hash64
buffered *bufio.Writer
closed bool
}
// fileSnapshotMeta is stored on disk. We also put a CRC
// on disk so that we can verify the snapshot.
type fileSnapshotMeta struct {
SnapshotMeta
CRC []byte
}
// bufferedFile is returned when we open a snapshot. This way
// reads are buffered and the file still gets closed.
type bufferedFile struct {
bh *bufio.Reader
fh *os.File
}
func (b *bufferedFile) Read(p []byte) (n int, err error) {
return b.bh.Read(p)
}
func (b *bufferedFile) Close() error {
return b.fh.Close()
}
// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based
// on a base directory. The `retain` parameter controls how many
// snapshots are retained. Must be at least 1.
func NewFileSnapshotStoreWithLogger(base string, retain int, logger *log.Logger) (*FileSnapshotStore, error) {
if retain < 1 {
return nil, fmt.Errorf("must retain at least one snapshot")
}
if logger == nil {
logger = log.New(os.Stderr, "", log.LstdFlags)
}
// Ensure our path exists
path := filepath.Join(base, snapPath)
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return nil, fmt.Errorf("snapshot path not accessible: %v", err)
}
// Setup the store
store := &FileSnapshotStore{
path: path,
retain: retain,
logger: logger,
}
// Do a permissions test
if err := store.testPermissions(); err != nil {
return nil, fmt.Errorf("permissions test failed: %v", err)
}
return store, nil
}
// NewFileSnapshotStore creates a new FileSnapshotStore based
// on a base directory. The `retain` parameter controls how many
// snapshots are retained. Must be at least 1.
func NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) {
if logOutput == nil {
logOutput = os.Stderr
}
return NewFileSnapshotStoreWithLogger(base, retain, log.New(logOutput, "", log.LstdFlags))
}
// testPermissions tries to touch a file in our path to see if it works.
func (f *FileSnapshotStore) testPermissions() error {
path := filepath.Join(f.path, testPath)
fh, err := os.Create(path)
if err != nil {
return err
}
fh.Close()
os.Remove(path)
return nil
}
// snapshotName generates a name for the snapshot.
func snapshotName(term, index uint64) string {
now := time.Now()
msec := now.UnixNano() / int64(time.Millisecond)
return fmt.Sprintf("%d-%d-%d", term, index, msec)
}
// Create is used to start a new snapshot
func (f *FileSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
// Create a new path
name := snapshotName(term, index)
path := filepath.Join(f.path, name+tmpSuffix)
f.logger.Printf("[INFO] snapshot: Creating new snapshot at %s", path)
// Make the directory
if err := os.MkdirAll(path, 0755); err != nil {
f.logger.Printf("[ERR] snapshot: Failed to make snapshot directory: %v", err)
return nil, err
}
// Create the sink
sink := &FileSnapshotSink{
store: f,
logger: f.logger,
dir: path,
meta: fileSnapshotMeta{
SnapshotMeta: SnapshotMeta{
ID: name,
Index: index,
Term: term,
Peers: peers,
},
CRC: nil,
},
}
// Write out the meta data
if err := sink.writeMeta(); err != nil {
f.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
return nil, err
}
// Open the state file
statePath := filepath.Join(path, stateFilePath)
fh, err := os.Create(statePath)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to create state file: %v", err)
return nil, err
}
sink.stateFile = fh
// Create a CRC64 hash
sink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA))
// Wrap both the hash and file in a MultiWriter with buffering
multi := io.MultiWriter(sink.stateFile, sink.stateHash)
sink.buffered = bufio.NewWriter(multi)
// Done
return sink, nil
}
// List returns available snapshots in the store.
func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {
// Get the eligible snapshots
snapshots, err := f.getSnapshots()
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
return nil, err
}
var snapMeta []*SnapshotMeta
for _, meta := range snapshots {
snapMeta = append(snapMeta, &meta.SnapshotMeta)
if len(snapMeta) == f.retain {
break
}
}
return snapMeta, nil
}
// getSnapshots returns all the known snapshots.
func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {
// Get the eligible snapshots
snapshots, err := ioutil.ReadDir(f.path)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to scan snapshot dir: %v", err)
return nil, err
}
// Populate the metadata
var snapMeta []*fileSnapshotMeta
for _, snap := range snapshots {
// Ignore any files
if !snap.IsDir() {
continue
}
// Ignore any temporary snapshots
dirName := snap.Name()
if strings.HasSuffix(dirName, tmpSuffix) {
f.logger.Printf("[WARN] snapshot: Found temporary snapshot: %v", dirName)
continue
}
// Try to read the meta data
meta, err := f.readMeta(dirName)
if err != nil {
f.logger.Printf("[WARN] snapshot: Failed to read metadata for %v: %v", dirName, err)
continue
}
// Append, but only return up to the retain count
snapMeta = append(snapMeta, meta)
}
// Sort the snapshot, reverse so we get new -> old
sort.Sort(sort.Reverse(snapMetaSlice(snapMeta)))
return snapMeta, nil
}
// readMeta is used to read the meta data for a given named backup
func (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) {
// Open the meta file
metaPath := filepath.Join(f.path, name, metaFilePath)
fh, err := os.Open(metaPath)
if err != nil {
return nil, err
}
defer fh.Close()
// Buffer the file IO
buffered := bufio.NewReader(fh)
// Read in the JSON
meta := &fileSnapshotMeta{}
dec := json.NewDecoder(buffered)
if err := dec.Decode(meta); err != nil {
return nil, err
}
return meta, nil
}
// Open takes a snapshot ID and returns a ReadCloser for that snapshot.
func (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
// Get the metadata
meta, err := f.readMeta(id)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to get meta data to open snapshot: %v", err)
return nil, nil, err
}
// Open the state file
statePath := filepath.Join(f.path, id, stateFilePath)
fh, err := os.Open(statePath)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to open state file: %v", err)
return nil, nil, err
}
// Create a CRC64 hash
stateHash := crc64.New(crc64.MakeTable(crc64.ECMA))
// Compute the hash
_, err = io.Copy(stateHash, fh)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to read state file: %v", err)
fh.Close()
return nil, nil, err
}
// Verify the hash
computed := stateHash.Sum(nil)
if !bytes.Equal(meta.CRC, computed) {
f.logger.Printf("[ERR] snapshot: CRC checksum failed (stored: %v computed: %v)",
meta.CRC, computed)
fh.Close()
return nil, nil, fmt.Errorf("CRC mismatch")
}
// Seek to the start
if _, err := fh.Seek(0, 0); err != nil {
f.logger.Printf("[ERR] snapshot: State file seek failed: %v", err)
fh.Close()
return nil, nil, err
}
// Return a buffered file
buffered := &bufferedFile{
bh: bufio.NewReader(fh),
fh: fh,
}
return &meta.SnapshotMeta, buffered, nil
}
// ReapSnapshots reaps any snapshots beyond the retain count.
func (f *FileSnapshotStore) ReapSnapshots() error {
snapshots, err := f.getSnapshots()
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
return err
}
for i := f.retain; i < len(snapshots); i++ {
path := filepath.Join(f.path, snapshots[i].ID)
f.logger.Printf("[INFO] snapshot: reaping snapshot %v", path)
if err := os.RemoveAll(path); err != nil {
f.logger.Printf("[ERR] snapshot: Failed to reap snapshot %v: %v", path, err)
return err
}
}
return nil
}
// ID returns the ID of the snapshot, can be used with Open()
// after the snapshot is finalized.
func (s *FileSnapshotSink) ID() string {
return s.meta.ID
}
// Write is used to append to the state file. We write to the
// buffered IO object to reduce the number of context switches.
func (s *FileSnapshotSink) Write(b []byte) (int, error) {
return s.buffered.Write(b)
}
// Close is used to indicate a successful end.
func (s *FileSnapshotSink) Close() error {
// Make sure close is idempotent
if s.closed {
return nil
}
s.closed = true
// Close the open handles
if err := s.finalize(); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
return err
}
// Write out the meta data
if err := s.writeMeta(); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
return err
}
// Move the directory into place
newPath := strings.TrimSuffix(s.dir, tmpSuffix)
if err := os.Rename(s.dir, newPath); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to move snapshot into place: %v", err)
return err
}
// Reap any old snapshots
s.store.ReapSnapshots()
return nil
}
// Cancel is used to indicate an unsuccessful end.
func (s *FileSnapshotSink) Cancel() error {
// Make sure close is idempotent
if s.closed {
return nil
}
s.closed = true
// Close the open handles
if err := s.finalize(); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
return err
}
// Attempt to remove all artifacts
return os.RemoveAll(s.dir)
}
// finalize is used to close all of our resources.
func (s *FileSnapshotSink) finalize() error {
// Flush any remaining data
if err := s.buffered.Flush(); err != nil {
return err
}
// Get the file size
stat, statErr := s.stateFile.Stat()
// Close the file
if err := s.stateFile.Close(); err != nil {
return err
}
// Set the file size, check after we close
if statErr != nil {
return statErr
}
s.meta.Size = stat.Size()
// Set the CRC
s.meta.CRC = s.stateHash.Sum(nil)
return nil
}
// writeMeta is used to write out the metadata we have.
func (s *FileSnapshotSink) writeMeta() error {
// Open the meta file
metaPath := filepath.Join(s.dir, metaFilePath)
fh, err := os.Create(metaPath)
if err != nil {
return err
}
defer fh.Close()
// Buffer the file IO
buffered := bufio.NewWriter(fh)
defer buffered.Flush()
// Write out as JSON
enc := json.NewEncoder(buffered)
if err := enc.Encode(&s.meta); err != nil {
return err
}
return nil
}
// Implement the sort interface for []*fileSnapshotMeta.
func (s snapMetaSlice) Len() int {
return len(s)
}
func (s snapMetaSlice) Less(i, j int) bool {
if s[i].Term != s[j].Term {
return s[i].Term < s[j].Term
}
if s[i].Index != s[j].Index {
return s[i].Index < s[j].Index
}
return s[i].ID < s[j].ID
}
func (s snapMetaSlice) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
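
A usage sketch for the file-based store (imports os and the raft package; the directory and state bytes are placeholders). The flow mirrors what the FSM snapshot path does: Create a sink, Write the serialized state, then Close, or Cancel on error.

func fileSnapshotExample(dir string, index, term uint64, peers, state []byte) error {
	snaps, err := raft.NewFileSnapshotStore(dir, 2, os.Stderr) // retain two snapshots
	if err != nil {
		return err
	}
	sink, err := snaps.Create(index, term, peers)
	if err != nil {
		return err
	}
	if _, err := sink.Write(state); err != nil {
		sink.Cancel() // removes the temporary snapshot directory
		return err
	}
	// Close writes meta.json (including the CRC), renames the directory
	// into place, and reaps snapshots beyond the retain count.
	return sink.Close()
}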

View File

@ -1,37 +0,0 @@
package raft
import (
"io"
)
// FSM provides an interface that can be implemented by
// clients to make use of the replicated log.
type FSM interface {
// Apply log is invoked once a log entry is committed.
Apply(*Log) interface{}
// Snapshot is used to support log compaction. This call should
// return an FSMSnapshot which can be used to save a point-in-time
// snapshot of the FSM. Apply and Snapshot are not called in multiple
// threads, but Apply will be called concurrently with Persist. This means
// the FSM should be implemented in a fashion that allows for concurrent
// updates while a snapshot is happening.
Snapshot() (FSMSnapshot, error)
// Restore is used to restore an FSM from a snapshot. It is not called
// concurrently with any other command. The FSM must discard all previous
// state.
Restore(io.ReadCloser) error
}
// FSMSnapshot is returned by an FSM in response to a Snapshot call.
// It must be safe to invoke FSMSnapshot methods with concurrent
// calls to Apply.
type FSMSnapshot interface {
// Persist should dump all necessary state to the WriteCloser 'sink',
// and call sink.Close() when finished or call sink.Cancel() on error.
Persist(sink SnapshotSink) error
// Release is invoked when we are finished with the snapshot.
Release()
}
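
The interface is small enough that a toy implementation fits in a screenful. A hedged sketch of an FSM holding a single string (Log, FSMSnapshot and SnapshotSink come from this package; everything else is illustrative), written as if in an application package with sync, io, io/ioutil and the raft package imported:

type wordFSM struct {
	mu   sync.Mutex
	word string
}

// Apply is invoked for every committed log entry.
func (f *wordFSM) Apply(l *raft.Log) interface{} {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.word = string(l.Data)
	return nil
}

// Snapshot captures the state now; Persist may later run concurrently
// with further Apply calls, so the snapshot must not reference f.
func (f *wordFSM) Snapshot() (raft.FSMSnapshot, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	return &wordSnapshot{word: f.word}, nil
}

// Restore discards all previous state and loads the snapshot.
func (f *wordFSM) Restore(rc io.ReadCloser) error {
	defer rc.Close()
	buf, err := ioutil.ReadAll(rc)
	if err != nil {
		return err
	}
	f.mu.Lock()
	f.word = string(buf)
	f.mu.Unlock()
	return nil
}

type wordSnapshot struct{ word string }

func (s *wordSnapshot) Persist(sink raft.SnapshotSink) error {
	if _, err := sink.Write([]byte(s.word)); err != nil {
		sink.Cancel()
		return err
	}
	return sink.Close()
}

func (s *wordSnapshot) Release() {}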

View File

@ -1,182 +0,0 @@
package raft
import (
"sync"
"time"
)
// Future is used to represent an action that may occur in the future.
type Future interface {
Error() error
}
// ApplyFuture is used for Apply() and can return the FSM response.
type ApplyFuture interface {
Future
Response() interface{}
Index() uint64
}
// errorFuture is used to return a static error.
type errorFuture struct {
err error
}
func (e errorFuture) Error() error {
return e.err
}
func (e errorFuture) Response() interface{} {
return nil
}
func (e errorFuture) Index() uint64 {
return 0
}
// deferError can be embedded to allow a future
// to provide an error in the future.
type deferError struct {
err error
errCh chan error
responded bool
}
func (d *deferError) init() {
d.errCh = make(chan error, 1)
}
func (d *deferError) Error() error {
if d.err != nil {
return d.err
}
if d.errCh == nil {
panic("waiting for response on nil channel")
}
d.err = <-d.errCh
return d.err
}
func (d *deferError) respond(err error) {
if d.errCh == nil {
return
}
if d.responded {
return
}
d.errCh <- err
close(d.errCh)
d.responded = true
}
// logFuture is used to apply a log entry and waits until
// the log is considered committed.
type logFuture struct {
deferError
log Log
policy quorumPolicy
response interface{}
dispatch time.Time
}
func (l *logFuture) Response() interface{} {
return l.response
}
func (l *logFuture) Index() uint64 {
return l.log.Index
}
type peerFuture struct {
deferError
peers []string
}
type shutdownFuture struct {
raft *Raft
}
func (s *shutdownFuture) Error() error {
for s.raft.getRoutines() > 0 {
time.Sleep(5 * time.Millisecond)
}
return nil
}
// snapshotFuture is used for waiting on a snapshot to complete.
type snapshotFuture struct {
deferError
}
// reqSnapshotFuture is used for requesting a snapshot start.
// It is only used internally.
type reqSnapshotFuture struct {
deferError
// snapshot details provided by the FSM runner before responding
index uint64
term uint64
peers []string
snapshot FSMSnapshot
}
// restoreFuture is used for requesting an FSM to perform a
// snapshot restore. Used internally only.
type restoreFuture struct {
deferError
ID string
}
// verifyFuture is used to verify the current node is still
// the leader. This is to prevent a stale read.
type verifyFuture struct {
deferError
notifyCh chan *verifyFuture
quorumSize int
votes int
voteLock sync.Mutex
}
// vote is used to respond to a verifyFuture.
// This may block when responding on the notifyCh.
func (v *verifyFuture) vote(leader bool) {
v.voteLock.Lock()
defer v.voteLock.Unlock()
// Guard against having notified already
if v.notifyCh == nil {
return
}
if leader {
v.votes++
if v.votes >= v.quorumSize {
v.notifyCh <- v
v.notifyCh = nil
}
} else {
v.notifyCh <- v
v.notifyCh = nil
}
}
// appendFuture is used for waiting on a pipelined append
// entries RPC.
type appendFuture struct {
deferError
start time.Time
args *AppendEntriesRequest
resp *AppendEntriesResponse
}
func (a *appendFuture) Start() time.Time {
return a.start
}
func (a *appendFuture) Request() *AppendEntriesRequest {
return a.args
}
func (a *appendFuture) Response() *AppendEntriesResponse {
return a.resp
}

View File

@ -1,213 +0,0 @@
package raft
import (
"container/list"
"sync"
)
// quorumPolicy allows individual logFutures to have different
// commitment rules while still using the inflight mechanism.
type quorumPolicy interface {
// Checks if a commit from a given peer is enough to
// satisfy the commitment rules
Commit() bool
// Checks if a commit is committed
IsCommitted() bool
}
// majorityQuorum is used by Apply transactions and requires
// a simple majority of nodes.
type majorityQuorum struct {
count int
votesNeeded int
}
func newMajorityQuorum(clusterSize int) *majorityQuorum {
votesNeeded := (clusterSize / 2) + 1
return &majorityQuorum{count: 0, votesNeeded: votesNeeded}
}
func (m *majorityQuorum) Commit() bool {
m.count++
return m.count >= m.votesNeeded
}
func (m *majorityQuorum) IsCommitted() bool {
return m.count >= m.votesNeeded
}
// inflight is used to track operations that are still in-flight.
type inflight struct {
sync.Mutex
committed *list.List
commitCh chan struct{}
minCommit uint64
maxCommit uint64
operations map[uint64]*logFuture
stopCh chan struct{}
}
// newInflight returns an inflight struct that notifies
// the provided channel when logs are finished committing.
func newInflight(commitCh chan struct{}) *inflight {
return &inflight{
committed: list.New(),
commitCh: commitCh,
minCommit: 0,
maxCommit: 0,
operations: make(map[uint64]*logFuture),
stopCh: make(chan struct{}),
}
}
// Start is used to mark a logFuture as being inflight. It
// also commits the entry, as it is assumed the leader is
// starting.
func (i *inflight) Start(l *logFuture) {
i.Lock()
defer i.Unlock()
i.start(l)
}
// StartAll is used to mark a list of logFutures as being
// inflight. It also commits each entry as the leader is
// assumed to be starting.
func (i *inflight) StartAll(logs []*logFuture) {
i.Lock()
defer i.Unlock()
for _, l := range logs {
i.start(l)
}
}
// start is used to mark a single entry as inflight,
// must be invoked with the lock held.
func (i *inflight) start(l *logFuture) {
idx := l.log.Index
i.operations[idx] = l
if idx > i.maxCommit {
i.maxCommit = idx
}
if i.minCommit == 0 {
i.minCommit = idx
}
i.commit(idx)
}
// Cancel is used to cancel all in-flight operations.
// This is done when the leader steps down, and all futures
// are sent the given error.
func (i *inflight) Cancel(err error) {
// Close the channel first to unblock any pending commits
close(i.stopCh)
// Lock after close to avoid deadlock
i.Lock()
defer i.Unlock()
// Respond to all inflight operations
for _, op := range i.operations {
op.respond(err)
}
// Clear all the committed but not processed
for e := i.committed.Front(); e != nil; e = e.Next() {
e.Value.(*logFuture).respond(err)
}
// Clear the map
i.operations = make(map[uint64]*logFuture)
// Clear the list of committed
i.committed = list.New()
// Close the commitCh
close(i.commitCh)
// Reset indexes
i.minCommit = 0
i.maxCommit = 0
}
// Committed returns all the committed operations in order.
func (i *inflight) Committed() (l *list.List) {
i.Lock()
l, i.committed = i.committed, list.New()
i.Unlock()
return l
}
// Commit is used by leader replication routines to indicate that
// a follower was finished committing a log to disk.
func (i *inflight) Commit(index uint64) {
i.Lock()
defer i.Unlock()
i.commit(index)
}
// CommitRange is used to commit a range of indexes inclusively.
// It is optimized to avoid commits for indexes that are not tracked.
func (i *inflight) CommitRange(minIndex, maxIndex uint64) {
i.Lock()
defer i.Unlock()
// Update the minimum index
minIndex = max(i.minCommit, minIndex)
// Commit each index
for idx := minIndex; idx <= maxIndex; idx++ {
i.commit(idx)
}
}
// commit is used to commit a single index. Must be called with the lock held.
func (i *inflight) commit(index uint64) {
op, ok := i.operations[index]
if !ok {
// Ignore if not in the map, as it may be committed already
return
}
// Check if we've satisfied the commit
if !op.policy.Commit() {
return
}
// Cannot commit if this is not the minimum inflight. This can happen
// if the quorum size changes, meaning a previous commit requires a larger
// quorum than this commit. We MUST block until the previous log is committed,
// otherwise logs will be applied out of order.
if index != i.minCommit {
return
}
NOTIFY:
// Add the operation to the committed list
i.committed.PushBack(op)
// Stop tracking since it is committed
delete(i.operations, index)
// Update the indexes
if index == i.maxCommit {
i.minCommit = 0
i.maxCommit = 0
} else {
i.minCommit++
}
// Check if the next in-flight operation is ready
if i.minCommit != 0 {
op = i.operations[i.minCommit]
if op.policy.IsCommitted() {
index = i.minCommit
goto NOTIFY
}
}
// Async notify of ready operations
asyncNotifyCh(i.commitCh)
}

View File

@ -1,116 +0,0 @@
package raft
import (
"sync"
)
// InmemStore implements the LogStore and StableStore interfaces.
// It should NOT EVER be used for production. It is used only for
// unit tests. Use the MDBStore implementation instead.
type InmemStore struct {
l sync.RWMutex
lowIndex uint64
highIndex uint64
logs map[uint64]*Log
kv map[string][]byte
kvInt map[string]uint64
}
// NewInmemStore returns a new in-memory backend. Do not ever
// use for production. Only for testing.
func NewInmemStore() *InmemStore {
i := &InmemStore{
logs: make(map[uint64]*Log),
kv: make(map[string][]byte),
kvInt: make(map[string]uint64),
}
return i
}
// FirstIndex implements the LogStore interface.
func (i *InmemStore) FirstIndex() (uint64, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.lowIndex, nil
}
// LastIndex implements the LogStore interface.
func (i *InmemStore) LastIndex() (uint64, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.highIndex, nil
}
// GetLog implements the LogStore interface.
func (i *InmemStore) GetLog(index uint64, log *Log) error {
i.l.RLock()
defer i.l.RUnlock()
l, ok := i.logs[index]
if !ok {
return ErrLogNotFound
}
*log = *l
return nil
}
// StoreLog implements the LogStore interface.
func (i *InmemStore) StoreLog(log *Log) error {
return i.StoreLogs([]*Log{log})
}
// StoreLogs implements the LogStore interface.
func (i *InmemStore) StoreLogs(logs []*Log) error {
i.l.Lock()
defer i.l.Unlock()
for _, l := range logs {
i.logs[l.Index] = l
if i.lowIndex == 0 {
i.lowIndex = l.Index
}
if l.Index > i.highIndex {
i.highIndex = l.Index
}
}
return nil
}
// DeleteRange implements the LogStore interface.
func (i *InmemStore) DeleteRange(min, max uint64) error {
i.l.Lock()
defer i.l.Unlock()
for j := min; j <= max; j++ {
delete(i.logs, j)
}
i.lowIndex = max + 1
return nil
}
// Set implements the StableStore interface.
func (i *InmemStore) Set(key []byte, val []byte) error {
i.l.Lock()
defer i.l.Unlock()
i.kv[string(key)] = val
return nil
}
// Get implements the StableStore interface.
func (i *InmemStore) Get(key []byte) ([]byte, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.kv[string(key)], nil
}
// SetUint64 implements the StableStore interface.
func (i *InmemStore) SetUint64(key []byte, val uint64) error {
i.l.Lock()
defer i.l.Unlock()
i.kvInt[string(key)] = val
return nil
}
// GetUint64 implements the StableStore interface.
func (i *InmemStore) GetUint64(key []byte) (uint64, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.kvInt[string(key)], nil
}
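
A short sketch of the LogStore/StableStore call pattern against the in-memory store (again, only suitable for tests), assuming the raft import; the "CurrentTerm" key is just an illustrative choice:

func inmemStoreExample() error {
	store := raft.NewInmemStore()
	logs := []*raft.Log{
		{Index: 1, Term: 1, Type: raft.LogCommand, Data: []byte("a")},
		{Index: 2, Term: 1, Type: raft.LogCommand, Data: []byte("b")},
	}
	if err := store.StoreLogs(logs); err != nil {
		return err
	}
	var out raft.Log
	if err := store.GetLog(2, &out); err != nil {
		return err
	}
	if err := store.SetUint64([]byte("CurrentTerm"), 1); err != nil {
		return err
	}
	// Compact everything up to and including index 1.
	return store.DeleteRange(1, 1)
}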

View File

@ -1,315 +0,0 @@
package raft
import (
"fmt"
"io"
"sync"
"time"
)
// NewInmemAddr returns a new in-memory addr with
// a randomly generated UUID as the ID.
func NewInmemAddr() string {
return generateUUID()
}
// inmemPipeline is used to pipeline requests for the in-mem transport.
type inmemPipeline struct {
trans *InmemTransport
peer *InmemTransport
peerAddr string
doneCh chan AppendFuture
inprogressCh chan *inmemPipelineInflight
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
}
type inmemPipelineInflight struct {
future *appendFuture
respCh <-chan RPCResponse
}
// InmemTransport implements the Transport interface, to allow Raft to be
// tested in-memory without going over a network.
type InmemTransport struct {
sync.RWMutex
consumerCh chan RPC
localAddr string
peers map[string]*InmemTransport
pipelines []*inmemPipeline
timeout time.Duration
}
// NewInmemTransport is used to initialize a new transport
// and generates a random local address.
func NewInmemTransport() (string, *InmemTransport) {
addr := NewInmemAddr()
trans := &InmemTransport{
consumerCh: make(chan RPC, 16),
localAddr: addr,
peers: make(map[string]*InmemTransport),
timeout: 50 * time.Millisecond,
}
return addr, trans
}
// SetHeartbeatHandler is used to set an optional fast path for
// heartbeats; it is not supported by this transport.
func (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {
}
// Consumer implements the Transport interface.
func (i *InmemTransport) Consumer() <-chan RPC {
return i.consumerCh
}
// LocalAddr implements the Transport interface.
func (i *InmemTransport) LocalAddr() string {
return i.localAddr
}
// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
func (i *InmemTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
i.RLock()
peer, ok := i.peers[target]
i.RUnlock()
if !ok {
return nil, fmt.Errorf("failed to connect to peer: %v", target)
}
pipeline := newInmemPipeline(i, peer, target)
i.Lock()
i.pipelines = append(i.pipelines, pipeline)
i.Unlock()
return pipeline, nil
}
// AppendEntries implements the Transport interface.
func (i *InmemTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
if err != nil {
return err
}
// Copy the result back
out := rpcResp.Response.(*AppendEntriesResponse)
*resp = *out
return nil
}
// RequestVote implements the Transport interface.
func (i *InmemTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
if err != nil {
return err
}
// Copy the result back
out := rpcResp.Response.(*RequestVoteResponse)
*resp = *out
return nil
}
// InstallSnapshot implements the Transport interface.
func (i *InmemTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout)
if err != nil {
return err
}
// Copy the result back
out := rpcResp.Response.(*InstallSnapshotResponse)
*resp = *out
return nil
}
func (i *InmemTransport) makeRPC(target string, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) {
i.RLock()
peer, ok := i.peers[target]
i.RUnlock()
if !ok {
err = fmt.Errorf("failed to connect to peer: %v", target)
return
}
// Send the RPC over
respCh := make(chan RPCResponse)
peer.consumerCh <- RPC{
Command: args,
Reader: r,
RespChan: respCh,
}
// Wait for a response
select {
case rpcResp = <-respCh:
if rpcResp.Error != nil {
err = rpcResp.Error
}
case <-time.After(timeout):
err = fmt.Errorf("command timed out")
}
return
}
// EncodePeer implements the Transport interface. It uses the UUID as the
// address directly.
func (i *InmemTransport) EncodePeer(p string) []byte {
return []byte(p)
}
// DecodePeer implements the Transport interface. It returns the UUID
// as the address directly.
func (i *InmemTransport) DecodePeer(buf []byte) string {
return string(buf)
}
// Connect is used to connect this transport to another transport for
// a given peer name. This allows for local routing.
func (i *InmemTransport) Connect(peer string, trans *InmemTransport) {
i.Lock()
defer i.Unlock()
i.peers[peer] = trans
}
// Disconnect is used to remove the ability to route to a given peer.
func (i *InmemTransport) Disconnect(peer string) {
i.Lock()
defer i.Unlock()
delete(i.peers, peer)
// Disconnect any pipelines
n := len(i.pipelines)
for idx := 0; idx < n; idx++ {
if i.pipelines[idx].peerAddr == peer {
i.pipelines[idx].Close()
i.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil
idx--
n--
}
}
i.pipelines = i.pipelines[:n]
}
// DisconnectAll is used to remove all routes to peers.
func (i *InmemTransport) DisconnectAll() {
i.Lock()
defer i.Unlock()
i.peers = make(map[string]*InmemTransport)
// Handle pipelines
for _, pipeline := range i.pipelines {
pipeline.Close()
}
i.pipelines = nil
}
func newInmemPipeline(trans *InmemTransport, peer *InmemTransport, addr string) *inmemPipeline {
i := &inmemPipeline{
trans: trans,
peer: peer,
peerAddr: addr,
doneCh: make(chan AppendFuture, 16),
inprogressCh: make(chan *inmemPipelineInflight, 16),
shutdownCh: make(chan struct{}),
}
go i.decodeResponses()
return i
}
func (i *inmemPipeline) decodeResponses() {
timeout := i.trans.timeout
for {
select {
case inp := <-i.inprogressCh:
var timeoutCh <-chan time.Time
if timeout > 0 {
timeoutCh = time.After(timeout)
}
select {
case rpcResp := <-inp.respCh:
// Copy the result back
*inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse)
inp.future.respond(rpcResp.Error)
select {
case i.doneCh <- inp.future:
case <-i.shutdownCh:
return
}
case <-timeoutCh:
inp.future.respond(fmt.Errorf("command timed out"))
select {
case i.doneCh <- inp.future:
case <-i.shutdownCh:
return
}
case <-i.shutdownCh:
return
}
case <-i.shutdownCh:
return
}
}
}
func (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
// Create a new future
future := &appendFuture{
start: time.Now(),
args: args,
resp: resp,
}
future.init()
// Handle a timeout
var timeout <-chan time.Time
if i.trans.timeout > 0 {
timeout = time.After(i.trans.timeout)
}
// Send the RPC over
respCh := make(chan RPCResponse, 1)
rpc := RPC{
Command: args,
RespChan: respCh,
}
select {
case i.peer.consumerCh <- rpc:
case <-timeout:
return nil, fmt.Errorf("command enqueue timeout")
case <-i.shutdownCh:
return nil, ErrPipelineShutdown
}
// Send to be decoded
select {
case i.inprogressCh <- &inmemPipelineInflight{future, respCh}:
return future, nil
case <-i.shutdownCh:
return nil, ErrPipelineShutdown
}
}
func (i *inmemPipeline) Consumer() <-chan AppendFuture {
return i.doneCh
}
func (i *inmemPipeline) Close() error {
i.shutdownLock.Lock()
defer i.shutdownLock.Unlock()
if i.shutdown {
return nil
}
i.shutdown = true
close(i.shutdownCh)
return nil
}
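
A sketch of wiring two in-memory transports together and answering a single RPC by hand (normally a Raft node consumes the channel); it assumes the RPC and RPCResponse types from this package, whose Command/RespChan and Response fields are used above:

func inmemTransportExample() error {
	addrA, a := raft.NewInmemTransport()
	addrB, b := raft.NewInmemTransport()
	a.Connect(addrB, b) // let a route RPCs to b

	// Answer the next RPC arriving at b.
	go func() {
		rpc := <-b.Consumer()
		rpc.RespChan <- raft.RPCResponse{
			Response: &raft.AppendEntriesResponse{Term: 1, Success: true},
		}
	}()

	var resp raft.AppendEntriesResponse
	req := &raft.AppendEntriesRequest{Term: 1, Leader: a.EncodePeer(addrA)}
	return a.AppendEntries(addrB, req, &resp)
}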

View File

@ -1,60 +0,0 @@
package raft
// LogType describes various types of log entries.
type LogType uint8
const (
// LogCommand is applied to a user FSM.
LogCommand LogType = iota
// LogNoop is used to assert leadership.
LogNoop
// LogAddPeer is used to add a new peer.
LogAddPeer
// LogRemovePeer is used to remove an existing peer.
LogRemovePeer
// LogBarrier is used to ensure all preceding operations have been
// applied to the FSM. It is similar to LogNoop, but instead of returning
// once committed, it only returns once the FSM manager acks it. Otherwise
// it is possible there are operations committed but not yet applied to
// the FSM.
LogBarrier
)
// Log entries are replicated to all members of the Raft cluster
// and form the heart of the replicated state machine.
type Log struct {
Index uint64
Term uint64
Type LogType
Data []byte
// peer is not exported since it is not transmitted, only used
// internally to construct the Data field.
peer string
}
// LogStore is used to provide an interface for storing
// and retrieving logs in a durable fashion.
type LogStore interface {
// Returns the first index written. 0 for no entries.
FirstIndex() (uint64, error)
// Returns the last index written. 0 for no entries.
LastIndex() (uint64, error)
// Gets a log entry at a given index.
GetLog(index uint64, log *Log) error
// Stores a log entry.
StoreLog(log *Log) error
// Stores multiple log entries.
StoreLogs(logs []*Log) error
// Deletes a range of log entries. The range is inclusive.
DeleteRange(min, max uint64) error
}

View File

@ -1,79 +0,0 @@
package raft
import (
"fmt"
"sync"
)
// LogCache wraps any LogStore implementation to provide an
// in-memory ring buffer. This is used to cache access to
// the recently written entries. For implementations that do not
// cache themselves, this can provide a substantial boost by
// avoiding disk I/O on recent entries.
type LogCache struct {
store LogStore
cache []*Log
l sync.RWMutex
}
// NewLogCache is used to create a new LogCache with the
// given capacity and backend store.
func NewLogCache(capacity int, store LogStore) (*LogCache, error) {
if capacity <= 0 {
return nil, fmt.Errorf("capacity must be positive")
}
c := &LogCache{
store: store,
cache: make([]*Log, capacity),
}
return c, nil
}
func (c *LogCache) GetLog(idx uint64, log *Log) error {
// Check the buffer for an entry
c.l.RLock()
cached := c.cache[idx%uint64(len(c.cache))]
c.l.RUnlock()
// Check if entry is valid
if cached != nil && cached.Index == idx {
*log = *cached
return nil
}
// Forward request on cache miss
return c.store.GetLog(idx, log)
}
func (c *LogCache) StoreLog(log *Log) error {
return c.StoreLogs([]*Log{log})
}
func (c *LogCache) StoreLogs(logs []*Log) error {
// Insert the logs into the ring buffer
c.l.Lock()
for _, l := range logs {
c.cache[l.Index%uint64(len(c.cache))] = l
}
c.l.Unlock()
return c.store.StoreLogs(logs)
}
func (c *LogCache) FirstIndex() (uint64, error) {
return c.store.FirstIndex()
}
func (c *LogCache) LastIndex() (uint64, error) {
return c.store.LastIndex()
}
func (c *LogCache) DeleteRange(min, max uint64) error {
// Invalidate the cache on deletes
c.l.Lock()
c.cache = make([]*Log, len(c.cache))
c.l.Unlock()
return c.store.DeleteRange(min, max)
}
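
A sketch of wrapping a LogStore with the cache; any backend works, though a disk-backed store benefits most. Assumes the raft import:

func logCacheExample() (raft.LogStore, error) {
	base := raft.NewInmemStore() // stand-in for a disk-backed LogStore
	cached, err := raft.NewLogCache(512, base)
	if err != nil {
		return nil, err
	}
	if err := cached.StoreLog(&raft.Log{Index: 1, Term: 1, Data: []byte("x")}); err != nil {
		return nil, err
	}
	var out raft.Log
	if err := cached.GetLog(1, &out); err != nil { // served from the ring buffer
		return nil, err
	}
	return cached, nil
}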

View File

@ -1,622 +0,0 @@
package raft
import (
"bufio"
"errors"
"fmt"
"io"
"log"
"net"
"os"
"sync"
"time"
"github.com/hashicorp/go-msgpack/codec"
)
const (
rpcAppendEntries uint8 = iota
rpcRequestVote
rpcInstallSnapshot
// DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport.
DefaultTimeoutScale = 256 * 1024 // 256KB
// rpcMaxPipeline controls the maximum number of outstanding
// AppendEntries RPC calls.
rpcMaxPipeline = 128
)
var (
// ErrTransportShutdown is returned when operations on a transport are
// invoked after it's been terminated.
ErrTransportShutdown = errors.New("transport shutdown")
// ErrPipelineShutdown is returned when the pipeline is closed.
ErrPipelineShutdown = errors.New("append pipeline closed")
)
/*
NetworkTransport provides a network based transport that can be
used to communicate with Raft on remote machines. It requires
an underlying stream layer to provide a stream abstraction, which can
be simple TCP, TLS, etc.
This transport is very simple and lightweight. Each RPC request is
framed by sending a byte that indicates the message type, followed
by the MsgPack encoded request.
The response is an error string followed by the response object,
both are encoded using MsgPack.
InstallSnapshot is special, in that after the RPC request we stream
the entire state. That socket is not re-used as the connection state
is not known if there is an error.
*/
type NetworkTransport struct {
connPool map[string][]*netConn
connPoolLock sync.Mutex
consumeCh chan RPC
heartbeatFn func(RPC)
heartbeatFnLock sync.Mutex
logger *log.Logger
maxPool int
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
stream StreamLayer
timeout time.Duration
TimeoutScale int
}
// StreamLayer is used with the NetworkTransport to provide
// the low level stream abstraction.
type StreamLayer interface {
net.Listener
// Dial is used to create a new outgoing connection
Dial(address string, timeout time.Duration) (net.Conn, error)
}
type netConn struct {
target string
conn net.Conn
r *bufio.Reader
w *bufio.Writer
dec *codec.Decoder
enc *codec.Encoder
}
func (n *netConn) Release() error {
return n.conn.Close()
}
type netPipeline struct {
conn *netConn
trans *NetworkTransport
doneCh chan AppendFuture
inprogressCh chan *appendFuture
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
}
// NewNetworkTransport creates a new network transport with the given dialer
// and listener. The maxPool controls how many connections we will pool. The
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
// the timeout by (SnapshotSize / TimeoutScale).
func NewNetworkTransport(
stream StreamLayer,
maxPool int,
timeout time.Duration,
logOutput io.Writer,
) *NetworkTransport {
if logOutput == nil {
logOutput = os.Stderr
}
return NewNetworkTransportWithLogger(stream, maxPool, timeout, log.New(logOutput, "", log.LstdFlags))
}
// NewNetworkTransportWithLogger creates a new network transport with the given dialer
// and listener. The maxPool controls how many connections we will pool. The
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
// the timeout by (SnapshotSize / TimeoutScale).
func NewNetworkTransportWithLogger(
stream StreamLayer,
maxPool int,
timeout time.Duration,
logger *log.Logger,
) *NetworkTransport {
if logger == nil {
logger = log.New(os.Stderr, "", log.LstdFlags)
}
trans := &NetworkTransport{
connPool: make(map[string][]*netConn),
consumeCh: make(chan RPC),
logger: logger,
maxPool: maxPool,
shutdownCh: make(chan struct{}),
stream: stream,
timeout: timeout,
TimeoutScale: DefaultTimeoutScale,
}
go trans.listen()
return trans
}
// SetHeartbeatHandler is used to set up a heartbeat handler
// as a fast path. This is to avoid head-of-line blocking from
// disk IO.
func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) {
n.heartbeatFnLock.Lock()
defer n.heartbeatFnLock.Unlock()
n.heartbeatFn = cb
}
// Close is used to stop the network transport.
func (n *NetworkTransport) Close() error {
n.shutdownLock.Lock()
defer n.shutdownLock.Unlock()
if !n.shutdown {
close(n.shutdownCh)
n.stream.Close()
n.shutdown = true
}
return nil
}
// Consumer implements the Transport interface.
func (n *NetworkTransport) Consumer() <-chan RPC {
return n.consumeCh
}
// LocalAddr implements the Transport interface.
func (n *NetworkTransport) LocalAddr() string {
return n.stream.Addr().String()
}
// IsShutdown is used to check if the transport is shutdown.
func (n *NetworkTransport) IsShutdown() bool {
select {
case <-n.shutdownCh:
return true
default:
return false
}
}
// getPooledConn is used to grab a pooled connection.
func (n *NetworkTransport) getPooledConn(target string) *netConn {
n.connPoolLock.Lock()
defer n.connPoolLock.Unlock()
conns, ok := n.connPool[target]
if !ok || len(conns) == 0 {
return nil
}
var conn *netConn
num := len(conns)
conn, conns[num-1] = conns[num-1], nil
n.connPool[target] = conns[:num-1]
return conn
}
// getConn is used to get a connection from the pool.
func (n *NetworkTransport) getConn(target string) (*netConn, error) {
// Check for a pooled conn
if conn := n.getPooledConn(target); conn != nil {
return conn, nil
}
// Dial a new connection
conn, err := n.stream.Dial(target, n.timeout)
if err != nil {
return nil, err
}
// Wrap the conn
netConn := &netConn{
target: target,
conn: conn,
r: bufio.NewReader(conn),
w: bufio.NewWriter(conn),
}
// Setup encoder/decoders
netConn.dec = codec.NewDecoder(netConn.r, &codec.MsgpackHandle{})
netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{})
// Done
return netConn, nil
}
// returnConn returns a connection back to the pool.
func (n *NetworkTransport) returnConn(conn *netConn) {
n.connPoolLock.Lock()
defer n.connPoolLock.Unlock()
key := conn.target
conns := n.connPool[key]
if !n.IsShutdown() && len(conns) < n.maxPool {
n.connPool[key] = append(conns, conn)
} else {
conn.Release()
}
}
// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
func (n *NetworkTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
// Get a connection
conn, err := n.getConn(target)
if err != nil {
return nil, err
}
// Create the pipeline
return newNetPipeline(n, conn), nil
}
// AppendEntries implements the Transport interface.
func (n *NetworkTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
return n.genericRPC(target, rpcAppendEntries, args, resp)
}
// RequestVote implements the Transport interface.
func (n *NetworkTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
return n.genericRPC(target, rpcRequestVote, args, resp)
}
// genericRPC handles a simple request/response RPC.
func (n *NetworkTransport) genericRPC(target string, rpcType uint8, args interface{}, resp interface{}) error {
// Get a conn
conn, err := n.getConn(target)
if err != nil {
return err
}
// Set a deadline
if n.timeout > 0 {
conn.conn.SetDeadline(time.Now().Add(n.timeout))
}
// Send the RPC
if err := sendRPC(conn, rpcType, args); err != nil {
return err
}
// Decode the response
canReturn, err := decodeResponse(conn, resp)
if canReturn {
n.returnConn(conn)
}
return err
}
// InstallSnapshot implements the Transport interface.
func (n *NetworkTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
// Get a conn, always close for InstallSnapshot
conn, err := n.getConn(target)
if err != nil {
return err
}
defer conn.Release()
// Set a deadline, scaled by request size
if n.timeout > 0 {
timeout := n.timeout * time.Duration(args.Size/int64(n.TimeoutScale))
if timeout < n.timeout {
timeout = n.timeout
}
conn.conn.SetDeadline(time.Now().Add(timeout))
}
// Send the RPC
if err := sendRPC(conn, rpcInstallSnapshot, args); err != nil {
return err
}
// Stream the state
if _, err := io.Copy(conn.w, data); err != nil {
return err
}
// Flush
if err := conn.w.Flush(); err != nil {
return err
}
// Decode the response, do not return conn
_, err = decodeResponse(conn, resp)
return err
}
// EncodePeer implements the Transport interface.
func (n *NetworkTransport) EncodePeer(p string) []byte {
return []byte(p)
}
// DecodePeer implements the Transport interface.
func (n *NetworkTransport) DecodePeer(buf []byte) string {
return string(buf)
}
// listen is used to handle incoming connections.
func (n *NetworkTransport) listen() {
for {
// Accept incoming connections
conn, err := n.stream.Accept()
if err != nil {
if n.IsShutdown() {
return
}
n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err)
continue
}
n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr())
// Handle the connection in dedicated routine
go n.handleConn(conn)
}
}
// handleConn is used to handle an inbound connection for its lifespan.
func (n *NetworkTransport) handleConn(conn net.Conn) {
defer conn.Close()
r := bufio.NewReader(conn)
w := bufio.NewWriter(conn)
dec := codec.NewDecoder(r, &codec.MsgpackHandle{})
enc := codec.NewEncoder(w, &codec.MsgpackHandle{})
for {
if err := n.handleCommand(r, dec, enc); err != nil {
if err != io.EOF {
n.logger.Printf("[ERR] raft-net: Failed to decode incoming command: %v", err)
}
return
}
if err := w.Flush(); err != nil {
n.logger.Printf("[ERR] raft-net: Failed to flush response: %v", err)
return
}
}
}
// handleCommand is used to decode and dispatch a single command.
func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error {
// Get the rpc type
rpcType, err := r.ReadByte()
if err != nil {
return err
}
// Create the RPC object
respCh := make(chan RPCResponse, 1)
rpc := RPC{
RespChan: respCh,
}
// Decode the command
isHeartbeat := false
switch rpcType {
case rpcAppendEntries:
var req AppendEntriesRequest
if err := dec.Decode(&req); err != nil {
return err
}
rpc.Command = &req
// Check if this is a heartbeat
if req.Term != 0 && req.Leader != nil &&
req.PrevLogEntry == 0 && req.PrevLogTerm == 0 &&
len(req.Entries) == 0 && req.LeaderCommitIndex == 0 {
isHeartbeat = true
}
case rpcRequestVote:
var req RequestVoteRequest
if err := dec.Decode(&req); err != nil {
return err
}
rpc.Command = &req
case rpcInstallSnapshot:
var req InstallSnapshotRequest
if err := dec.Decode(&req); err != nil {
return err
}
rpc.Command = &req
rpc.Reader = io.LimitReader(r, req.Size)
default:
return fmt.Errorf("unknown rpc type %d", rpcType)
}
// Check for heartbeat fast-path
if isHeartbeat {
n.heartbeatFnLock.Lock()
fn := n.heartbeatFn
n.heartbeatFnLock.Unlock()
if fn != nil {
fn(rpc)
goto RESP
}
}
// Dispatch the RPC
select {
case n.consumeCh <- rpc:
case <-n.shutdownCh:
return ErrTransportShutdown
}
// Wait for response
RESP:
select {
case resp := <-respCh:
// Send the error first
respErr := ""
if resp.Error != nil {
respErr = resp.Error.Error()
}
if err := enc.Encode(respErr); err != nil {
return err
}
// Send the response
if err := enc.Encode(resp.Response); err != nil {
return err
}
case <-n.shutdownCh:
return ErrTransportShutdown
}
return nil
}
// decodeResponse is used to decode an RPC response and reports whether
// the connection can be reused.
func decodeResponse(conn *netConn, resp interface{}) (bool, error) {
// Decode the error if any
var rpcError string
if err := conn.dec.Decode(&rpcError); err != nil {
conn.Release()
return false, err
}
// Decode the response
if err := conn.dec.Decode(resp); err != nil {
conn.Release()
return false, err
}
// Format an error if any
if rpcError != "" {
return true, errors.New(rpcError)
}
return true, nil
}
// sendRPC is used to encode and send the RPC.
func sendRPC(conn *netConn, rpcType uint8, args interface{}) error {
// Write the request type
if err := conn.w.WriteByte(rpcType); err != nil {
conn.Release()
return err
}
// Send the request
if err := conn.enc.Encode(args); err != nil {
conn.Release()
return err
}
// Flush
if err := conn.w.Flush(); err != nil {
conn.Release()
return err
}
return nil
}
// newNetPipeline is used to construct a netPipeline from a given
// transport and connection.
func newNetPipeline(trans *NetworkTransport, conn *netConn) *netPipeline {
n := &netPipeline{
conn: conn,
trans: trans,
doneCh: make(chan AppendFuture, rpcMaxPipeline),
inprogressCh: make(chan *appendFuture, rpcMaxPipeline),
shutdownCh: make(chan struct{}),
}
go n.decodeResponses()
return n
}
// decodeResponses is a long running routine that decodes the responses
// sent on the connection.
func (n *netPipeline) decodeResponses() {
timeout := n.trans.timeout
for {
select {
case future := <-n.inprogressCh:
if timeout > 0 {
n.conn.conn.SetReadDeadline(time.Now().Add(timeout))
}
_, err := decodeResponse(n.conn, future.resp)
future.respond(err)
select {
case n.doneCh <- future:
case <-n.shutdownCh:
return
}
case <-n.shutdownCh:
return
}
}
}
// AppendEntries is used to pipeline a new append entries request.
func (n *netPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
// Create a new future
future := &appendFuture{
start: time.Now(),
args: args,
resp: resp,
}
future.init()
// Add a send timeout
if timeout := n.trans.timeout; timeout > 0 {
n.conn.conn.SetWriteDeadline(time.Now().Add(timeout))
}
// Send the RPC
if err := sendRPC(n.conn, rpcAppendEntries, future.args); err != nil {
return nil, err
}
// Hand-off for decoding, this can also cause back-pressure
// to prevent too many inflight requests
select {
case n.inprogressCh <- future:
return future, nil
case <-n.shutdownCh:
return nil, ErrPipelineShutdown
}
}
// Consumer returns a channel that can be used to consume complete futures.
func (n *netPipeline) Consumer() <-chan AppendFuture {
return n.doneCh
}
// Close is used to shut down the pipeline connection.
func (n *netPipeline) Close() error {
n.shutdownLock.Lock()
defer n.shutdownLock.Unlock()
if n.shutdown {
return nil
}
// Release the connection
n.conn.Release()
n.shutdown = true
close(n.shutdownCh)
return nil
}
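
NewNetworkTransport only needs a StreamLayer, so a plain-TCP transport is a few lines on top of the standard library. A hedged sketch (the upstream library ships its own TCP helper; this one exists only to illustrate the interface) using net, os, time and the raft package:

type tcpStreamLayer struct {
	net.Listener // Accept/Close/Addr come from the embedded listener
}

// Dial satisfies raft.StreamLayer by opening an outgoing TCP connection.
func (t *tcpStreamLayer) Dial(address string, timeout time.Duration) (net.Conn, error) {
	return net.DialTimeout("tcp", address, timeout)
}

func newTCPTransport(bindAddr string) (*raft.NetworkTransport, error) {
	ln, err := net.Listen("tcp", bindAddr)
	if err != nil {
		return nil, err
	}
	// Pool up to 3 connections per peer and apply 10s I/O deadlines.
	return raft.NewNetworkTransport(&tcpStreamLayer{ln}, 3, 10*time.Second, os.Stderr), nil
}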

View File

@ -1,122 +0,0 @@
package raft
import (
"bytes"
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"sync"
)
const (
jsonPeerPath = "peers.json"
)
// PeerStore provides an interface for persistent storage and
// retrieval of peers. We use a separate interface from StableStore
// since the peers may need to be edited by a human operator. For example,
// in a two node cluster, the failure of either node requires human intervention
// since consensus is impossible.
type PeerStore interface {
// Peers returns the list of known peers.
Peers() ([]string, error)
// SetPeers sets the list of known peers. This is invoked when a peer is
// added or removed.
SetPeers([]string) error
}
// StaticPeers is used to provide a static list of peers.
type StaticPeers struct {
StaticPeers []string
l sync.Mutex
}
// Peers implements the PeerStore interface.
func (s *StaticPeers) Peers() ([]string, error) {
s.l.Lock()
peers := s.StaticPeers
s.l.Unlock()
return peers, nil
}
// SetPeers implements the PeerStore interface.
func (s *StaticPeers) SetPeers(p []string) error {
s.l.Lock()
s.StaticPeers = p
s.l.Unlock()
return nil
}
// JSONPeers is used to provide peer persistence on disk in the form
// of a JSON file. This allows human operators to manipulate the file.
type JSONPeers struct {
l sync.Mutex
path string
trans Transport
}
// NewJSONPeers creates a new JSONPeers store. Requires a transport
// to handle the serialization of network addresses.
func NewJSONPeers(base string, trans Transport) *JSONPeers {
path := filepath.Join(base, jsonPeerPath)
store := &JSONPeers{
path: path,
trans: trans,
}
return store
}
// Peers implements the PeerStore interface.
func (j *JSONPeers) Peers() ([]string, error) {
j.l.Lock()
defer j.l.Unlock()
// Read the file
buf, err := ioutil.ReadFile(j.path)
if err != nil && !os.IsNotExist(err) {
return nil, err
}
// Check for no peers
if len(buf) == 0 {
return nil, nil
}
// Decode the peers
var peerSet []string
dec := json.NewDecoder(bytes.NewReader(buf))
if err := dec.Decode(&peerSet); err != nil {
return nil, err
}
// Deserialize each peer
var peers []string
for _, p := range peerSet {
peers = append(peers, j.trans.DecodePeer([]byte(p)))
}
return peers, nil
}
// SetPeers implements the PeerStore interface.
func (j *JSONPeers) SetPeers(peers []string) error {
j.l.Lock()
defer j.l.Unlock()
// Encode each peer
var peerSet []string
for _, p := range peers {
peerSet = append(peerSet, string(j.trans.EncodePeer(p)))
}
// Convert to JSON
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
if err := enc.Encode(peerSet); err != nil {
return err
}
// Write out as JSON
return ioutil.WriteFile(j.path, buf.Bytes(), 0755)
}
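
A sketch of the peer store in use; the transport is only needed to encode and decode addresses, and dataDir/peers.json is where the list lands on disk. Assumes the raft import:

func jsonPeersExample(dataDir string, trans raft.Transport, initial []string) ([]string, error) {
	store := raft.NewJSONPeers(dataDir, trans) // persists to dataDir/peers.json
	if err := store.SetPeers(initial); err != nil {
		return nil, err
	}
	return store.Peers() // what a human operator may have edited by hand
}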

File diff suppressed because it is too large

View File

@ -1,517 +0,0 @@
package raft
import (
"errors"
"fmt"
"sync"
"time"
"github.com/armon/go-metrics"
)
const (
maxFailureScale = 12
failureWait = 10 * time.Millisecond
)
var (
// ErrLogNotFound indicates a given log entry is not available.
ErrLogNotFound = errors.New("log not found")
// ErrPipelineReplicationNotSupported can be returned by the transport to
// signal that pipeline replication is not supported in general, and that
// no error message should be produced.
ErrPipelineReplicationNotSupported = errors.New("pipeline replication not supported")
)
type followerReplication struct {
peer string
inflight *inflight
stopCh chan uint64
triggerCh chan struct{}
currentTerm uint64
matchIndex uint64
nextIndex uint64
lastContact time.Time
lastContactLock sync.RWMutex
failures uint64
notifyCh chan struct{}
notify []*verifyFuture
notifyLock sync.Mutex
// stepDown is used to indicate to the leader that we
// should step down based on information from a follower.
stepDown chan struct{}
// allowPipeline is used to indicate whether pipeline
// replication should be enabled.
allowPipeline bool
}
// notifyAll is used to notify all the waiting verify futures
// if the follower believes we are still the leader.
func (s *followerReplication) notifyAll(leader bool) {
// Clear the waiting notifies minimizing lock time
s.notifyLock.Lock()
n := s.notify
s.notify = nil
s.notifyLock.Unlock()
// Submit our votes
for _, v := range n {
v.vote(leader)
}
}
// LastContact returns the time of last contact.
func (s *followerReplication) LastContact() time.Time {
s.lastContactLock.RLock()
last := s.lastContact
s.lastContactLock.RUnlock()
return last
}
// setLastContact sets the last contact to the current time.
func (s *followerReplication) setLastContact() {
s.lastContactLock.Lock()
s.lastContact = time.Now()
s.lastContactLock.Unlock()
}
// replicate is a long running routine that is used to manage
// the process of replicating logs to our followers.
func (r *Raft) replicate(s *followerReplication) {
// Start an async heartbeating routine
stopHeartbeat := make(chan struct{})
defer close(stopHeartbeat)
r.goFunc(func() { r.heartbeat(s, stopHeartbeat) })
RPC:
shouldStop := false
for !shouldStop {
select {
case maxIndex := <-s.stopCh:
// Make a best effort to replicate up to this index
if maxIndex > 0 {
r.replicateTo(s, maxIndex)
}
return
case <-s.triggerCh:
shouldStop = r.replicateTo(s, r.getLastLogIndex())
case <-randomTimeout(r.conf.CommitTimeout):
shouldStop = r.replicateTo(s, r.getLastLogIndex())
}
// If things look healthy, switch to pipeline mode
if !shouldStop && s.allowPipeline {
goto PIPELINE
}
}
return
PIPELINE:
// Disable until re-enabled
s.allowPipeline = false
// Replicates using a pipeline for high performance. This method
// is not able to gracefully recover from errors, and so we fall back
// to standard mode on failure.
if err := r.pipelineReplicate(s); err != nil {
if err != ErrPipelineReplicationNotSupported {
r.logger.Printf("[ERR] raft: Failed to start pipeline replication to %s: %s", s.peer, err)
}
}
goto RPC
}
// replicateTo is used to replicate the logs up to a given last index.
// If the follower log is behind, we take care to bring them up to date.
func (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) {
// Create the base request
var req AppendEntriesRequest
var resp AppendEntriesResponse
var start time.Time
START:
// Prevent an excessive retry rate on errors
if s.failures > 0 {
select {
case <-time.After(backoff(failureWait, s.failures, maxFailureScale)):
case <-r.shutdownCh:
}
}
// Setup the request
if err := r.setupAppendEntries(s, &req, s.nextIndex, lastIndex); err == ErrLogNotFound {
goto SEND_SNAP
} else if err != nil {
return
}
// Make the RPC call
start = time.Now()
if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
r.logger.Printf("[ERR] raft: Failed to AppendEntries to %v: %v", s.peer, err)
s.failures++
return
}
appendStats(s.peer, start, float32(len(req.Entries)))
// Check for a newer term, stop running
if resp.Term > req.Term {
r.handleStaleTerm(s)
return true
}
// Update the last contact
s.setLastContact()
// Update s based on success
if resp.Success {
// Update our replication state
updateLastAppended(s, &req)
// Clear any failures, allow pipelining
s.failures = 0
s.allowPipeline = true
} else {
s.nextIndex = max(min(s.nextIndex-1, resp.LastLog+1), 1)
s.matchIndex = s.nextIndex - 1
if resp.NoRetryBackoff {
s.failures = 0
} else {
s.failures++
}
r.logger.Printf("[WARN] raft: AppendEntries to %v rejected, sending older logs (next: %d)", s.peer, s.nextIndex)
}
CHECK_MORE:
// Check if there are more logs to replicate
if s.nextIndex <= lastIndex {
goto START
}
return
// SEND_SNAP is used when we fail to get a log, usually because the follower
// is too far behind, and we must ship a snapshot down instead
SEND_SNAP:
if stop, err := r.sendLatestSnapshot(s); stop {
return true
} else if err != nil {
r.logger.Printf("[ERR] raft: Failed to send snapshot to %v: %v", s.peer, err)
return
}
// Check if there is more to replicate
goto CHECK_MORE
}
// sendLatestSnapshot is used to send the latest snapshot we have
// down to our follower.
func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) {
// Get the snapshots
snapshots, err := r.snapshots.List()
if err != nil {
r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err)
return false, err
}
// Check we have at least a single snapshot
if len(snapshots) == 0 {
return false, fmt.Errorf("no snapshots found")
}
// Open the most recent snapshot
snapID := snapshots[0].ID
meta, snapshot, err := r.snapshots.Open(snapID)
if err != nil {
r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapID, err)
return false, err
}
defer snapshot.Close()
// Setup the request
req := InstallSnapshotRequest{
Term: s.currentTerm,
Leader: r.trans.EncodePeer(r.localAddr),
LastLogIndex: meta.Index,
LastLogTerm: meta.Term,
Peers: meta.Peers,
Size: meta.Size,
}
// Make the call
start := time.Now()
var resp InstallSnapshotResponse
if err := r.trans.InstallSnapshot(s.peer, &req, &resp, snapshot); err != nil {
r.logger.Printf("[ERR] raft: Failed to install snapshot %v: %v", snapID, err)
s.failures++
return false, err
}
metrics.MeasureSince([]string{"raft", "replication", "installSnapshot", s.peer}, start)
// Check for a newer term, stop running
if resp.Term > req.Term {
r.handleStaleTerm(s)
return true, nil
}
// Update the last contact
s.setLastContact()
// Check for success
if resp.Success {
// Mark any inflight logs as committed
s.inflight.CommitRange(s.matchIndex+1, meta.Index)
// Update the indexes
s.matchIndex = meta.Index
s.nextIndex = s.matchIndex + 1
// Clear any failures
s.failures = 0
// Notify we are still leader
s.notifyAll(true)
} else {
s.failures++
r.logger.Printf("[WARN] raft: InstallSnapshot to %v rejected", s.peer)
}
return false, nil
}
// heartbeat is used to periodically invoke AppendEntries on a peer
// to ensure they don't time out. This is done async of replicate(),
// since that routine could potentially be blocked on disk IO.
func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) {
var failures uint64
req := AppendEntriesRequest{
Term: s.currentTerm,
Leader: r.trans.EncodePeer(r.localAddr),
}
var resp AppendEntriesResponse
for {
// Wait for the next heartbeat interval or forced notify
select {
case <-s.notifyCh:
case <-randomTimeout(r.conf.HeartbeatTimeout / 10):
case <-stopCh:
return
}
start := time.Now()
if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
r.logger.Printf("[ERR] raft: Failed to heartbeat to %v: %v", s.peer, err)
failures++
select {
case <-time.After(backoff(failureWait, failures, maxFailureScale)):
case <-stopCh:
}
} else {
s.setLastContact()
failures = 0
metrics.MeasureSince([]string{"raft", "replication", "heartbeat", s.peer}, start)
s.notifyAll(resp.Success)
}
}
}
// pipelineReplicate is used when we have synchronized our state with the follower,
// and want to switch to a higher performance pipeline mode of replication.
// We only pipeline AppendEntries commands, and if we ever hit an error, we fall
// back to the standard replication which can handle more complex situations.
func (r *Raft) pipelineReplicate(s *followerReplication) error {
// Create a new pipeline
pipeline, err := r.trans.AppendEntriesPipeline(s.peer)
if err != nil {
return err
}
defer pipeline.Close()
// Log start and stop of pipeline
r.logger.Printf("[INFO] raft: pipelining replication to peer %v", s.peer)
defer r.logger.Printf("[INFO] raft: aborting pipeline replication to peer %v", s.peer)
// Create a shutdown and finish channel
stopCh := make(chan struct{})
finishCh := make(chan struct{})
// Start a dedicated decoder
r.goFunc(func() { r.pipelineDecode(s, pipeline, stopCh, finishCh) })
// Start pipeline sends at the last good nextIndex
nextIndex := s.nextIndex
shouldStop := false
SEND:
for !shouldStop {
select {
case <-finishCh:
break SEND
case maxIndex := <-s.stopCh:
if maxIndex > 0 {
r.pipelineSend(s, pipeline, &nextIndex, maxIndex)
}
break SEND
case <-s.triggerCh:
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, r.getLastLogIndex())
case <-randomTimeout(r.conf.CommitTimeout):
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, r.getLastLogIndex())
}
}
// Stop our decoder, and wait for it to finish
close(stopCh)
select {
case <-finishCh:
case <-r.shutdownCh:
}
return nil
}
// pipelineSend is used to send data over a pipeline.
func (r *Raft) pipelineSend(s *followerReplication, p AppendPipeline, nextIdx *uint64, lastIndex uint64) (shouldStop bool) {
// Create a new append request
req := new(AppendEntriesRequest)
if err := r.setupAppendEntries(s, req, *nextIdx, lastIndex); err != nil {
return true
}
// Pipeline the append entries
if _, err := p.AppendEntries(req, new(AppendEntriesResponse)); err != nil {
r.logger.Printf("[ERR] raft: Failed to pipeline AppendEntries to %v: %v", s.peer, err)
return true
}
// Increase the next send log to avoid re-sending old logs
if n := len(req.Entries); n > 0 {
last := req.Entries[n-1]
*nextIdx = last.Index + 1
}
return false
}
// pipelineDecode is used to decode the responses of pipelined requests.
func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, finishCh chan struct{}) {
defer close(finishCh)
respCh := p.Consumer()
for {
select {
case ready := <-respCh:
req, resp := ready.Request(), ready.Response()
appendStats(s.peer, ready.Start(), float32(len(req.Entries)))
// Check for a newer term, stop running
if resp.Term > req.Term {
r.handleStaleTerm(s)
return
}
// Update the last contact
s.setLastContact()
// Abort pipeline if not successful
if !resp.Success {
return
}
// Update our replication state
updateLastAppended(s, req)
case <-stopCh:
return
}
}
}
// setupAppendEntries is used to setup an append entries request.
func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
req.Term = s.currentTerm
req.Leader = r.trans.EncodePeer(r.localAddr)
req.LeaderCommitIndex = r.getCommitIndex()
if err := r.setPreviousLog(req, nextIndex); err != nil {
return err
}
if err := r.setNewLogs(req, nextIndex, lastIndex); err != nil {
return err
}
return nil
}
// setPreviousLog is used to setup the PrevLogEntry and PrevLogTerm for an
// AppendEntriesRequest given the next index to replicate.
func (r *Raft) setPreviousLog(req *AppendEntriesRequest, nextIndex uint64) error {
// Guard for the first index, since there is no 0 log entry
// Guard against the previous index being a snapshot as well
if nextIndex == 1 {
req.PrevLogEntry = 0
req.PrevLogTerm = 0
} else if (nextIndex - 1) == r.getLastSnapshotIndex() {
req.PrevLogEntry = r.getLastSnapshotIndex()
req.PrevLogTerm = r.getLastSnapshotTerm()
} else {
var l Log
if err := r.logs.GetLog(nextIndex-1, &l); err != nil {
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v",
nextIndex-1, err)
return err
}
// Set the previous index and term (0 if nextIndex is 1)
req.PrevLogEntry = l.Index
req.PrevLogTerm = l.Term
}
return nil
}
// setNewLogs is used to setup the logs which should be appended for a request.
func (r *Raft) setNewLogs(req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
// Append up to MaxAppendEntries or up to the lastIndex
req.Entries = make([]*Log, 0, r.conf.MaxAppendEntries)
maxIndex := min(nextIndex+uint64(r.conf.MaxAppendEntries)-1, lastIndex)
for i := nextIndex; i <= maxIndex; i++ {
oldLog := new(Log)
if err := r.logs.GetLog(i, oldLog); err != nil {
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v", i, err)
return err
}
req.Entries = append(req.Entries, oldLog)
}
return nil
}
// appendStats is used to emit stats about an AppendEntries invocation.
func appendStats(peer string, start time.Time, logs float32) {
metrics.MeasureSince([]string{"raft", "replication", "appendEntries", "rpc", peer}, start)
metrics.IncrCounter([]string{"raft", "replication", "appendEntries", "logs", peer}, logs)
}
// handleStaleTerm is used when a follower indicates that we have a stale term.
func (r *Raft) handleStaleTerm(s *followerReplication) {
r.logger.Printf("[ERR] raft: peer %v has newer term, stopping replication", s.peer)
s.notifyAll(false) // No longer leader
asyncNotifyCh(s.stepDown)
}
// updateLastAppended is used to update follower replication state after a successful
// AppendEntries RPC.
func updateLastAppended(s *followerReplication, req *AppendEntriesRequest) {
// Mark any inflight logs as committed
if logs := req.Entries; len(logs) > 0 {
first := logs[0]
last := logs[len(logs)-1]
s.inflight.CommitRange(first.Index, last.Index)
// Update the indexes
s.matchIndex = last.Index
s.nextIndex = last.Index + 1
}
// Notify still leader
s.notifyAll(true)
}


@@ -1,40 +0,0 @@
package raft
import (
"io"
)
// SnapshotMeta is for metadata of a snapshot.
type SnapshotMeta struct {
ID string // ID is opaque to the store, and is used for opening
Index uint64
Term uint64
Peers []byte
Size int64
}
// SnapshotStore interface is used to allow for flexible implementations
// of snapshot storage and retrieval. For example, a client could implement
// a shared state store such as S3, allowing new nodes to restore snapshots
// without streaming from the leader.
type SnapshotStore interface {
// Create is used to begin a snapshot at a given index and term,
// with the current peer set already encoded.
Create(index, term uint64, peers []byte) (SnapshotSink, error)
// List is used to list the available snapshots in the store.
// It should return them in descending order, with the highest index first.
List() ([]*SnapshotMeta, error)
// Open takes a snapshot ID and provides a ReadCloser. Once close is
// called it is assumed the snapshot is no longer needed.
Open(id string) (*SnapshotMeta, io.ReadCloser, error)
}
// SnapshotSink is returned by StartSnapshot. The FSM will Write state
// to the sink and call Close on completion. On error, Cancel will be invoked.
type SnapshotSink interface {
io.WriteCloser
ID() string
Cancel() error
}
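For context, a hedged sketch of how a consumer might list and open the most recent snapshot through the file-based SnapshotStore that ships with this package; the directory and retain count are placeholders, and if the constructor differs at this revision, read this as pseudocode for the List/Open calls above.

```go
package main

import (
	"io/ioutil"
	"log"
	"os"

	"github.com/hashicorp/raft"
)

func main() {
	// Disk-backed SnapshotStore keeping the 2 most recent snapshots.
	snaps, err := raft.NewFileSnapshotStore("/tmp/raft-demo", 2, os.Stderr)
	if err != nil {
		log.Fatal(err)
	}

	// List returns metadata in descending order, highest index first.
	metas, err := snaps.List()
	if err != nil {
		log.Fatal(err)
	}
	if len(metas) == 0 {
		log.Println("no snapshots yet")
		return
	}

	// Open the newest snapshot and read its contents.
	meta, rc, err := snaps.Open(metas[0].ID)
	if err != nil {
		log.Fatal(err)
	}
	defer rc.Close()

	buf, err := ioutil.ReadAll(rc)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("snapshot %s: index=%d term=%d size=%d read=%d bytes",
		meta.ID, meta.Index, meta.Term, meta.Size, len(buf))
}
```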


@@ -1,15 +0,0 @@
package raft
// StableStore is used to provide stable storage
// of key configurations to ensure safety.
type StableStore interface {
Set(key []byte, val []byte) error
// Get returns the value for key, or an empty byte slice if key was not found.
Get(key []byte) ([]byte, error)
SetUint64(key []byte, val uint64) error
// GetUint64 returns the uint64 value for key, or 0 if key was not found.
GetUint64(key []byte) (uint64, error)
}
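A minimal in-memory sketch that satisfies StableStore, useful only for tests and examples; the type name InmemStable is ours, not part of the package, and real deployments would use a durable implementation such as raft-boltdb.

```go
package raftexample

import (
	"sync"

	"github.com/hashicorp/raft"
)

// InmemStable is a toy StableStore; everything is lost on restart.
type InmemStable struct {
	mu   sync.Mutex
	kv   map[string][]byte
	kv64 map[string]uint64
}

var _ raft.StableStore = (*InmemStable)(nil)

func NewInmemStable() *InmemStable {
	return &InmemStable{kv: map[string][]byte{}, kv64: map[string]uint64{}}
}

func (s *InmemStable) Set(key, val []byte) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.kv[string(key)] = val
	return nil
}

// Get returns a nil (empty) slice when the key is missing, per the interface note.
func (s *InmemStable) Get(key []byte) ([]byte, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	return s.kv[string(key)], nil
}

func (s *InmemStable) SetUint64(key []byte, val uint64) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.kv64[string(key)] = val
	return nil
}

// GetUint64 returns 0 when the key is missing, per the interface note.
func (s *InmemStable) GetUint64(key []byte) (uint64, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	return s.kv64[string(key)], nil
}
```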


@@ -1,169 +0,0 @@
package raft
import (
"sync/atomic"
)
// RaftState captures the state of a Raft node: Follower, Candidate, Leader,
// or Shutdown.
type RaftState uint32
const (
// Follower is the initial state of a Raft node.
Follower RaftState = iota
// Candidate is one of the valid states of a Raft node.
Candidate
// Leader is one of the valid states of a Raft node.
Leader
// Shutdown is the terminal state of a Raft node.
Shutdown
)
func (s RaftState) String() string {
switch s {
case Follower:
return "Follower"
case Candidate:
return "Candidate"
case Leader:
return "Leader"
case Shutdown:
return "Shutdown"
default:
return "Unknown"
}
}
// raftState is used to maintain various state variables
// and provides an interface to set/get the variables in a
// thread safe manner.
type raftState struct {
// The current term, cache of StableStore
currentTerm uint64
// Cache the latest log from LogStore
LastLogIndex uint64
LastLogTerm uint64
// Highest committed log entry
commitIndex uint64
// Last applied log to the FSM
lastApplied uint64
// Cache the latest snapshot index/term
lastSnapshotIndex uint64
lastSnapshotTerm uint64
// Tracks the number of live routines
runningRoutines int32
// The current state
state RaftState
}
func (r *raftState) getState() RaftState {
stateAddr := (*uint32)(&r.state)
return RaftState(atomic.LoadUint32(stateAddr))
}
func (r *raftState) setState(s RaftState) {
stateAddr := (*uint32)(&r.state)
atomic.StoreUint32(stateAddr, uint32(s))
}
func (r *raftState) getCurrentTerm() uint64 {
return atomic.LoadUint64(&r.currentTerm)
}
func (r *raftState) setCurrentTerm(term uint64) {
atomic.StoreUint64(&r.currentTerm, term)
}
func (r *raftState) getLastLogIndex() uint64 {
return atomic.LoadUint64(&r.LastLogIndex)
}
func (r *raftState) setLastLogIndex(term uint64) {
atomic.StoreUint64(&r.LastLogIndex, term)
}
func (r *raftState) getLastLogTerm() uint64 {
return atomic.LoadUint64(&r.LastLogTerm)
}
func (r *raftState) setLastLogTerm(term uint64) {
atomic.StoreUint64(&r.LastLogTerm, term)
}
func (r *raftState) getCommitIndex() uint64 {
return atomic.LoadUint64(&r.commitIndex)
}
func (r *raftState) setCommitIndex(term uint64) {
atomic.StoreUint64(&r.commitIndex, term)
}
func (r *raftState) getLastApplied() uint64 {
return atomic.LoadUint64(&r.lastApplied)
}
func (r *raftState) setLastApplied(term uint64) {
atomic.StoreUint64(&r.lastApplied, term)
}
func (r *raftState) getLastSnapshotIndex() uint64 {
return atomic.LoadUint64(&r.lastSnapshotIndex)
}
func (r *raftState) setLastSnapshotIndex(term uint64) {
atomic.StoreUint64(&r.lastSnapshotIndex, term)
}
func (r *raftState) getLastSnapshotTerm() uint64 {
return atomic.LoadUint64(&r.lastSnapshotTerm)
}
func (r *raftState) setLastSnapshotTerm(term uint64) {
atomic.StoreUint64(&r.lastSnapshotTerm, term)
}
func (r *raftState) incrRoutines() {
atomic.AddInt32(&r.runningRoutines, 1)
}
func (r *raftState) decrRoutines() {
atomic.AddInt32(&r.runningRoutines, -1)
}
func (r *raftState) getRoutines() int32 {
return atomic.LoadInt32(&r.runningRoutines)
}
// Start a goroutine and properly handle the race between a routine
// starting and incrementing, and exiting and decrementing.
func (r *raftState) goFunc(f func()) {
r.incrRoutines()
go func() {
defer r.decrRoutines()
f()
}()
}
// getLastIndex returns the last index in stable storage.
// Either from the last log or from the last snapshot.
func (r *raftState) getLastIndex() uint64 {
return max(r.getLastLogIndex(), r.getLastSnapshotIndex())
}
// getLastEntry returns the last index and term in stable storage.
// Either from the last log or from the last snapshot.
func (r *raftState) getLastEntry() (uint64, uint64) {
if r.getLastLogIndex() >= r.getLastSnapshotIndex() {
return r.getLastLogIndex(), r.getLastLogTerm()
}
return r.getLastSnapshotIndex(), r.getLastSnapshotTerm()
}


@@ -1,105 +0,0 @@
package raft
import (
"errors"
"io"
"log"
"net"
"time"
)
var (
errNotAdvertisable = errors.New("local bind address is not advertisable")
errNotTCP = errors.New("local address is not a TCP address")
)
// TCPStreamLayer implements StreamLayer interface for plain TCP.
type TCPStreamLayer struct {
advertise net.Addr
listener *net.TCPListener
}
// NewTCPTransport returns a NetworkTransport that is built on top of
// a TCP streaming transport layer.
func NewTCPTransport(
bindAddr string,
advertise net.Addr,
maxPool int,
timeout time.Duration,
logOutput io.Writer,
) (*NetworkTransport, error) {
return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
return NewNetworkTransport(stream, maxPool, timeout, logOutput)
})
}
// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of
// a TCP streaming transport layer, with log output going to the supplied Logger
func NewTCPTransportWithLogger(
bindAddr string,
advertise net.Addr,
maxPool int,
timeout time.Duration,
logger *log.Logger,
) (*NetworkTransport, error) {
return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger)
})
}
func newTCPTransport(bindAddr string,
advertise net.Addr,
maxPool int,
timeout time.Duration,
transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) {
// Try to bind
list, err := net.Listen("tcp", bindAddr)
if err != nil {
return nil, err
}
// Create stream
stream := &TCPStreamLayer{
advertise: advertise,
listener: list.(*net.TCPListener),
}
// Verify that we have a usable advertise address
addr, ok := stream.Addr().(*net.TCPAddr)
if !ok {
list.Close()
return nil, errNotTCP
}
if addr.IP.IsUnspecified() {
list.Close()
return nil, errNotAdvertisable
}
// Create the network transport
trans := transportCreator(stream)
return trans, nil
}
// Dial implements the StreamLayer interface.
func (t *TCPStreamLayer) Dial(address string, timeout time.Duration) (net.Conn, error) {
return net.DialTimeout("tcp", address, timeout)
}
// Accept implements the net.Listener interface.
func (t *TCPStreamLayer) Accept() (c net.Conn, err error) {
return t.listener.Accept()
}
// Close implements the net.Listener interface.
func (t *TCPStreamLayer) Close() (err error) {
return t.listener.Close()
}
// Addr implements the net.Listener interface.
func (t *TCPStreamLayer) Addr() net.Addr {
// Use an advertise addr if provided
if t.advertise != nil {
return t.advertise
}
return t.listener.Addr()
}
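Because of the errNotAdvertisable check above, a node that binds the wildcard address must supply an explicit advertise address. A minimal sketch (addresses, pool size, and timeout are placeholders):

```go
package main

import (
	"log"
	"net"
	"os"
	"time"

	"github.com/hashicorp/raft"
)

func main() {
	// Resolve the concrete address other peers should dial; binding to
	// 0.0.0.0 alone would be rejected as not advertisable.
	advertise, err := net.ResolveTCPAddr("tcp", "10.0.0.1:7000")
	if err != nil {
		log.Fatal(err)
	}

	trans, err := raft.NewTCPTransport("0.0.0.0:7000", advertise, 3, 10*time.Second, os.Stderr)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("raft transport advertising %s", trans.LocalAddr())
}
```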


@@ -1,85 +0,0 @@
package raft
import (
"io"
"time"
)
// RPCResponse captures both a response and a potential error.
type RPCResponse struct {
Response interface{}
Error error
}
// RPC has a command, and provides a response mechanism.
type RPC struct {
Command interface{}
Reader io.Reader // Set only for InstallSnapshot
RespChan chan<- RPCResponse
}
// Respond is used to respond with a response, error or both
func (r *RPC) Respond(resp interface{}, err error) {
r.RespChan <- RPCResponse{resp, err}
}
// Transport provides an interface for network transports
// to allow Raft to communicate with other nodes.
type Transport interface {
// Consumer returns a channel that can be used to
// consume and respond to RPC requests.
Consumer() <-chan RPC
// LocalAddr is used to return our local address to distinguish from our peers.
LocalAddr() string
// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
AppendEntriesPipeline(target string) (AppendPipeline, error)
// AppendEntries sends the appropriate RPC to the target node.
AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error
// RequestVote sends the appropriate RPC to the target node.
RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error
// InstallSnapshot is used to push a snapshot down to a follower. The data is read from
// the ReadCloser and streamed to the client.
InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error
// EncodePeer is used to serialize a peer name.
EncodePeer(string) []byte
// DecodePeer is used to deserialize a peer name.
DecodePeer([]byte) string
// SetHeartbeatHandler is used to setup a heartbeat handler
// as a fast path. This is to avoid head-of-line blocking from
// disk IO. If a Transport does not support this, it can simply
// ignore the call, and push the heartbeat onto the Consumer channel.
SetHeartbeatHandler(cb func(rpc RPC))
}
// AppendPipeline is used for pipelining AppendEntries requests. It is used
// to increase the replication throughput by masking latency and better
// utilizing bandwidth.
type AppendPipeline interface {
// AppendEntries is used to add another request to the pipeline.
// The send may block which is an effective form of back-pressure.
AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error)
// Consumer returns a channel that can be used to consume
// response futures when they are ready.
Consumer() <-chan AppendFuture
// Closes pipeline and cancels all inflight RPCs
Close() error
}
// AppendFuture is used to return information about a pipelined AppendEntries request.
type AppendFuture interface {
Future
Start() time.Time
Request() *AppendEntriesRequest
Response() *AppendEntriesResponse
}


@@ -1,200 +0,0 @@
package raft
import (
"bytes"
crand "crypto/rand"
"encoding/binary"
"fmt"
"math"
"math/big"
"math/rand"
"time"
"github.com/hashicorp/go-msgpack/codec"
)
func init() {
// Ensure we use a high-entropy seed for the pseudo-random generator
rand.Seed(newSeed())
}
// returns an int64 from a crypto random source
// can be used to seed a source for a math/rand.
func newSeed() int64 {
r, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
if err != nil {
panic(fmt.Errorf("failed to read random bytes: %v", err))
}
return r.Int64()
}
// randomTimeout returns a value that is between the minVal and 2x minVal.
func randomTimeout(minVal time.Duration) <-chan time.Time {
if minVal == 0 {
return nil
}
extra := (time.Duration(rand.Int63()) % minVal)
return time.After(minVal + extra)
}
// min returns the minimum.
func min(a, b uint64) uint64 {
if a <= b {
return a
}
return b
}
// max returns the maximum.
func max(a, b uint64) uint64 {
if a >= b {
return a
}
return b
}
// generateUUID is used to generate a random UUID.
func generateUUID() string {
buf := make([]byte, 16)
if _, err := crand.Read(buf); err != nil {
panic(fmt.Errorf("failed to read random bytes: %v", err))
}
return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x",
buf[0:4],
buf[4:6],
buf[6:8],
buf[8:10],
buf[10:16])
}
// asyncNotify is used to do an async channel send to
// a list of channels. This will not block.
func asyncNotify(chans []chan struct{}) {
for _, ch := range chans {
asyncNotifyCh(ch)
}
}
// asyncNotifyCh is used to do an async channel send
// to a single channel without blocking.
func asyncNotifyCh(ch chan struct{}) {
select {
case ch <- struct{}{}:
default:
}
}
// asyncNotifyBool is used to do an async notification
// on a bool channel.
func asyncNotifyBool(ch chan bool, v bool) {
select {
case ch <- v:
default:
}
}
// ExcludePeer is used to exclude a single peer from a list of peers.
func ExcludePeer(peers []string, peer string) []string {
otherPeers := make([]string, 0, len(peers))
for _, p := range peers {
if p != peer {
otherPeers = append(otherPeers, p)
}
}
return otherPeers
}
// PeerContained checks if a given peer is contained in a list.
func PeerContained(peers []string, peer string) bool {
for _, p := range peers {
if p == peer {
return true
}
}
return false
}
// AddUniquePeer is used to add a peer to a list of existing
// peers only if it is not already contained.
func AddUniquePeer(peers []string, peer string) []string {
if PeerContained(peers, peer) {
return peers
}
return append(peers, peer)
}
// encodePeers is used to serialize a list of peers.
func encodePeers(peers []string, trans Transport) []byte {
// Encode each peer
var encPeers [][]byte
for _, p := range peers {
encPeers = append(encPeers, trans.EncodePeer(p))
}
// Encode the entire array
buf, err := encodeMsgPack(encPeers)
if err != nil {
panic(fmt.Errorf("failed to encode peers: %v", err))
}
return buf.Bytes()
}
// decodePeers is used to deserialize a list of peers.
func decodePeers(buf []byte, trans Transport) []string {
// Decode the buffer first
var encPeers [][]byte
if err := decodeMsgPack(buf, &encPeers); err != nil {
panic(fmt.Errorf("failed to decode peers: %v", err))
}
// Deserialize each peer
var peers []string
for _, enc := range encPeers {
peers = append(peers, trans.DecodePeer(enc))
}
return peers
}
// Decode reverses the encode operation on a byte slice input.
func decodeMsgPack(buf []byte, out interface{}) error {
r := bytes.NewBuffer(buf)
hd := codec.MsgpackHandle{}
dec := codec.NewDecoder(r, &hd)
return dec.Decode(out)
}
// Encode writes an encoded object to a new bytes buffer.
func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
buf := bytes.NewBuffer(nil)
hd := codec.MsgpackHandle{}
enc := codec.NewEncoder(buf, &hd)
err := enc.Encode(in)
return buf, err
}
// Converts bytes to an integer.
func bytesToUint64(b []byte) uint64 {
return binary.BigEndian.Uint64(b)
}
// Converts a uint64 to a byte slice.
func uint64ToBytes(u uint64) []byte {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, u)
return buf
}
// backoff is used to compute an exponential backoff
// duration. Base time is scaled by the current round,
// up to some maximum scale factor.
func backoff(base time.Duration, round, limit uint64) time.Duration {
power := min(round, limit)
for power > 2 {
base *= 2
power--
}
return base
}
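To make the backoff schedule concrete: with the failureWait (10ms) and maxFailureScale (12) constants from replication.go, the first two failures wait the base 10ms, each further failure doubles the wait, and the cap works out to 10ms * 2^10 ≈ 10.24s. The stand-alone copy below is only for illustration and prints that schedule.

```go
package main

import (
	"fmt"
	"time"
)

// backoff mirrors the helper above: the base duration is doubled once per
// round beyond the second, with the round count capped at limit.
func backoff(base time.Duration, round, limit uint64) time.Duration {
	power := round
	if limit < power {
		power = limit
	}
	for power > 2 {
		base *= 2
		power--
	}
	return base
}

func main() {
	// With failureWait=10ms and maxFailureScale=12 this prints
	// 10ms, 10ms, 20ms, 40ms, ... topping out at 10.24s.
	for round := uint64(1); round <= 13; round++ {
		fmt.Printf("failure %2d -> wait %v\n", round, backoff(10*time.Millisecond, round, 12))
	}
}
```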


@@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (c) 2013-2015 Errplane Inc.
Copyright (c) 2013-2014 Errplane Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in


@@ -0,0 +1,20 @@
Copyright 2013 go-raft contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


@@ -1,206 +1,2 @@
# InfluxDB Client
[![GoDoc](https://godoc.org/github.com/influxdb/influxdb?status.svg)](http://godoc.org/github.com/influxdb/influxdb/client)
## Description
A Go client library written and maintained by the **InfluxDB** team.
This package provides convenience functions to read and write time series data.
It uses the HTTP protocol to communicate with your **InfluxDB** cluster.
## Getting Started
### Connecting To Your Database
Connecting to an **InfluxDB** database is straightforward. You will need a host
name, a port and the cluster user credentials if applicable. The default port is 8086.
You can customize these settings to your specific installation via the
**InfluxDB** configuration file.
Though not necessary for experimentation, you may want to create a new user
and authenticate the connection to your database.
For more information please check out the
[Cluster Admin Docs](http://influxdb.com/docs/v0.9/query_language/database_administration.html).
For the impatient, you can create a new admin user _bubba_ by firing off the
[InfluxDB CLI](https://github.com/influxdb/influxdb/blob/master/cmd/influx/main.go).
```shell
influx
> create user bubba with password 'bumblebeetuna'
> grant all privileges to bubba
```
And now, for good measure, set the credentials in your shell environment.
In the example below we will use $INFLUX_USER and $INFLUX_PWD.
Now with the administrivia out of the way, let's connect to our database.
NOTE: If you've opted out of creating a user, you can omit Username and Password in
the configuration below.
```go
package main
import "github.com/influxdb/influxdb/client"
const (
MyHost = "localhost"
MyPort = 8086
MyDB = "square_holes"
MyMeasurement = "shapes"
)
func main() {
u, err := url.Parse(fmt.Sprintf("http://%s:%d", MyHost, MyPort))
if err != nil {
log.Fatal(err)
}
conf := client.Config{
URL: *u,
Username: os.Getenv("INFLUX_USER"),
Password: os.Getenv("INFLUX_PWD"),
}
con, err := client.NewClient(conf)
if err != nil {
log.Fatal(err)
}
dur, ver, err := con.Ping()
if err != nil {
log.Fatal(err)
}
log.Printf("Happy as a Hippo! %v, %s", dur, ver)
}
```
### Inserting Data
Time series data aka *points* are written to the database using batch inserts.
The mechanism is to create one or more points and then create a batch aka *batch points*
and write these to a given database and series. A series is a combination of a
measurement (time/values) and a set of tags.
In this sample we will create a batch of 1,000 points. Each point has a time and
a single value as well as 2 tags indicating a shape and color. We write these points
to a database called _square_holes_ using a measurement named _shapes_.
NOTE: You can specify a RetentionPolicy as part of the batch points. If not
provided InfluxDB will use the database _default_ retention policy. By default, the _default_
retention policy never deletes any data it contains.
```go
func writePoints(con *client.Client) {
var (
shapes = []string{"circle", "rectangle", "square", "triangle"}
colors = []string{"red", "blue", "green"}
sampleSize = 1000
pts = make([]client.Point, sampleSize)
)
rand.Seed(42)
for i := 0; i < sampleSize; i++ {
pts[i] = client.Point{
Measurement: "shapes",
Tags: map[string]string{
"color": strconv.Itoa(rand.Intn(len(colors))),
"shape": strconv.Itoa(rand.Intn(len(shapes))),
},
Fields: map[string]interface{}{
"value": rand.Intn(sampleSize),
},
Time: time.Now(),
Precision: "s",
}
}
bps := client.BatchPoints{
Points: pts,
Database: MyDB,
RetentionPolicy: "default",
}
_, err := con.Write(bps)
if err != nil {
log.Fatal(err)
}
}
```
### Querying Data
One nice advantage of using **InfluxDB** is the ability to query your data using familiar
SQL constructs. In this example we can create a convenience function to query the database
as follows:
```go
// queryDB convenience function to query the database
func queryDB(con *client.Client, cmd string) (res []client.Result, err error) {
q := client.Query{
Command: cmd,
Database: MyDB,
}
if response, err := con.Query(q); err == nil {
if response.Error() != nil {
return res, response.Error()
}
res = response.Results
}
return
}
```
#### Creating a Database
```go
_, err := queryDB(con, fmt.Sprintf("create database %s", MyDB))
if err != nil {
log.Fatal(err)
}
```
#### Count Records
```go
q := fmt.Sprintf("select count(%s) from %s", "value", MyMeasurement)
res, err := queryDB(con, q)
if err != nil {
log.Fatal(err)
}
count := res[0].Series[0].Values[0][1]
log.Printf("Found a total of `%v records", count)
```
#### Find the last 10 _shapes_ records
```go
q := fmt.Sprintf("select * from %s limit %d", MyMeasurement, 20)
res, err = queryDB(con, q)
if err != nil {
log.Fatal(err)
}
for i, row := range res[0].Series[0].Values {
t, err := time.Parse(time.RFC3339, row[0].(string))
if err != nil {
log.Fatal(err)
}
val, err := row[1].(json.Number).Int64()
log.Printf("[%2d] %s: %03d\n", i, t.Format(time.Stamp), val)
}
```
## Go Docs
Please refer to
[http://godoc.org/github.com/influxdb/influxdb/client](http://godoc.org/github.com/influxdb/influxdb/client)
for documentation.
## See Also
You can also examine how the client library is used by the
[InfluxDB CLI](https://github.com/influxdb/influxdb/blob/master/cmd/influx/main.go).
influxdb-go
===========


@@ -0,0 +1,200 @@
package examples
import (
"fmt"
"github.com/influxdb/influxdb/client"
)
func main() {
TestClient()
}
func TestClient() {
internalTest(true)
}
func TestClientWithoutCompression() {
internalTest(false)
}
func internalTest(compression bool) {
c, err := client.NewClient(&client.ClientConfig{})
if err != nil {
panic(err)
}
admins, err := c.GetClusterAdminList()
if err != nil {
panic(err)
}
if len(admins) == 1 {
if err := c.CreateClusterAdmin("admin", "password"); err != nil {
panic(err)
}
}
admins, err = c.GetClusterAdminList()
if err != nil {
panic(err)
}
if len(admins) != 2 {
panic("more than two admins returned")
}
dbs, err := c.GetDatabaseList()
if err != nil {
panic(err)
}
if len(dbs) == 0 {
if err := c.CreateDatabase("foobar"); err != nil {
panic(err)
}
}
dbs, err = c.GetDatabaseList()
if err != nil {
panic(err)
}
if len(dbs) != 1 && dbs[0]["foobar"] == nil {
panic("List of databases don't match")
}
users, err := c.GetDatabaseUserList("foobar")
if err != nil {
panic(err)
}
if len(users) == 0 {
if err := c.CreateDatabaseUser("foobar", "dbuser", "pass"); err != nil {
panic(err)
}
if err := c.AlterDatabasePrivilege("foobar", "dbuser", true); err != nil {
panic(err)
}
}
users, err = c.GetDatabaseUserList("foobar")
if err != nil {
panic(err)
}
if len(users) != 1 {
panic("more than one user returned")
}
c, err = client.NewClient(&client.ClientConfig{
Username: "dbuser",
Password: "pass",
Database: "foobar",
})
if !compression {
c.DisableCompression()
}
if err != nil {
panic(err)
}
name := "ts9"
if !compression {
name = "ts9_uncompressed"
}
series := &client.Series{
Name: name,
Columns: []string{"value"},
Points: [][]interface{}{
{1.0},
},
}
if err := c.WriteSeries([]*client.Series{series}); err != nil {
panic(err)
}
result, err := c.Query("select * from " + name)
if err != nil {
panic(err)
}
if len(result) != 1 {
panic(fmt.Errorf("expected one time series returned: %d", len(result)))
}
if len(result[0].Points) != 1 {
panic(fmt.Errorf("Expected one point: %d", len(result[0].Points)))
}
if result[0].Points[0][2].(float64) != 1 {
panic("Value not equal to 1")
}
c, err = client.NewClient(&client.ClientConfig{
Username: "root",
Password: "root",
})
if err != nil {
panic(err)
}
spaces, err := c.GetShardSpaces()
if err != nil || len(spaces) == 0 {
panic(fmt.Errorf("Got empty spaces back: %s", err))
}
if spaces[0].Name != "default" {
panic("Space name isn't default")
}
space := &client.ShardSpace{Name: "foo", Regex: "/^paul_is_rad/"}
err = c.CreateShardSpace("foobar", space)
if err != nil {
panic(err)
}
spaces, _ = c.GetShardSpaces()
if spaces[1].Name != "foo" {
panic("Space name isn't foo")
}
shards, err := c.GetShards()
if err != nil {
panic(fmt.Errorf("Couldn't get shards back: %s", err))
}
c, err = client.NewClient(&client.ClientConfig{
Username: "root",
Password: "root",
Database: "",
})
series = &client.Series{
Name: "paul_is_rad",
Columns: []string{"value"},
Points: [][]interface{}{
{1.0},
},
}
if err := c.WriteSeries([]*client.Series{series}); err != nil {
panic(err)
}
spaces, _ = c.GetShardSpaces()
count := 0
for _, s := range shards.All {
if s.SpaceName == "foo" {
count++
}
}
if err := c.DropShardSpace("foobar", "foo"); err != nil {
panic(fmt.Errorf("Error: %s", err))
}
spaces, err = c.GetShardSpaces()
if err != nil || len(spaces) != 1 || spaces[0].Name != "default" {
panic(fmt.Errorf("Error: %s, %d, %s", err, len(spaces), spaces[0].Name))
}
}

File diff suppressed because it is too large


@@ -0,0 +1,19 @@
package client
type Series struct {
Name string `json:"name"`
Columns []string `json:"columns"`
Points [][]interface{} `json:"points"`
}
func (self *Series) GetName() string {
return self.Name
}
func (self *Series) GetColumns() []string {
return self.Columns
}
func (self *Series) GetPoints() [][]interface{} {
return self.Points
}


@@ -0,0 +1,15 @@
package client
type ShardSpace struct {
// required, must be unique within the database
Name string `json:"name"`
// required, a database has many shard spaces and a shard space belongs to a database
Database string `json:"database"`
// this is optional, if they don't set it, we'll set to /.*/
Regex string `json:"regex"`
// this is optional, if they don't set it, it will default to the storage.dir in the config
RetentionPolicy string `json:"retentionPolicy"`
ShardDuration string `json:"shardDuration"`
ReplicationFactor uint32 `json:"replicationFactor"`
Split uint32 `json:"split"`
}
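A short sketch of creating a shard space with this struct through the v0.8.8 client, mirroring the CreateShardSpace call in the examples file above; credentials, names, and field values are placeholders, and unset fields fall back to the defaults noted in the comments.

```go
package main

import (
	"log"

	"github.com/influxdb/influxdb/client"
)

func main() {
	// Connect as a cluster admin (host and port fall back to the client's defaults).
	c, err := client.NewClient(&client.ClientConfig{
		Username: "root",
		Password: "root",
	})
	if err != nil {
		log.Fatal(err)
	}

	// Regex and RetentionPolicy may be omitted; they default as described
	// in the struct comments.
	space := &client.ShardSpace{
		Name:            "one_week",
		Database:        "foobar",
		Regex:           "/^metrics\\./",
		RetentionPolicy: "7d",
		ShardDuration:   "1d",
	}
	if err := c.CreateShardSpace("foobar", space); err != nil {
		log.Fatal(err)
	}
	log.Println("shard space created")
}
```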


@@ -1,650 +0,0 @@
# The Influx Query Language Specification
## Introduction
This is a reference for the Influx Query Language ("InfluxQL").
InfluxQL is a SQL-like query language for interacting with InfluxDB. It has been lovingly crafted to feel familiar to those coming from other SQL or SQL-like environments while providing features specific to storing and analyzing time series data.
## Notation
The syntax is specified using Extended Backus-Naur Form ("EBNF"). EBNF is the same notation used in the [Go](http://golang.org) programming language specification, which can be found [here](https://golang.org/ref/spec). Not so coincidentally, InfluxDB is written in Go.
```
Production = production_name "=" [ Expression ] "." .
Expression = Alternative { "|" Alternative } .
Alternative = Term { Term } .
Term = production_name | token [ "…" token ] | Group | Option | Repetition .
Group = "(" Expression ")" .
Option = "[" Expression "]" .
Repetition = "{" Expression "}" .
```
Notation operators in order of increasing precedence:
```
| alternation
() grouping
[] option (0 or 1 times)
{} repetition (0 to n times)
```
## Query representation
### Characters
InfluxQL is Unicode text encoded in [UTF-8](http://en.wikipedia.org/wiki/UTF-8).
```
newline = /* the Unicode code point U+000A */ .
unicode_char = /* an arbitrary Unicode code point except newline */ .
```
## Letters and digits
Letters are the set of ASCII letters; the underscore character _ (U+005F) is also considered a letter.
Only decimal digits are supported.
```
letter = ascii_letter | "_" .
ascii_letter = "A" … "Z" | "a" … "z" .
digit = "0" … "9" .
```
## Identifiers
Identifiers are tokens which refer to database names, retention policy names, user names, measurement names, tag keys, and field names.
The rules:
- double quoted identifiers can contain any unicode character other than a new line
- double quoted identifiers can contain escaped `"` characters (i.e., `\"`)
- unquoted identifiers must start with an upper or lowercase ASCII character or "_"
- unquoted identifiers may contain only ASCII letters, decimal digits, and "_"
```
identifier = unquoted_identifier | quoted_identifier .
unquoted_identifier = ( letter ) { letter | digit } .
quoted_identifier = `"` unicode_char { unicode_char } `"` .
```
#### Examples:
```
cpu
_cpu_stats
"1h"
"anything really"
"1_Crazy-1337.identifier>NAME👍"
```
## Keywords
```
ALL ALTER AS ASC BEGIN BY
CREATE CONTINUOUS DATABASE DATABASES DEFAULT DELETE
DESC DROP DURATION END EXISTS EXPLAIN
FIELD FROM GRANT GROUP IF IN
INNER INSERT INTO KEY KEYS LIMIT
SHOW MEASUREMENT MEASUREMENTS OFFSET ON ORDER
PASSWORD POLICY POLICIES PRIVILEGES QUERIES QUERY
READ REPLICATION RETENTION REVOKE SELECT SERIES
SLIMIT SOFFSET TAG TO USER USERS
VALUES WHERE WITH WRITE
```
## Literals
### Integers
InfluxQL supports decimal integer literals. Hexadecimal and octal literals are not currently supported.
```
int_lit = ( "1" … "9" ) { digit } .
```
### Floats
InfluxQL supports floating-point literals. Exponents are not currently supported.
```
float_lit = int_lit "." int_lit .
```
### Strings
String literals must be surrounded by single quotes. Strings may contain `'` characters as long as they are escaped (i.e., `\'`).
```
string_lit = `'` { unicode_char } `'` .
```
### Durations
Duration literals specify a length of time. An integer literal followed immediately (with no spaces) by a duration unit listed below is interpreted as a duration literal.
```
Duration unit definitions
-------------------------
| Units | Meaning |
|--------|-----------------------------------------|
| u or µ | microseconds (1 millionth of a second) |
| ms | milliseconds (1 thousandth of a second) |
| s | second |
| m | minute |
| h | hour |
| d | day |
| w | week |
```
```
duration_lit = int_lit duration_unit .
duration_unit = "u" | "µ" | "ms" | "s" | "m" | "h" | "d" | "w" .
```
### Dates & Times
The date and time literal format is not specified in EBNF like the rest of this document. It is specified using Go's date / time parsing format, which is a reference date written in the format required by InfluxQL. The reference date time is:
InfluxQL reference date time: January 2nd, 2006 at 3:04:05 PM
```
time_lit = "2006-01-02 15:04:05.999999" | "2006-01-02"
```
### Booleans
```
bool_lit = TRUE | FALSE .
```
### Regular Expressions
```
regex_lit = "/" { unicode_char } "/" .
```
## Queries
A query is composed of one or more statements separated by a semicolon.
```
query = statement { ";" statement } .
statement = alter_retention_policy_stmt |
create_continuous_query_stmt |
create_database_stmt |
create_retention_policy_stmt |
create_user_stmt |
delete_stmt |
drop_continuous_query_stmt |
drop_database_stmt |
drop_measurement_stmt |
drop_retention_policy_stmt |
drop_series_stmt |
drop_user_stmt |
grant_stmt |
show_continuous_queries_stmt |
show_databases_stmt |
show_field_keys_stmt |
show_measurements_stmt |
show_retention_policies |
show_series_stmt |
show_tag_keys_stmt |
show_tag_values_stmt |
show_users_stmt |
revoke_stmt |
select_stmt .
```
## Statements
### ALTER RETENTION POLICY
```
alter_retention_policy_stmt = "ALTER RETENTION POLICY" policy_name "ON"
db_name retention_policy_option
[ retention_policy_option ]
[ retention_policy_option ] .
db_name = identifier .
policy_name = identifier .
retention_policy_option = retention_policy_duration |
retention_policy_replication |
"DEFAULT" .
retention_policy_duration = "DURATION" duration_lit .
retention_policy_replication = "REPLICATION" int_lit
```
#### Examples:
```sql
-- Set default retention policy for mydb to 1h.cpu.
ALTER RETENTION POLICY "1h.cpu" ON mydb DEFAULT;
-- Change duration and replication factor.
ALTER RETENTION POLICY policy1 ON somedb DURATION 1h REPLICATION 4
```
### CREATE CONTINUOUS QUERY
```
create_continuous_query_stmt = "CREATE CONTINUOUS QUERY" query_name "ON" db_name
"BEGIN" select_stmt "END" .
query_name = identifier .
```
#### Examples:
```sql
-- selects from default retention policy and writes into 6_months retention policy
CREATE CONTINUOUS QUERY "10m_event_count"
ON db_name
BEGIN
SELECT count(value)
INTO "6_months".events
FROM events
GROUP BY time(10m)
END;
-- this selects from the output of one continuous query in one retention policy and outputs to another series in another retention policy
CREATE CONTINUOUS QUERY "1h_event_count"
ON db_name
BEGIN
SELECT sum(count) as count
INTO "2_years".events
FROM "6_months".events
GROUP BY time(1h)
END;
```
### CREATE DATABASE
```
create_database_stmt = "CREATE DATABASE" db_name
```
#### Example:
```sql
CREATE DATABASE foo
```
### CREATE RETENTION POLICY
```
create_retention_policy_stmt = "CREATE RETENTION POLICY" policy_name "ON"
db_name retention_policy_duration
retention_policy_replication
[ "DEFAULT" ] .
```
#### Examples
```sql
-- Create a retention policy.
CREATE RETENTION POLICY "10m.events" ON somedb DURATION 10m REPLICATION 2;
-- Create a retention policy and set it as the default.
CREATE RETENTION POLICY "10m.events" ON somedb DURATION 10m REPLICATION 2 DEFAULT;
```
### CREATE USER
```
create_user_stmt = "CREATE USER" user_name "WITH PASSWORD" password
[ "WITH ALL PRIVILEGES" ] .
```
#### Examples:
```sql
-- Create a normal database user.
CREATE USER jdoe WITH PASSWORD '1337password';
-- Create a cluster admin.
-- Note: Unlike the GRANT statement, the "PRIVILEGES" keyword is required here.
CREATE USER jdoe WITH PASSWORD '1337password' WITH ALL PRIVILEGES;
```
### DELETE
```
delete_stmt = "DELETE" from_clause where_clause .
```
#### Example:
```sql
-- delete data points from the cpu measurement where the region tag
-- equals 'uswest'
DELETE FROM cpu WHERE region = 'uswest';
```
### DROP CONTINUOUS QUERY
```
drop_continuous_query_stmt = "DROP CONTINUOUS QUERY" query_name .
```
#### Example:
```sql
DROP CONTINUOUS QUERY myquery;
```
### DROP DATABASE
```
drop_database_stmt = "DROP DATABASE" db_name .
```
#### Example:
```sql
DROP DATABASE mydb;
```
### DROP MEASUREMENT
```
drop_measurement_stmt = "DROP MEASUREMENT" measurement .
```
#### Examples:
```sql
-- drop the cpu measurement
DROP MEASUREMENT cpu;
```
### DROP RETENTION POLICY
```
drop_retention_policy_stmt = "DROP RETENTION POLICY" policy_name "ON" db_name .
```
#### Example:
```sql
-- drop the retention policy named 1h.cpu from mydb
DROP RETENTION POLICY "1h.cpu" ON mydb;
```
### DROP SERIES
```
drop_series_stmt = "DROP SERIES" [ from_clause ] [ where_clause ]
```
#### Example:
```sql
```
### DROP USER
```
drop_user_stmt = "DROP USER" user_name .
```
#### Example:
```sql
DROP USER jdoe;
```
### GRANT
NOTE: Users can be granted privileges on databases that do not exist.
```
grant_stmt = "GRANT" privilege [ on_clause ] to_clause
```
#### Examples:
```sql
-- grant cluster admin privileges
GRANT ALL TO jdoe;
-- grant read access to a database
GRANT READ ON mydb TO jdoe;
```
### SHOW CONTINUOUS QUERIES
```
show_continuous_queries_stmt = "SHOW CONTINUOUS QUERIES" .
```
#### Example:
```sql
-- show all continuous queries
SHOW CONTINUOUS QUERIES;
```
### SHOW DATABASES
```
show_databases_stmt = "SHOW DATABASES" .
```
#### Example:
```sql
-- show all databases
SHOW DATABASES;
```
### SHOW FIELD KEYS
```
show_field_keys_stmt = "SHOW FIELD KEYS" [ from_clause ] .
```
#### Examples:
```sql
-- show field keys from all measurements
SHOW FIELD KEYS;
-- show field keys from specified measurement
SHOW FIELD KEYS FROM cpu;
```
### SHOW MEASUREMENTS
```
show_measurements_stmt = "SHOW MEASUREMENTS" [ where_clause ] [ group_by_clause ] [ limit_clause ]
[ offset_clause ] .
```
```sql
-- show all measurements
SHOW MEASUREMENTS;
-- show measurements where region tag = 'uswest' AND host tag = 'serverA'
SHOW MEASUREMENTS WHERE region = 'uswest' AND host = 'serverA';
```
### SHOW RETENTION POLICIES
```
show_retention_policies = "SHOW RETENTION POLICIES" db_name .
```
#### Example:
```sql
-- show all retention policies on a database
SHOW RETENTION POLICIES mydb;
```
### SHOW SERIES
```
show_series_stmt = "SHOW SERIES" [ from_clause ] [ where_clause ] [ group_by_clause ]
[ limit_clause ] [ offset_clause ] .
```
#### Example:
```sql
```
### SHOW TAG KEYS
```
show_tag_keys_stmt = "SHOW TAG KEYS" [ from_clause ] [ where_clause ] [ group_by_clause ]
[ limit_clause ] [ offset_clause ] .
```
#### Examples:
```sql
-- show all tag keys
SHOW TAG KEYS;
-- show all tag keys from the cpu measurement
SHOW TAG KEYS FROM cpu;
-- show all tag keys from the cpu measurement where the region key = 'uswest'
SHOW TAG KEYS FROM cpu WHERE region = 'uswest';
-- show all tag keys where the host key = 'serverA'
SHOW TAG KEYS WHERE host = 'serverA';
```
### SHOW TAG VALUES
```
show_tag_values_stmt = "SHOW TAG VALUES" [ from_clause ] with_tag_clause [ where_clause ]
[ group_by_clause ] [ limit_clause ] [ offset_clause ] .
```
#### Examples:
```sql
-- show all tag values across all measurements for the region tag
SHOW TAG VALUES WITH TAG = 'region';
-- show tag values from the cpu measurement for the region tag
SHOW TAG VALUES FROM cpu WITH TAG = 'region';
-- show tag values from the cpu measurement for region & host tag keys where service = 'redis'
SHOW TAG VALUES FROM cpu WITH TAG IN (region, host) WHERE service = 'redis';
```
### SHOW USERS
```
show_users_stmt = "SHOW USERS" .
```
#### Example:
```sql
-- show all users
SHOW USERS;
```
### REVOKE
```
revoke_stmt = "REVOKE" privilege [ "ON" db_name ] "FROM" user_name
```
#### Examples:
```sql
-- revoke cluster admin from jdoe
REVOKE ALL PRIVILEGES FROM jdoe;
-- revoke read privileges from jdoe on mydb
REVOKE READ ON mydb FROM jdoe;
```
### SELECT
```
select_stmt = fields from_clause [ into_clause ] [ where_clause ]
[ group_by_clause ] [ order_by_clause ] [ limit_clause ]
[ offset_clause ] [ slimit_clause ] [ soffset_clause ].
```
#### Examples:
```sql
-- select mean value from the cpu measurement where region = 'uswest' grouped by 10 minute intervals
SELECT mean(value) FROM cpu WHERE region = 'uswest' GROUP BY time(10m) fill(0);
```
## Clauses
```
from_clause = "FROM" measurements .
group_by_clause = "GROUP BY" dimensions fill(<option>).
limit_clause = "LIMIT" int_lit .
offset_clause = "OFFSET" int_lit .
slimit_clause = "SLIMIT" int_lit .
soffset_clause = "SOFFSET" int_lit .
on_clause = db_name .
order_by_clause = "ORDER BY" sort_fields .
to_clause = user_name .
where_clause = "WHERE" expr .
```
## Expressions
```
binary_op = "+" | "-" | "*" | "/" | "AND" | "OR" | "=" | "!=" | "<" |
"<=" | ">" | ">=" .
expr = unary_expr { binary_op unary_expr } .
unary_expr = "(" expr ")" | var_ref | time_lit | string_lit | int_lit |
float_lit | bool_lit | duration_lit | regex_lit .
```
## Other
```
dimension = expr .
dimensions = dimension { "," dimension } .
field = expr [ alias ] .
fields = field { "," field } .
measurement = measurement_name |
( policy_name "." measurement_name ) |
( db_name "." [ policy_name ] "." measurement_name ) .
measurements = measurement { "," measurement } .
measurement_name = identifier .
password = identifier .
policy_name = identifier .
privilege = "ALL" [ "PRIVILEGES" ] | "READ" | "WRITE" .
series_id = int_lit .
sort_field = field_name [ ASC | DESC ] .
sort_fields = sort_field { "," sort_field } .
user_name = identifier .
```


@@ -1,682 +0,0 @@
SELECT mean(value) FROM cpu
WHERE service = 'redis'
GROUP BY region, time(10m)
based on group by, get unique tag sets for region
cpu region=uswest -> get series ids from cpu where <tagset> and <where cond>
cpu region=useast -> get series ids from cpu where <tagset> and <where cond>
for each shard group in time range {
for each group by tagset {
shardItrs := map[shard]itr
for id := range seriesIds {
shard := group.shardForId(id)
shardItrs[shard].addId(id)
}
for _, itr := range shardItrs {
itr.tags = tagset
itr.name = cpu
}
}
}
(host = 'serverA' AND value > 100) OR (region = 'uswest' AND value < 10)
value > 100 OR value < 10 (host=serverA, region=uswest)
value < 10 (host!=serverA, region=uswest)
value > 100
filters := make(map[whereCond]seriesIds)
filters := make(map[uint32]whereCond)
seriesIds
select mean(value) from foo WHERE someField = 'important' group by time(5m)
===================
select derivative(mean(value))
from cpu
group by time(5m)
select mean(value) from cpu group by time(5m)
select top(10, value) from cpu group by host where time > now() - 1h
this query uses this type of cycle
-------REMOTE HOST ------------- -----HOST THAT GOT QUERY ---
map -> reduce -> combine -> map -> reduce -> combine -> user
select mean(value) cpu group by time(5m), host where time > now() -4h
map -> reduce -> combine -> user
map -> reduce -> map -> reduce -> combine -> user
map -> reduce -> combine -> map -> reduce -> combine -> user
select value from
(
select mean(value) AS value FROM cpu GROUP BY time(5m)
)
[
{
name: cpu,
tags: {
host: servera,
},
columns: [time, mean],
values : [
[23423423, 88.8]
]
},
{
name: cpu,
tags: {
host: serverb,
}
}
]
================================================================================
// list series ->
/*
[
{
"name": "cpu",
"columns": ["id", "region", "host"],
"values": [
1, "uswest", "servera",
2, "uswest", "serverb"
]
},
{
""
}
]
list series where region = 'uswest'
list tags where name = 'cpu'
list tagKeys where name = 'cpu'
list series where name = 'cpu' and region = 'uswest'
select distinct(region) from cpu
list names
list tagKeys
list tagValues where tagKey = 'region' and time > now() -1h
select a.value, b.value from a join b where a.user_id == 100
select a.value from a where a.user_id == 100
select b.value from b
3 1 2
select sum(a.value) + (sum(b.value) / min(b.value)) from a join b group by region
select suM(a.value) from a group by time(5m)
select sum(b.value) from b group by time(5m)
execute sum MR on series [23, 65, 88, 99, 101, 232]
map -> 1 tick per 5m
reduce -> combines ticks per 5m interval -> outputs
planner -> take reduce output per 5m interval from the two reducers
and combine with the join function, which is +
[1,/,2,+,3]
for v := s[0].Next(); v != nil; v = s[0].Next() {
var result interface{}
for i := 1; i < len(s); i += 2 {
// it's an operator
if i % 2 == 1 {
}
}
}
select count(distinct(host)) from cpu where time > now() - 5m
type mapper interface {
Map(iterator)
}
type floatCountMapper struct {}
func(m *floatCountMapper) Map(i Iterator) {
itr := i.(*floatIterator)
}
type Iterator interface {
itr()
}
type iterator struct {
cursor *bolt.Cursor
timeBucket time.Time
name string
seriesID uint32
tags map[string]string
fieldID uint8
where *WhereClause
}
func (i *intIterator) itr() {}
func (i *intIterator) Next() (k int64, v float64) {
// loop through bolt cursor applying where clause and yield next point
// if cursor is at end or time is out of range, yield nil
}
*/
field: ipaddress
select top(10, count, ipaddress) from hits group by time(5m), host
map -> 10 records, <key(time,host)>, <value(count,ipaddresses)>
reducer -> take in all map outputs for each 5m bucket
combine them, sort, take out the top 10
output -> 10 records, count, ipaddresses, time
==========
select top(10, count, host) from hits group by time(5m)
select host, value from cpu where time > now() - 1h
select last(value) from cpu group by time(auto), host fill(previous) where time > now() - 1h
select sum(value) from cpu group by host where time > now() - 1h
select sum(value) from cpu where time > now() - 1h
select * from a;
[
{
"name": "cpu",
"tags": {
"host": "servera"
},
"fields": [
"time",
"count",
"ipaddress"
]
"values": [
[t, v, "123.23.22.2"],
[t, v, "192.232.2.2"],
]
},
{
"name": "cpu",
"tags": {
"host": "serverb"
},
"values": [
[t, v],
[t + 1, v],
]
},
]
[t, v, "servera"]
[t, v, "serverb"]
[t+1, v, "servera"]
[t+1, v, "serverb"]
======
a INNER JOIN b
- planner always has "group by"
select count(errors.value) / count(requests.value) as error_rate
from errors join requests as "mysuperseries"
group by time(5m)
fill(previous)
where time > now() - 4h
select mean(value) as cpu_mean from cpu group by time(5m) where host = 'servera'
select count(value) from errors group by time(5m) fill(previous) where..
select count(value) from requests group by time(5m) fill(previ...
{
"name": "errors.requests",
"tags": {},
"fields": ["time", "errors.count", "requests.count"],
"values": [
[t, n, m]
]
}
a MERGE b
a - t
b - t
a - t + 1
b - t + 1
b - t + 2
a - t + 3
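A hedged sketch of the MERGE interleaving above: walk two time-ordered series and emit points in timestamp order (the pt type and mergeByTime are illustrative names):
type pt struct {
	t int64
	v float64
}

func mergeByTime(a, b []pt) []pt {
	out := make([]pt, 0, len(a)+len(b))
	i, j := 0, 0
	for i < len(a) && j < len(b) {
		if a[i].t <= b[j].t {
			out = append(out, a[i])
			i++
		} else {
			out = append(out, b[j])
			j++
		}
	}
	out = append(out, a[i:]...)
	return append(out, b[j:]...)
}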
<cpu, host>
select value from cpu
select mean(value) from cpu group by time(5m)
select first(value) from cpu
=====
1. Group by time
2. Group by
3. Raw
======
SELECT sum(value) FROM myseries
host=servera
host=serverb
{"host":"servera", "value":100}
{"host":"serverb", "value":"hello!"}
series = <name, tags>
series = seriesID
seriesID -> name
name has_many seriesIDs
name has_many fields
field -> (type, id)
<seriesName,fieldID> -> (type, id)
<seriesID, time> -> fieldValues
field
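One way the <seriesID, time> -> fieldValues key above could be laid out, as a hedged sketch (this is not the actual storage format; the package and function names are made up):
package storage

import "encoding/binary"

// seriesKey encodes a series id and timestamp big-endian so that a range
// scan over a single series returns points in time order.
func seriesKey(seriesID uint32, t int64) []byte {
	k := make([]byte, 12)
	binary.BigEndian.PutUint32(k[0:4], seriesID)
	binary.BigEndian.PutUint64(k[4:12], uint64(t))
	return k
}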
type topCountMapper struct {
count int
}
func newTopCountMapper(count int) {
}
func (t *topCountMapper) Map(i Iterator) {
topValues := make(map[string]int)
for p := i.Next(); p != nil; p = i.Next() {
topValues[p.String()] += 1
}
for k, v := range topValues {
t.job.Emit(k, v)
}
}
type topCountReducer struct {
count int
}
func (r *topCountReducer) Reduce(i Iterator) {
realzTop10 := make(map[string]int)
for v := i.Next(); v != nil; v = i.Next() {
top10 := v.(map[string]int)
for k, n := range top10 {
realzTop10[k] += n
}
}
realyrealTop10 := make(map[string]int)
// do sorty magic on realzTop10 and set realyrealTop10
r.job.Emit(realyrealTop10)
}
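The "sorty magic" step above written out as a standalone helper that leans on the standard sort package (a hedged sketch assuming the per-key counts fit in memory, not the real reducer):
// topN returns the n keys with the highest counts, best first.
func topN(counts map[string]int, n int) []string {
	keys := make([]string, 0, len(counts))
	for k := range counts {
		keys = append(keys, k)
	}
	sort.Slice(keys, func(i, j int) bool { return counts[keys[i]] > counts[keys[j]] })
	if len(keys) > n {
		keys = keys[:n]
	}
	return keys
}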
type Transformer interface {
Transform(interface{}) Series
}
type ReduceOutput struct {
values [][]interface{}
fieldIDs []
}
// for topCountReducer ReduceOutput would look like
// values = [t, c, "some string"]
// fieldIDs = [0, 0, 3]
SELECT val1, val2 FROM abc
select mean(value) from cpu where region='uswest' group by time(5m), host
2000 series
200 series to each machine
================================================================================
type Mapper interface {
Map(Iterator)
}
type countMapper struct {}
// Iterator is the entire series if not an aggregate query
// or iterator is the entire time bucket if an aggregate query
func (m *sumMapper) Map(i Iterator) {
var sum int
for p := i.Next(); p != nil; p = i.Next() {
sum += p.Float()
}
m.Emitter.Emit(k, sum)
}
type Point interface {
String(name)
Int(name)
}
type cursorIterator struct {
Cursor *bolt.Cursor
FieldID uint8
Value []byte
}
func (i cursorIterator) Next() Point {
_, i.Value = i.Cursor.Next()
return byteSlicePoint(i.Value)
}
type byteSlicePoint []byte
func (p byteSlicePoint) String() string {
// unmarshal from byte slice.
}
/*
{
"name": "foo",
"fields": {
"value": 23.2,
"user_id": 23
},
"tags": {
}
}
*/
CNT ID0 VALUEVALUEVALUEVALUEVALUEVALUEVALUEVALU
0001 0000 0000 0000 0000 0000 0000 0000 0000 0000
CNT ID0 ID1 ID2 FLOATFLOA STRINGSTR STRINGSTR
0002 0001 0002 0003 0000 0000 0000 0000 0000 0000
// SELECT count() FROM cpu GROUP BY host
// SELECT mean(value) from cpu where region = 'uswest'
// SELECT derivative(value) from redis_key_count GROUP BY time(5m)
// SELECT host, mean(value)
// FROM cpu
// GROUP BY host
// HAVING top(20, mean)
// WHERE time > now() - 1h
// AND region = 'uswest'
// SELECT ipaddress, count(ipaddress)
// FROM hits
// GROUP BY ipaddress
// HAVING top(10, count)
// WHERE time > now() - 1h
series := meta.DistinctTagValues("cpu", "host")
type Series struct {
name string
fields map[uint8]string
}
type SeriesData struct {
ID
tags map[string]string
}
<id, time, value>
mrJobs := make([]*MRJob, 0, len(series))
for _, s := range series {
j := NewMRJob(s)
mrJobs = append(mrJobs, j)
j.Execute()
}
for _, j := range mrJobs {
// pull in results
// construct series object with same tags as series
}
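A hedged sketch of fanning those per-series jobs out concurrently and collecting the results; series, result, and runJob below are stand-ins, not the real planner types:
package main

import (
	"fmt"
	"sync"
)

type series struct{ name string }

type result struct {
	name  string
	value float64
}

// runJob stands in for executing one per-series map/reduce job.
func runJob(s series) result { return result{name: s.name, value: 42} }

func main() {
	all := []series{{"cpu,host=servera"}, {"cpu,host=serverb"}}
	results := make(chan result, len(all))
	var wg sync.WaitGroup
	for _, s := range all {
		wg.Add(1)
		go func(s series) {
			defer wg.Done()
			results <- runJob(s)
		}(s)
	}
	wg.Wait()
	close(results)
	for r := range results {
		fmt.Println(r.name, r.value) // merge into the response, keeping the series' tags
	}
}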
================================================================================
type Iterator interface {
Next() (interface{}, bool)
}
type iteratorCounter struct {
iterator Iterator
}
func (iteratorCounter) Next() {
}
SELECT max(a.value), min(a.value), max(b.value)
FROM a, b
WHERE a.host = 'influxdb.org'
grouper {
[]Iterator
}
SELECT max(a.value) FROM a WHERE a.host = 'influxdb.org' --> 1 value
SELECT min(a.value) FROM a WHERE a.host = 'influxdb.org' --> 1 value
SELECT max(b.value) FROM b --> 1 value
SELECT max(a.value) FROM a GROUP BY time WHERE a.host = 'influxdb.org' --> key,value
timeGrouper {
[]Iterator
}
type maxMapper struct {
}
IntervalIterator {
}
maxMapper.Map(Iterator)
- GROUP BY time
- GROUP BY time, <tag>
- GROUP BY <tag>
COUNT(field)
MIN(field)
MAX(field)
MEAN(field)
MODE(field)
MEDIAN(field)
COUNT(DISTINCT field)
PERCENTILE(field, N)
HISTOGRAM(field [, bucketSize])
DERIVATIVE(field)
SUM(field)
STDDEV(field)
FIRST(field)
LAST(field)
DIFFERENCE(field)
TOP(field, N)
BOTTOM(field, N) <----- multivalue
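Hedged sketches of two of the reducers listed above over an in-memory float64 window (MEAN and a nearest-rank PERCENTILE); the real engine streams values instead of buffering them, but the arithmetic is the same:
package reducers

import (
	"math"
	"sort"
)

// mean returns the arithmetic mean of vs (NaN for an empty slice).
func mean(vs []float64) float64 {
	var sum float64
	for _, v := range vs {
		sum += v
	}
	return sum / float64(len(vs))
}

// percentile returns the nearest-rank nth percentile of vs.
func percentile(vs []float64, n float64) float64 {
	if len(vs) == 0 {
		return math.NaN()
	}
	s := append([]float64(nil), vs...)
	sort.Float64s(s)
	idx := int(math.Ceil(n/100*float64(len(s)))) - 1
	if idx < 0 {
		idx = 0
	}
	return s[idx]
}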
================================================================================

File diff suppressed because it is too large Load Diff

View File

@@ -1,64 +0,0 @@
/*
Package influxql implements a parser for the InfluxDB query language.
InfluxQL is a DML and DDL language for the InfluxDB time series database.
It provides the ability to query for aggregate statistics as well as create
and configure the InfluxDB server.
Selecting data
The SELECT query is used for retrieving data from one or more series. It allows
for a list of columns followed by a list of series to select from.
SELECT value FROM cpu_load
You can also add a conditional expression to limit the results of the query:
SELECT value FROM cpu_load WHERE host = 'influxdb.com'
Two or more series can be combined into a single query and executed together:
SELECT cpu0.value + cpu1.value
FROM cpu_load AS cpu0 INNER JOIN cpu_load cpu1 ON cpu0.host = cpu1.host
Limits and ordering can be set on selection queries as well:
SELECT value FROM cpu_load LIMIT 100 ORDER DESC;
Removing data
The DELETE query is available to remove time series data points from the
database. This query will delete "cpu_load" values older than an hour:
DELETE FROM cpu_load WHERE time < now() - 1h
Continuous Queries
Queries can be run indefinitely on the server in order to generate new series.
This is done by running a "SELECT INTO" query. For example, this query computes
the hourly mean for cpu_load and stores it into a "cpu_load" series in the
"daily" shard space.
SELECT mean(value) AS value FROM cpu_load GROUP BY 1h
INTO daily.cpu_load
If there is existing data on the source series then this query will be run for
all historic data. To only execute the query on new incoming data you can append
"NO BACKFILL" to the end of the query:
SELECT mean(value) AS value FROM cpu_load GROUP BY 1h
INTO daily.cpu_load NO BACKFILL
Continuous queries will return an id that can be used to remove them in the
future. To remove a continuous query, use the DROP CONTINUOUS QUERY statement:
DROP CONTINUOUS QUERY 12
You can also list all continuous queries by running:
LIST CONTINUOUS QUERIES
*/
package influxql
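// A hedged usage sketch; ParseStatement is assumed to be the parser entry
// point defined elsewhere in this package, and the query text is only
// illustrative.
func exampleParse() (Statement, error) {
	return ParseStatement(`SELECT mean(value) AS value FROM cpu_load GROUP BY time(1h)`)
}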

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,223 +0,0 @@
package influxql
import (
"encoding/json"
"errors"
"hash/fnv"
"sort"
)
// TagSet is a fundamental concept within the query system. It represents a composite series,
// composed of multiple individual series that share a set of tag attributes.
type TagSet struct {
Tags map[string]string
Filters []Expr
SeriesKeys []string
Key []byte
}
// AddFilter adds a series-level filter to the TagSet.
func (t *TagSet) AddFilter(key string, filter Expr) {
t.SeriesKeys = append(t.SeriesKeys, key)
t.Filters = append(t.Filters, filter)
}
// Row represents a single row returned from the execution of a statement.
type Row struct {
Name string `json:"name,omitempty"`
Tags map[string]string `json:"tags,omitempty"`
Columns []string `json:"columns,omitempty"`
Values [][]interface{} `json:"values,omitempty"`
Err error `json:"err,omitempty"`
}
// tagsHash returns a hash of tag key/value pairs.
func (r *Row) tagsHash() uint64 {
h := fnv.New64a()
keys := r.tagsKeys()
for _, k := range keys {
h.Write([]byte(k))
h.Write([]byte(r.Tags[k]))
}
return h.Sum64()
}
// tagsKeys returns a sorted list of tag keys.
func (r *Row) tagsKeys() []string {
a := make([]string, 0, len(r.Tags))
for k := range r.Tags {
a = append(a, k)
}
sort.Strings(a)
return a
}
// Rows represents a list of rows that can be sorted consistently by name/tag.
type Rows []*Row
func (p Rows) Len() int { return len(p) }
func (p Rows) Less(i, j int) bool {
// Sort by name first.
if p[i].Name != p[j].Name {
return p[i].Name < p[j].Name
}
// Sort by tag set hash. Tags don't have a meaningful sort order so we
// just compute a hash and sort by that instead. This allows the tests
// to receive rows in a predictable order every time.
return p[i].tagsHash() < p[j].tagsHash()
}
func (p Rows) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
// Result represents a resultset returned from a single statement.
type Result struct {
// StatementID is just the statement's position in the query. It's used
// to combine statement results if they're being buffered in memory.
StatementID int `json:"-"`
Series Rows
Err error
}
// MarshalJSON encodes the result into JSON.
func (r *Result) MarshalJSON() ([]byte, error) {
// Define a struct that outputs "error" as a string.
var o struct {
Series []*Row `json:"series,omitempty"`
Err string `json:"error,omitempty"`
}
// Copy fields to output struct.
o.Series = r.Series
if r.Err != nil {
o.Err = r.Err.Error()
}
return json.Marshal(&o)
}
// UnmarshalJSON decodes the data into the Result struct
func (r *Result) UnmarshalJSON(b []byte) error {
var o struct {
Series []*Row `json:"series,omitempty"`
Err string `json:"error,omitempty"`
}
err := json.Unmarshal(b, &o)
if err != nil {
return err
}
r.Series = o.Series
if o.Err != "" {
r.Err = errors.New(o.Err)
}
return nil
}
func GetProcessor(expr Expr, startIndex int) (Processor, int) {
switch expr := expr.(type) {
case *VarRef:
return newEchoProcessor(startIndex), startIndex + 1
case *Call:
return newEchoProcessor(startIndex), startIndex + 1
case *BinaryExpr:
return getBinaryProcessor(expr, startIndex)
case *ParenExpr:
return GetProcessor(expr.Expr, startIndex)
case *NumberLiteral:
return newLiteralProcessor(expr.Val), startIndex
case *StringLiteral:
return newLiteralProcessor(expr.Val), startIndex
case *BooleanLiteral:
return newLiteralProcessor(expr.Val), startIndex
case *TimeLiteral:
return newLiteralProcessor(expr.Val), startIndex
case *DurationLiteral:
return newLiteralProcessor(expr.Val), startIndex
}
panic("unreachable")
}
type Processor func(values []interface{}) interface{}
func newEchoProcessor(index int) Processor {
return func(values []interface{}) interface{} {
return values[index]
}
}
func newLiteralProcessor(val interface{}) Processor {
return func(values []interface{}) interface{} {
return val
}
}
func getBinaryProcessor(expr *BinaryExpr, startIndex int) (Processor, int) {
lhs, index := GetProcessor(expr.LHS, startIndex)
rhs, index := GetProcessor(expr.RHS, index)
return newBinaryExprEvaluator(expr.Op, lhs, rhs), index
}
func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor {
switch op {
// Note: only DIV below needs to guard against a zero right-hand side; the
// other operators accept any pair of float64 operands.
case ADD:
return func(values []interface{}) interface{} {
l := lhs(values)
r := rhs(values)
if lv, ok := l.(float64); ok {
if rv, ok := r.(float64); ok {
return lv + rv
}
}
return nil
}
case SUB:
return func(values []interface{}) interface{} {
l := lhs(values)
r := rhs(values)
if lv, ok := l.(float64); ok {
if rv, ok := r.(float64); ok {
return lv - rv
}
}
return nil
}
case MUL:
return func(values []interface{}) interface{} {
l := lhs(values)
r := rhs(values)
if lv, ok := l.(float64); ok {
if rv, ok := r.(float64); ok {
return lv * rv
}
}
return nil
}
case DIV:
return func(values []interface{}) interface{} {
l := lhs(values)
r := rhs(values)
if lv, ok := l.(float64); ok {
if rv, ok := r.(float64); ok {
if rv != 0 {
return lv / rv
}
}
}
return nil
}
default:
// we shouldn't get here, but give them back nils if it goes this way
return func(values []interface{}) interface{} {
return nil
}
}
}
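// A hedged sketch of evaluating `value + 10` by hand through GetProcessor.
// It assumes the BinaryExpr, VarRef, and NumberLiteral AST types defined
// elsewhere in this package; the input row is made up, with column 0
// carrying the field value the VarRef echoes back.
func exampleEvalAddTen() interface{} {
	expr := &BinaryExpr{
		Op:  ADD,
		LHS: &VarRef{Val: "value"},
		RHS: &NumberLiteral{Val: 10},
	}
	p, _ := GetProcessor(expr, 0)
	return p([]interface{}{float64(5)}) // float64(15)
}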

View File

@@ -1,561 +0,0 @@
package influxql
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"strings"
)
// Scanner represents a lexical scanner for InfluxQL.
type Scanner struct {
r *reader
}
// NewScanner returns a new instance of Scanner.
func NewScanner(r io.Reader) *Scanner {
return &Scanner{r: &reader{r: bufio.NewReader(r)}}
}
// Scan returns the next token and position from the underlying reader.
// Also returns the literal text read for strings, numbers, and duration tokens
// since these token types can have different literal representations.
func (s *Scanner) Scan() (tok Token, pos Pos, lit string) {
// Read next code point.
ch0, pos := s.r.read()
// If we see whitespace then consume all contiguous whitespace.
// If we see a letter, or certain acceptable special characters, then consume
// as an ident or reserved word.
if isWhitespace(ch0) {
return s.scanWhitespace()
} else if isLetter(ch0) || ch0 == '_' {
s.r.unread()
return s.scanIdent()
} else if isDigit(ch0) {
return s.scanNumber()
}
// Otherwise parse individual characters.
switch ch0 {
case eof:
return EOF, pos, ""
case '"':
s.r.unread()
return s.scanIdent()
case '\'':
return s.scanString()
case '.':
ch1, _ := s.r.read()
s.r.unread()
if isDigit(ch1) {
return s.scanNumber()
}
return DOT, pos, ""
case '+', '-':
return s.scanNumber()
case '*':
return MUL, pos, ""
case '/':
return DIV, pos, ""
case '=':
if ch1, _ := s.r.read(); ch1 == '~' {
return EQREGEX, pos, ""
}
s.r.unread()
return EQ, pos, ""
case '!':
if ch1, _ := s.r.read(); ch1 == '=' {
return NEQ, pos, ""
} else if ch1 == '~' {
return NEQREGEX, pos, ""
}
s.r.unread()
case '>':
if ch1, _ := s.r.read(); ch1 == '=' {
return GTE, pos, ""
}
s.r.unread()
return GT, pos, ""
case '<':
if ch1, _ := s.r.read(); ch1 == '=' {
return LTE, pos, ""
} else if ch1 == '>' {
return NEQ, pos, ""
}
s.r.unread()
return LT, pos, ""
case '(':
return LPAREN, pos, ""
case ')':
return RPAREN, pos, ""
case ',':
return COMMA, pos, ""
case ';':
return SEMICOLON, pos, ""
}
return ILLEGAL, pos, string(ch0)
}
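// A hedged usage sketch: tokenize a statement, skipping whitespace. For
// `SELECT value FROM cpu` this yields ["SELECT", "value", "FROM", "cpu"].
func exampleScan(q string) []string {
	var toks []string
	s := NewScanner(strings.NewReader(q))
	for {
		tok, _, lit := s.Scan()
		if tok == EOF {
			break
		}
		if tok == WS {
			continue
		}
		toks = append(toks, tokstr(tok, lit))
	}
	return toks
}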
// scanWhitespace consumes the current rune and all contiguous whitespace.
func (s *Scanner) scanWhitespace() (tok Token, pos Pos, lit string) {
// Create a buffer and read the current character into it.
var buf bytes.Buffer
ch, pos := s.r.curr()
_, _ = buf.WriteRune(ch)
// Read every subsequent whitespace character into the buffer.
// Non-whitespace characters and EOF will cause the loop to exit.
for {
ch, _ = s.r.read()
if ch == eof {
break
} else if !isWhitespace(ch) {
s.r.unread()
break
} else {
_, _ = buf.WriteRune(ch)
}
}
return WS, pos, buf.String()
}
func (s *Scanner) scanIdent() (tok Token, pos Pos, lit string) {
// Save the starting position of the identifier.
_, pos = s.r.read()
s.r.unread()
var buf bytes.Buffer
for {
if ch, _ := s.r.read(); ch == eof {
break
} else if ch == '"' {
tok0, pos0, lit0 := s.scanString()
if tok0 == BADSTRING || tok0 == BADESCAPE {
return tok0, pos0, lit0
}
return IDENT, pos, lit0
} else if isIdentChar(ch) {
s.r.unread()
buf.WriteString(ScanBareIdent(s.r))
} else {
s.r.unread()
break
}
}
lit = buf.String()
// If the literal matches a keyword then return that keyword.
if tok = Lookup(lit); tok != IDENT {
return tok, pos, ""
}
return IDENT, pos, lit
}
// scanString consumes a contiguous string of non-quote characters.
// Quote characters can be consumed if they're first escaped with a backslash.
func (s *Scanner) scanString() (tok Token, pos Pos, lit string) {
s.r.unread()
_, pos = s.r.curr()
var err error
lit, err = ScanString(s.r)
if err == errBadString {
return BADSTRING, pos, lit
} else if err == errBadEscape {
_, pos = s.r.curr()
return BADESCAPE, pos, lit
}
return STRING, pos, lit
}
func (s *Scanner) ScanRegex() (tok Token, pos Pos, lit string) {
_, pos = s.r.curr()
// Start & end sentinels.
start, end := '/', '/'
// Valid escape chars.
escapes := map[rune]rune{'/': '/'}
b, err := ScanDelimited(s.r, start, end, escapes, true)
if err == errBadEscape {
_, pos = s.r.curr()
return BADESCAPE, pos, lit
} else if err != nil {
return BADREGEX, pos, lit
}
return REGEX, pos, string(b)
}
// scanNumber consumes anything that looks like the start of a number.
// Numbers start with a digit, full stop, plus sign or minus sign.
// This function can return non-number tokens if a scan is a false positive.
// For example, a minus sign followed by a letter will just return a minus sign.
func (s *Scanner) scanNumber() (tok Token, pos Pos, lit string) {
var buf bytes.Buffer
// Check if the initial rune is a "+" or "-".
ch, pos := s.r.curr()
if ch == '+' || ch == '-' {
// Peek at the next two runes.
ch1, _ := s.r.read()
ch2, _ := s.r.read()
s.r.unread()
s.r.unread()
// This rune must be followed by a digit or a full stop and a digit.
if isDigit(ch1) || (ch1 == '.' && isDigit(ch2)) {
_, _ = buf.WriteRune(ch)
} else if ch == '+' {
return ADD, pos, ""
} else if ch == '-' {
return SUB, pos, ""
}
} else if ch == '.' {
// Peek and see if the next rune is a digit.
ch1, _ := s.r.read()
s.r.unread()
if !isDigit(ch1) {
return ILLEGAL, pos, "."
}
// Unread the full stop so we can read it later.
s.r.unread()
} else {
s.r.unread()
}
// Read as many digits as possible.
_, _ = buf.WriteString(s.scanDigits())
// If next code points are a full stop and digit then consume them.
if ch0, _ := s.r.read(); ch0 == '.' {
if ch1, _ := s.r.read(); isDigit(ch1) {
_, _ = buf.WriteRune(ch0)
_, _ = buf.WriteRune(ch1)
_, _ = buf.WriteString(s.scanDigits())
} else {
s.r.unread()
s.r.unread()
}
} else {
s.r.unread()
}
// Attempt to read as a duration if it doesn't have a fractional part.
if !strings.Contains(buf.String(), ".") {
// If the next rune is a duration unit (u, µ, ms, s, m, h, d, w) then return a duration token
if ch0, _ := s.r.read(); ch0 == 'u' || ch0 == 'µ' || ch0 == 's' || ch0 == 'h' || ch0 == 'd' || ch0 == 'w' {
_, _ = buf.WriteRune(ch0)
return DURATION_VAL, pos, buf.String()
} else if ch0 == 'm' {
_, _ = buf.WriteRune(ch0)
if ch1, _ := s.r.read(); ch1 == 's' {
_, _ = buf.WriteRune(ch1)
} else {
s.r.unread()
}
return DURATION_VAL, pos, buf.String()
}
s.r.unread()
}
return NUMBER, pos, buf.String()
}
// scanDigits consumes a contiguous series of digits.
func (s *Scanner) scanDigits() string {
var buf bytes.Buffer
for {
ch, _ := s.r.read()
if !isDigit(ch) {
s.r.unread()
break
}
_, _ = buf.WriteRune(ch)
}
return buf.String()
}
// isWhitespace returns true if the rune is a space, tab, or newline.
func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' }
// isLetter returns true if the rune is a letter.
func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') }
// isDigit returns true if the rune is a digit.
func isDigit(ch rune) bool { return (ch >= '0' && ch <= '9') }
// isIdentChar returns true if the rune can be used in an unquoted identifier.
func isIdentChar(ch rune) bool { return isLetter(ch) || isDigit(ch) || ch == '_' }
// isIdentFirstChar returns true if the rune can be used as the first char in an unquoted identifier.
func isIdentFirstChar(ch rune) bool { return isLetter(ch) || ch == '_' }
// bufScanner represents a wrapper for scanner to add a buffer.
// It provides a fixed-length circular buffer that can be unread.
type bufScanner struct {
s *Scanner
i int // buffer index
n int // buffer size
buf [3]struct {
tok Token
pos Pos
lit string
}
}
// newBufScanner returns a new buffered scanner for a reader.
func newBufScanner(r io.Reader) *bufScanner {
return &bufScanner{s: NewScanner(r)}
}
// Scan reads the next token from the scanner.
func (s *bufScanner) Scan() (tok Token, pos Pos, lit string) {
return s.scanFunc(s.s.Scan)
}
// ScanRegex reads a regex token from the scanner.
func (s *bufScanner) ScanRegex() (tok Token, pos Pos, lit string) {
return s.scanFunc(s.s.ScanRegex)
}
// scanFunc uses the provided function to scan the next token.
func (s *bufScanner) scanFunc(scan func() (Token, Pos, string)) (tok Token, pos Pos, lit string) {
// If we have unread tokens then read them off the buffer first.
if s.n > 0 {
s.n--
return s.curr()
}
// Move buffer position forward and save the token.
s.i = (s.i + 1) % len(s.buf)
buf := &s.buf[s.i]
buf.tok, buf.pos, buf.lit = scan()
return s.curr()
}
// Unscan pushes the previously read token back onto the buffer.
func (s *bufScanner) Unscan() { s.n++ }
// curr returns the last read token.
func (s *bufScanner) curr() (tok Token, pos Pos, lit string) {
buf := &s.buf[(s.i-s.n+len(s.buf))%len(s.buf)]
return buf.tok, buf.pos, buf.lit
}
// reader represents a buffered rune reader used by the scanner.
// It provides a fixed-length circular buffer that can be unread.
type reader struct {
r io.RuneScanner
i int // buffer index
n int // buffer char count
pos Pos // last read rune position
buf [3]struct {
ch rune
pos Pos
}
eof bool // true if reader has ever seen eof.
}
// ReadRune reads the next rune from the reader.
// This is a wrapper function to implement the io.RuneReader interface.
// Note that this function does not return size.
func (r *reader) ReadRune() (ch rune, size int, err error) {
ch, _ = r.read()
if ch == eof {
err = io.EOF
}
return
}
// UnreadRune pushes the previously read rune back onto the buffer.
// This is a wrapper function to implement the io.RuneScanner interface.
func (r *reader) UnreadRune() error {
r.unread()
return nil
}
// read reads the next rune from the reader.
func (r *reader) read() (ch rune, pos Pos) {
// If we have unread characters then read them off the buffer first.
if r.n > 0 {
r.n--
return r.curr()
}
// Read next rune from underlying reader.
// Any error (including io.EOF) should return as EOF.
ch, _, err := r.r.ReadRune()
if err != nil {
ch = eof
} else if ch == '\r' {
if ch, _, err := r.r.ReadRune(); err != nil {
// nop
} else if ch != '\n' {
_ = r.r.UnreadRune()
}
ch = '\n'
}
// Save character and position to the buffer.
r.i = (r.i + 1) % len(r.buf)
buf := &r.buf[r.i]
buf.ch, buf.pos = ch, r.pos
// Update position.
// Only count EOF once.
if ch == '\n' {
r.pos.Line++
r.pos.Char = 0
} else if !r.eof {
r.pos.Char++
}
// Mark the reader as EOF.
// This is used so we don't double count EOF characters.
if ch == eof {
r.eof = true
}
return r.curr()
}
// unread pushes the previously read rune back onto the buffer.
func (r *reader) unread() {
r.n++
}
// curr returns the last read character and position.
func (r *reader) curr() (ch rune, pos Pos) {
i := (r.i - r.n + len(r.buf)) % len(r.buf)
buf := &r.buf[i]
return buf.ch, buf.pos
}
// eof is a marker code point to signify that the reader can't read any more.
const eof = rune(0)
func ScanDelimited(r io.RuneScanner, start, end rune, escapes map[rune]rune, escapesPassThru bool) ([]byte, error) {
// Scan start delimiter.
if ch, _, err := r.ReadRune(); err != nil {
return nil, err
} else if ch != start {
return nil, fmt.Errorf("expected %s; found %s", string(start), string(ch))
}
var buf bytes.Buffer
for {
ch0, _, err := r.ReadRune()
if ch0 == end {
return buf.Bytes(), nil
} else if err != nil {
return buf.Bytes(), err
} else if ch0 == '\n' {
return nil, errors.New("delimited text contains new line")
} else if ch0 == '\\' {
// If the next character is an escape then write the escaped char.
// If it's not a valid escape then return an error.
ch1, _, err := r.ReadRune()
if err != nil {
return nil, err
}
c, ok := escapes[ch1]
if !ok {
if escapesPassThru {
// Unread ch1 (char after the \)
_ = r.UnreadRune()
// Write ch0 (\) to the output buffer.
_, _ = buf.WriteRune(ch0)
continue
} else {
buf.Reset()
_, _ = buf.WriteRune(ch0)
_, _ = buf.WriteRune(ch1)
return buf.Bytes(), errBadEscape
}
}
_, _ = buf.WriteRune(c)
} else {
_, _ = buf.WriteRune(ch0)
}
}
}
// ScanString reads a quoted string from a rune reader.
func ScanString(r io.RuneScanner) (string, error) {
ending, _, err := r.ReadRune()
if err != nil {
return "", errBadString
}
var buf bytes.Buffer
for {
ch0, _, err := r.ReadRune()
if ch0 == ending {
return buf.String(), nil
} else if err != nil || ch0 == '\n' {
return buf.String(), errBadString
} else if ch0 == '\\' {
// If the next character is an escape then write the escaped char.
// If it's not a valid escape then return an error.
ch1, _, _ := r.ReadRune()
if ch1 == 'n' {
_, _ = buf.WriteRune('\n')
} else if ch1 == '\\' {
_, _ = buf.WriteRune('\\')
} else if ch1 == '"' {
_, _ = buf.WriteRune('"')
} else {
return string(ch0) + string(ch1), errBadEscape
}
} else {
_, _ = buf.WriteRune(ch0)
}
}
}
var errBadString = errors.New("bad string")
var errBadEscape = errors.New("bad escape")
var errBadRegex = errors.New("bad regex")
// ScanBareIdent reads a bare identifier from a rune reader.
func ScanBareIdent(r io.RuneScanner) string {
// Read every ident character into the buffer.
// Non-ident characters and EOF will cause the loop to exit.
var buf bytes.Buffer
for {
ch, _, err := r.ReadRune()
if err != nil {
break
} else if !isIdentChar(ch) {
r.UnreadRune()
break
} else {
_, _ = buf.WriteRune(ch)
}
}
return buf.String()
}
var errInvalidIdentifier = errors.New("invalid identifier")
// IsRegexOp returns true if the operator accepts a regex operand.
func IsRegexOp(t Token) bool {
return (t == EQREGEX || t == NEQREGEX)
}
// assert will panic with a given formatted message if the given condition is false.
func assert(condition bool, msg string, v ...interface{}) {
if !condition {
panic(fmt.Sprintf("assert failed: "+msg, v...))
}
}

View File

@@ -1,296 +0,0 @@
package influxql
import (
"strings"
)
// Token is a lexical token of the InfluxQL language.
type Token int
const (
// Special tokens
ILLEGAL Token = iota
EOF
WS
literal_beg
// Literals
IDENT // main
NUMBER // 12345.67
DURATION_VAL // 13h
STRING // "abc"
BADSTRING // "abc
BADESCAPE // \q
TRUE // true
FALSE // false
REGEX // Regular expressions
BADREGEX // `.*
literal_end
operator_beg
// Operators
ADD // +
SUB // -
MUL // *
DIV // /
AND // AND
OR // OR
EQ // =
NEQ // !=
EQREGEX // =~
NEQREGEX // !~
LT // <
LTE // <=
GT // >
GTE // >=
operator_end
LPAREN // (
RPAREN // )
COMMA // ,
SEMICOLON // ;
DOT // .
keyword_beg
// Keywords
ALL
ALTER
AS
ASC
BEGIN
BY
CREATE
CONTINUOUS
DATABASE
DATABASES
DEFAULT
DELETE
DESC
DISTINCT
DROP
DURATION
END
EXISTS
EXPLAIN
FIELD
FOR
FROM
GRANT
GRANTS
GROUP
IF
IN
INF
INNER
INSERT
INTO
KEY
KEYS
LIMIT
MEASUREMENT
MEASUREMENTS
OFFSET
ON
ORDER
PASSWORD
POLICY
POLICIES
PRIVILEGES
QUERIES
QUERY
READ
REPLICATION
RETENTION
REVOKE
SELECT
SERIES
SERVERS
SET
SHOW
SLIMIT
STATS
DIAGNOSTICS
SOFFSET
TAG
TO
USER
USERS
VALUES
WHERE
WITH
WRITE
keyword_end
)
var tokens = [...]string{
ILLEGAL: "ILLEGAL",
EOF: "EOF",
WS: "WS",
IDENT: "IDENT",
NUMBER: "NUMBER",
DURATION_VAL: "DURATION_VAL",
STRING: "STRING",
BADSTRING: "BADSTRING",
BADESCAPE: "BADESCAPE",
TRUE: "TRUE",
FALSE: "FALSE",
REGEX: "REGEX",
ADD: "+",
SUB: "-",
MUL: "*",
DIV: "/",
AND: "AND",
OR: "OR",
EQ: "=",
NEQ: "!=",
EQREGEX: "=~",
NEQREGEX: "!~",
LT: "<",
LTE: "<=",
GT: ">",
GTE: ">=",
LPAREN: "(",
RPAREN: ")",
COMMA: ",",
SEMICOLON: ";",
DOT: ".",
ALL: "ALL",
ALTER: "ALTER",
AS: "AS",
ASC: "ASC",
BEGIN: "BEGIN",
BY: "BY",
CREATE: "CREATE",
CONTINUOUS: "CONTINUOUS",
DATABASE: "DATABASE",
DATABASES: "DATABASES",
DEFAULT: "DEFAULT",
DELETE: "DELETE",
DESC: "DESC",
DROP: "DROP",
DISTINCT: "DISTINCT",
DURATION: "DURATION",
END: "END",
EXISTS: "EXISTS",
EXPLAIN: "EXPLAIN",
FIELD: "FIELD",
FOR: "FOR",
FROM: "FROM",
GRANT: "GRANT",
GRANTS: "GRANTS",
GROUP: "GROUP",
IF: "IF",
IN: "IN",
INF: "INF",
INNER: "INNER",
INSERT: "INSERT",
INTO: "INTO",
KEY: "KEY",
KEYS: "KEYS",
LIMIT: "LIMIT",
MEASUREMENT: "MEASUREMENT",
MEASUREMENTS: "MEASUREMENTS",
OFFSET: "OFFSET",
ON: "ON",
ORDER: "ORDER",
PASSWORD: "PASSWORD",
POLICY: "POLICY",
POLICIES: "POLICIES",
PRIVILEGES: "PRIVILEGES",
QUERIES: "QUERIES",
QUERY: "QUERY",
READ: "READ",
REPLICATION: "REPLICATION",
RETENTION: "RETENTION",
REVOKE: "REVOKE",
SELECT: "SELECT",
SERIES: "SERIES",
SERVERS: "SERVERS",
SET: "SET",
SHOW: "SHOW",
SLIMIT: "SLIMIT",
SOFFSET: "SOFFSET",
STATS: "STATS",
DIAGNOSTICS: "DIAGNOSTICS",
TAG: "TAG",
TO: "TO",
USER: "USER",
USERS: "USERS",
VALUES: "VALUES",
WHERE: "WHERE",
WITH: "WITH",
WRITE: "WRITE",
}
var keywords map[string]Token
func init() {
keywords = make(map[string]Token)
for tok := keyword_beg + 1; tok < keyword_end; tok++ {
keywords[strings.ToLower(tokens[tok])] = tok
}
for _, tok := range []Token{AND, OR} {
keywords[strings.ToLower(tokens[tok])] = tok
}
keywords["true"] = TRUE
keywords["false"] = FALSE
}
// String returns the string representation of the token.
func (tok Token) String() string {
if tok >= 0 && tok < Token(len(tokens)) {
return tokens[tok]
}
return ""
}
// Precedence returns the operator precedence of the binary operator token.
func (tok Token) Precedence() int {
switch tok {
case OR:
return 1
case AND:
return 2
case EQ, NEQ, EQREGEX, NEQREGEX, LT, LTE, GT, GTE:
return 3
case ADD, SUB:
return 4
case MUL, DIV:
return 5
}
return 0
}
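// A hedged sketch of what Precedence encodes: OR binds loosest and MUL/DIV
// bind tightest, so `a OR b AND c = 1 + 2 * 3` groups as
// `a OR (b AND (c = (1 + (2 * 3))))`.
// bindsTighter reports whether op1 should be grouped before op2 when both
// appear unparenthesized, e.g. bindsTighter(MUL, ADD) == true.
func bindsTighter(op1, op2 Token) bool {
	return op1.Precedence() > op2.Precedence()
}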
// isOperator returns true for operator tokens.
func (tok Token) isOperator() bool { return tok > operator_beg && tok < operator_end }
// tokstr returns a literal if provided, otherwise returns the token string.
func tokstr(tok Token, lit string) string {
if lit != "" {
return lit
}
return tok.String()
}
// Lookup returns the token associated with a given string.
func Lookup(ident string) Token {
if tok, ok := keywords[strings.ToLower(ident)]; ok {
return tok
}
return IDENT
}
// Pos specifies the line and character position of a token.
// The Char and Line are both zero-based indexes.
type Pos struct {
Line int
Char int
}

View File

@@ -1,52 +0,0 @@
package meta
import (
"time"
"github.com/influxdb/influxdb/toml"
)
const (
// DefaultHostname is the default hostname if one is not provided.
DefaultHostname = "localhost"
// DefaultBindAddress is the default address to bind to.
DefaultBindAddress = ":8088"
// DefaultHeartbeatTimeout is the default heartbeat timeout for the store.
DefaultHeartbeatTimeout = 1000 * time.Millisecond
// DefaultElectionTimeout is the default election timeout for the store.
DefaultElectionTimeout = 1000 * time.Millisecond
// DefaultLeaderLeaseTimeout is the default leader lease for the store.
DefaultLeaderLeaseTimeout = 500 * time.Millisecond
// DefaultCommitTimeout is the default commit timeout for the store.
DefaultCommitTimeout = 50 * time.Millisecond
)
// Config represents the meta configuration.
type Config struct {
Dir string `toml:"dir"`
Hostname string `toml:"hostname"`
BindAddress string `toml:"bind-address"`
Peers []string `toml:"peers"`
RetentionAutoCreate bool `toml:"retention-autocreate"`
ElectionTimeout toml.Duration `toml:"election-timeout"`
HeartbeatTimeout toml.Duration `toml:"heartbeat-timeout"`
LeaderLeaseTimeout toml.Duration `toml:"leader-lease-timeout"`
CommitTimeout toml.Duration `toml:"commit-timeout"`
}
func NewConfig() Config {
return Config{
Hostname: DefaultHostname,
BindAddress: DefaultBindAddress,
RetentionAutoCreate: true,
ElectionTimeout: toml.Duration(DefaultElectionTimeout),
HeartbeatTimeout: toml.Duration(DefaultHeartbeatTimeout),
LeaderLeaseTimeout: toml.Duration(DefaultLeaderLeaseTimeout),
CommitTimeout: toml.Duration(DefaultCommitTimeout),
}
}
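// A hedged sketch of taking the defaults and overriding a few fields; the
// directory, peers, and timeout below are made-up values.
func exampleConfigOverride() Config {
	c := NewConfig()
	c.Dir = "/var/opt/influxdb/meta"
	c.Peers = []string{"10.0.0.1:8088", "10.0.0.2:8088"}
	c.ElectionTimeout = toml.Duration(2 * time.Second)
	return c
}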

File diff suppressed because it is too large Load Diff

View File

@@ -1,116 +0,0 @@
package meta
import (
"errors"
"fmt"
)
var (
// ErrStoreOpen is returned when opening an already open store.
ErrStoreOpen = errors.New("store already open")
// ErrStoreClosed is returned when closing an already closed store.
ErrStoreClosed = errors.New("raft store already closed")
// ErrTooManyPeers is returned when more than 3 peers are used.
ErrTooManyPeers = errors.New("too many peers; influxdb v0.9.0 is limited to 3 nodes in a cluster")
)
var (
// ErrNodeExists is returned when creating an already existing node.
ErrNodeExists = errors.New("node already exists")
// ErrNodeNotFound is returned when mutating a node that doesn't exist.
ErrNodeNotFound = errors.New("node not found")
// ErrNodesRequired is returned when at least one node is required for an operation.
// This occurs when creating a shard group.
ErrNodesRequired = errors.New("at least one node required")
)
var (
// ErrDatabaseExists is returned when creating an already existing database.
ErrDatabaseExists = errors.New("database already exists")
// ErrDatabaseNotFound is returned when mutating a database that doesn't exist.
ErrDatabaseNotFound = errors.New("database not found")
// ErrDatabaseNameRequired is returned when creating a database without a name.
ErrDatabaseNameRequired = errors.New("database name required")
)
var (
// ErrRetentionPolicyExists is returned when creating an already existing policy.
ErrRetentionPolicyExists = errors.New("retention policy already exists")
// ErrRetentionPolicyNotFound is returned when mutating a policy that doesn't exist.
ErrRetentionPolicyNotFound = errors.New("retention policy not found")
// ErrRetentionPolicyNameRequired is returned when creating a policy without a name.
ErrRetentionPolicyNameRequired = errors.New("retention policy name required")
// ErrRetentionPolicyNameExists is returned when renaming a policy to
// the same name as another existing policy.
ErrRetentionPolicyNameExists = errors.New("retention policy name already exists")
// ErrRetentionPolicyDurationTooLow is returned when updating a retention
// policy that has a duration lower than the allowed minimum.
ErrRetentionPolicyDurationTooLow = errors.New(fmt.Sprintf("retention policy duration must be at least %s",
RetentionPolicyMinDuration))
// ErrReplicationFactorMismatch is returned when the replication factor
// does not match the number of nodes in the cluster. This is a temporary
// restriction until v0.9.1 is released.
ErrReplicationFactorMismatch = errors.New("replication factor must match cluster size; this limitation will be lifted in v0.9.1")
)
var (
// ErrShardGroupExists is returned when creating an already existing shard group.
ErrShardGroupExists = errors.New("shard group already exists")
// ErrShardGroupNotFound is returned when mutating a shard group that doesn't exist.
ErrShardGroupNotFound = errors.New("shard group not found")
)
var (
// ErrContinuousQueryExists is returned when creating an already existing continuous query.
ErrContinuousQueryExists = errors.New("continuous query already exists")
// ErrContinuousQueryNotFound is returned when removing a continuous query that doesn't exist.
ErrContinuousQueryNotFound = errors.New("continuous query not found")
)
var (
// ErrUserExists is returned when creating an already existing user.
ErrUserExists = errors.New("user already exists")
// ErrUserNotFound is returned when mutating a user that doesn't exist.
ErrUserNotFound = errors.New("user not found")
// ErrUsernameRequired is returned when creating a user without a username.
ErrUsernameRequired = errors.New("username required")
)
var errs = [...]error{
ErrStoreOpen, ErrStoreClosed,
ErrNodeExists, ErrNodeNotFound,
ErrDatabaseExists, ErrDatabaseNotFound, ErrDatabaseNameRequired,
}
// errLookup stores a mapping of error strings to well defined error types.
var errLookup = make(map[string]error)
func init() {
for _, err := range errs {
errLookup[err.Error()] = err
}
}
// lookupError returns a known error reference, if one exists.
// Otherwise returns err.
func lookupError(err error) error {
if e, ok := errLookup[err.Error()]; ok {
return e
}
return err
}
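// A hedged sketch of why errLookup exists: an error whose message crossed a
// process boundary (for example, decoded from a remote response) can be
// mapped back to its sentinel value for comparison.
func exampleLookup() bool {
	remoteErr := errors.New("database not found")
	return lookupError(remoteErr) == ErrDatabaseNotFound // true
}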

File diff suppressed because it is too large Load Diff

View File

@@ -1,257 +0,0 @@
package internal;
//========================================================================
//
// Metadata
//
//========================================================================
message Data {
required uint64 Term = 1;
required uint64 Index = 2;
required uint64 ClusterID = 3;
repeated NodeInfo Nodes = 4;
repeated DatabaseInfo Databases = 5;
repeated UserInfo Users = 6;
required uint64 MaxNodeID = 7;
required uint64 MaxShardGroupID = 8;
required uint64 MaxShardID = 9;
}
message NodeInfo {
required uint64 ID = 1;
required string Host = 2;
}
message DatabaseInfo {
required string Name = 1;
required string DefaultRetentionPolicy = 2;
repeated RetentionPolicyInfo RetentionPolicies = 3;
repeated ContinuousQueryInfo ContinuousQueries = 4;
}
message RetentionPolicyInfo {
required string Name = 1;
required int64 Duration = 2;
required int64 ShardGroupDuration = 3;
required uint32 ReplicaN = 4;
repeated ShardGroupInfo ShardGroups = 5;
}
message ShardGroupInfo {
required uint64 ID = 1;
required int64 StartTime = 2;
required int64 EndTime = 3;
required int64 DeletedAt = 4;
repeated ShardInfo Shards = 5;
}
message ShardInfo {
required uint64 ID = 1;
repeated uint64 OwnerIDs = 2;
}
message ContinuousQueryInfo {
required string Name = 1;
required string Query = 2;
}
message UserInfo {
required string Name = 1;
required string Hash = 2;
required bool Admin = 3;
repeated UserPrivilege Privileges = 4;
}
message UserPrivilege {
required string Database = 1;
required int32 Privilege = 2;
}
//========================================================================
//
// COMMANDS
//
//========================================================================
message Command {
extensions 100 to max;
enum Type {
CreateNodeCommand = 1;
DeleteNodeCommand = 2;
CreateDatabaseCommand = 3;
DropDatabaseCommand = 4;
CreateRetentionPolicyCommand = 5;
DropRetentionPolicyCommand = 6;
SetDefaultRetentionPolicyCommand = 7;
UpdateRetentionPolicyCommand = 8;
CreateShardGroupCommand = 9;
DeleteShardGroupCommand = 10;
CreateContinuousQueryCommand = 11;
DropContinuousQueryCommand = 12;
CreateUserCommand = 13;
DropUserCommand = 14;
UpdateUserCommand = 15;
SetPrivilegeCommand = 16;
SetDataCommand = 17;
}
required Type type = 1;
}
message CreateNodeCommand {
extend Command {
optional CreateNodeCommand command = 101;
}
required string Host = 1;
required uint64 Rand = 2;
}
message DeleteNodeCommand {
extend Command {
optional DeleteNodeCommand command = 102;
}
required uint64 ID = 1;
}
message CreateDatabaseCommand {
extend Command {
optional CreateDatabaseCommand command = 103;
}
required string Name = 1;
}
message DropDatabaseCommand {
extend Command {
optional DropDatabaseCommand command = 104;
}
required string Name = 1;
}
message CreateRetentionPolicyCommand {
extend Command {
optional CreateRetentionPolicyCommand command = 105;
}
required string Database = 1;
required RetentionPolicyInfo RetentionPolicy = 2;
}
message DropRetentionPolicyCommand {
extend Command {
optional DropRetentionPolicyCommand command = 106;
}
required string Database = 1;
required string Name = 2;
}
message SetDefaultRetentionPolicyCommand {
extend Command {
optional SetDefaultRetentionPolicyCommand command = 107;
}
required string Database = 1;
required string Name = 2;
}
message UpdateRetentionPolicyCommand {
extend Command {
optional UpdateRetentionPolicyCommand command = 108;
}
required string Database = 1;
required string Name = 2;
optional string NewName = 3;
optional int64 Duration = 4;
optional uint32 ReplicaN = 5;
}
message CreateShardGroupCommand {
extend Command {
optional CreateShardGroupCommand command = 109;
}
required string Database = 1;
required string Policy = 2;
required int64 Timestamp = 3;
}
message DeleteShardGroupCommand {
extend Command {
optional DeleteShardGroupCommand command = 110;
}
required string Database = 1;
required string Policy = 2;
required uint64 ShardGroupID = 3;
}
message CreateContinuousQueryCommand {
extend Command {
optional CreateContinuousQueryCommand command = 111;
}
required string Database = 1;
required string Name = 2;
required string Query = 3;
}
message DropContinuousQueryCommand {
extend Command {
optional DropContinuousQueryCommand command = 112;
}
required string Database = 1;
required string Name = 2;
}
message CreateUserCommand {
extend Command {
optional CreateUserCommand command = 113;
}
required string Name = 1;
required string Hash = 2;
required bool Admin = 3;
}
message DropUserCommand {
extend Command {
optional DropUserCommand command = 114;
}
required string Name = 1;
}
message UpdateUserCommand {
extend Command {
optional UpdateUserCommand command = 115;
}
required string Name = 1;
required string Hash = 2;
}
message SetPrivilegeCommand {
extend Command {
optional SetPrivilegeCommand command = 116;
}
required string Username = 1;
required string Database = 2;
required int32 Privilege = 3;
}
message SetDataCommand {
extend Command {
optional SetDataCommand command = 117;
}
required Data Data = 1;
}
message SetAdminPrivilegeCommand {
extend Command {
optional SetAdminPrivilegeCommand command = 118;
}
required string Username = 1;
required bool Admin = 2;
}
message Response {
required bool OK = 1;
optional string Error = 2;
optional uint64 Index = 3;
}

View File

@@ -1,280 +0,0 @@
package meta
import (
"fmt"
"github.com/influxdb/influxdb/influxql"
)
// StatementExecutor translates InfluxQL queries to meta store methods.
type StatementExecutor struct {
Store interface {
Nodes() ([]NodeInfo, error)
Database(name string) (*DatabaseInfo, error)
Databases() ([]DatabaseInfo, error)
CreateDatabase(name string) (*DatabaseInfo, error)
DropDatabase(name string) error
DefaultRetentionPolicy(database string) (*RetentionPolicyInfo, error)
CreateRetentionPolicy(database string, rpi *RetentionPolicyInfo) (*RetentionPolicyInfo, error)
UpdateRetentionPolicy(database, name string, rpu *RetentionPolicyUpdate) error
SetDefaultRetentionPolicy(database, name string) error
DropRetentionPolicy(database, name string) error
Users() ([]UserInfo, error)
CreateUser(name, password string, admin bool) (*UserInfo, error)
UpdateUser(name, password string) error
DropUser(name string) error
SetPrivilege(username, database string, p influxql.Privilege) error
SetAdminPrivilege(username string, admin bool) error
UserPrivileges(username string) (map[string]influxql.Privilege, error)
UserPrivilege(username, database string) (*influxql.Privilege, error)
CreateContinuousQuery(database, name, query string) error
DropContinuousQuery(database, name string) error
}
}
// ExecuteStatement executes stmt against the meta store as user.
func (e *StatementExecutor) ExecuteStatement(stmt influxql.Statement) *influxql.Result {
switch stmt := stmt.(type) {
case *influxql.CreateDatabaseStatement:
return e.executeCreateDatabaseStatement(stmt)
case *influxql.DropDatabaseStatement:
return e.executeDropDatabaseStatement(stmt)
case *influxql.ShowDatabasesStatement:
return e.executeShowDatabasesStatement(stmt)
case *influxql.ShowGrantsForUserStatement:
return e.executeShowGrantsForUserStatement(stmt)
case *influxql.ShowServersStatement:
return e.executeShowServersStatement(stmt)
case *influxql.CreateUserStatement:
return e.executeCreateUserStatement(stmt)
case *influxql.SetPasswordUserStatement:
return e.executeSetPasswordUserStatement(stmt)
case *influxql.DropUserStatement:
return e.executeDropUserStatement(stmt)
case *influxql.ShowUsersStatement:
return e.executeShowUsersStatement(stmt)
case *influxql.GrantStatement:
return e.executeGrantStatement(stmt)
case *influxql.GrantAdminStatement:
return e.executeGrantAdminStatement(stmt)
case *influxql.RevokeStatement:
return e.executeRevokeStatement(stmt)
case *influxql.RevokeAdminStatement:
return e.executeRevokeAdminStatement(stmt)
case *influxql.CreateRetentionPolicyStatement:
return e.executeCreateRetentionPolicyStatement(stmt)
case *influxql.AlterRetentionPolicyStatement:
return e.executeAlterRetentionPolicyStatement(stmt)
case *influxql.DropRetentionPolicyStatement:
return e.executeDropRetentionPolicyStatement(stmt)
case *influxql.ShowRetentionPoliciesStatement:
return e.executeShowRetentionPoliciesStatement(stmt)
case *influxql.CreateContinuousQueryStatement:
return e.executeCreateContinuousQueryStatement(stmt)
case *influxql.DropContinuousQueryStatement:
return e.executeDropContinuousQueryStatement(stmt)
case *influxql.ShowContinuousQueriesStatement:
return e.executeShowContinuousQueriesStatement(stmt)
case *influxql.ShowStatsStatement:
return e.executeShowStatsStatement(stmt)
default:
panic(fmt.Sprintf("unsupported statement type: %T", stmt))
}
}
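// A hedged usage sketch: parse a statement with influxql (assuming its
// ParseStatement helper) and run it through an already-constructed executor;
// building a Store implementation is out of scope here.
func exampleExecute(e *StatementExecutor) error {
	stmt, err := influxql.ParseStatement(`CREATE DATABASE mydb`)
	if err != nil {
		return err
	}
	if res := e.ExecuteStatement(stmt); res.Err != nil {
		return res.Err
	}
	return nil
}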
func (e *StatementExecutor) executeCreateDatabaseStatement(q *influxql.CreateDatabaseStatement) *influxql.Result {
_, err := e.Store.CreateDatabase(q.Name)
return &influxql.Result{Err: err}
}
func (e *StatementExecutor) executeDropDatabaseStatement(q *influxql.DropDatabaseStatement) *influxql.Result {
return &influxql.Result{Err: e.Store.DropDatabase(q.Name)}
}
func (e *StatementExecutor) executeShowDatabasesStatement(q *influxql.ShowDatabasesStatement) *influxql.Result {
dis, err := e.Store.Databases()
if err != nil {
return &influxql.Result{Err: err}
}
row := &influxql.Row{Name: "databases", Columns: []string{"name"}}
for _, di := range dis {
row.Values = append(row.Values, []interface{}{di.Name})
}
return &influxql.Result{Series: []*influxql.Row{row}}
}
func (e *StatementExecutor) executeShowGrantsForUserStatement(q *influxql.ShowGrantsForUserStatement) *influxql.Result {
priv, err := e.Store.UserPrivileges(q.Name)
if err != nil {
return &influxql.Result{Err: err}
}
row := &influxql.Row{Columns: []string{"database", "privilege"}}
for d, p := range priv {
row.Values = append(row.Values, []interface{}{d, p.String()})
}
return &influxql.Result{Series: []*influxql.Row{row}}
}
func (e *StatementExecutor) executeShowServersStatement(q *influxql.ShowServersStatement) *influxql.Result {
nis, err := e.Store.Nodes()
if err != nil {
return &influxql.Result{Err: err}
}
row := &influxql.Row{Columns: []string{"id", "url"}}
for _, ni := range nis {
row.Values = append(row.Values, []interface{}{ni.ID, "http://" + ni.Host})
}
return &influxql.Result{Series: []*influxql.Row{row}}
}
func (e *StatementExecutor) executeCreateUserStatement(q *influxql.CreateUserStatement) *influxql.Result {
_, err := e.Store.CreateUser(q.Name, q.Password, q.Admin)
return &influxql.Result{Err: err}
}
func (e *StatementExecutor) executeSetPasswordUserStatement(q *influxql.SetPasswordUserStatement) *influxql.Result {
return &influxql.Result{Err: e.Store.UpdateUser(q.Name, q.Password)}
}
func (e *StatementExecutor) executeDropUserStatement(q *influxql.DropUserStatement) *influxql.Result {
return &influxql.Result{Err: e.Store.DropUser(q.Name)}
}
func (e *StatementExecutor) executeShowUsersStatement(q *influxql.ShowUsersStatement) *influxql.Result {
uis, err := e.Store.Users()
if err != nil {
return &influxql.Result{Err: err}
}
row := &influxql.Row{Columns: []string{"user", "admin"}}
for _, ui := range uis {
row.Values = append(row.Values, []interface{}{ui.Name, ui.Admin})
}
return &influxql.Result{Series: []*influxql.Row{row}}
}
func (e *StatementExecutor) executeGrantStatement(stmt *influxql.GrantStatement) *influxql.Result {
return &influxql.Result{Err: e.Store.SetPrivilege(stmt.User, stmt.On, stmt.Privilege)}
}
func (e *StatementExecutor) executeGrantAdminStatement(stmt *influxql.GrantAdminStatement) *influxql.Result {
return &influxql.Result{Err: e.Store.SetAdminPrivilege(stmt.User, true)}
}
func (e *StatementExecutor) executeRevokeStatement(stmt *influxql.RevokeStatement) *influxql.Result {
priv := influxql.NoPrivileges
// Revoking all privileges means there's no need to look at existing user privileges.
if stmt.Privilege != influxql.AllPrivileges {
p, err := e.Store.UserPrivilege(stmt.User, stmt.On)
if err != nil {
return &influxql.Result{Err: err}
}
// Bit clear (AND NOT) the user's privilege with the revoked privilege.
priv = *p &^ stmt.Privilege
}
return &influxql.Result{Err: e.Store.SetPrivilege(stmt.User, stmt.On, priv)}
}
func (e *StatementExecutor) executeRevokeAdminStatement(stmt *influxql.RevokeAdminStatement) *influxql.Result {
return &influxql.Result{Err: e.Store.SetAdminPrivilege(stmt.User, false)}
}
func (e *StatementExecutor) executeCreateRetentionPolicyStatement(stmt *influxql.CreateRetentionPolicyStatement) *influxql.Result {
rpi := NewRetentionPolicyInfo(stmt.Name)
rpi.Duration = stmt.Duration
rpi.ReplicaN = stmt.Replication
// Create new retention policy.
_, err := e.Store.CreateRetentionPolicy(stmt.Database, rpi)
if err != nil {
return &influxql.Result{Err: err}
}
// If requested, set new policy as the default.
if stmt.Default {
err = e.Store.SetDefaultRetentionPolicy(stmt.Database, stmt.Name)
}
return &influxql.Result{Err: err}
}
func (e *StatementExecutor) executeAlterRetentionPolicyStatement(stmt *influxql.AlterRetentionPolicyStatement) *influxql.Result {
rpu := &RetentionPolicyUpdate{
Duration: stmt.Duration,
ReplicaN: stmt.Replication,
}
// Update the retention policy.
err := e.Store.UpdateRetentionPolicy(stmt.Database, stmt.Name, rpu)
if err != nil {
return &influxql.Result{Err: err}
}
// If requested, set as default retention policy.
if stmt.Default {
err = e.Store.SetDefaultRetentionPolicy(stmt.Database, stmt.Name)
}
return &influxql.Result{Err: err}
}
func (e *StatementExecutor) executeDropRetentionPolicyStatement(q *influxql.DropRetentionPolicyStatement) *influxql.Result {
return &influxql.Result{Err: e.Store.DropRetentionPolicy(q.Database, q.Name)}
}
func (e *StatementExecutor) executeShowRetentionPoliciesStatement(q *influxql.ShowRetentionPoliciesStatement) *influxql.Result {
di, err := e.Store.Database(q.Database)
if err != nil {
return &influxql.Result{Err: err}
} else if di == nil {
return &influxql.Result{Err: ErrDatabaseNotFound}
}
row := &influxql.Row{Columns: []string{"name", "duration", "replicaN", "default"}}
for _, rpi := range di.RetentionPolicies {
row.Values = append(row.Values, []interface{}{rpi.Name, rpi.Duration.String(), rpi.ReplicaN, di.DefaultRetentionPolicy == rpi.Name})
}
return &influxql.Result{Series: []*influxql.Row{row}}
}
func (e *StatementExecutor) executeCreateContinuousQueryStatement(q *influxql.CreateContinuousQueryStatement) *influxql.Result {
return &influxql.Result{
Err: e.Store.CreateContinuousQuery(q.Database, q.Name, q.String()),
}
}
func (e *StatementExecutor) executeDropContinuousQueryStatement(q *influxql.DropContinuousQueryStatement) *influxql.Result {
return &influxql.Result{
Err: e.Store.DropContinuousQuery(q.Database, q.Name),
}
}
func (e *StatementExecutor) executeShowContinuousQueriesStatement(stmt *influxql.ShowContinuousQueriesStatement) *influxql.Result {
dis, err := e.Store.Databases()
if err != nil {
return &influxql.Result{Err: err}
}
rows := []*influxql.Row{}
for _, di := range dis {
row := &influxql.Row{Columns: []string{"name", "query"}, Name: di.Name}
for _, cqi := range di.ContinuousQueries {
row.Values = append(row.Values, []interface{}{cqi.Name, cqi.Query})
}
rows = append(rows, row)
}
return &influxql.Result{Series: rows}
}
func (e *StatementExecutor) executeShowStatsStatement(stmt *influxql.ShowStatsStatement) *influxql.Result {
return &influxql.Result{Err: fmt.Errorf("SHOW STATS is not implemented yet")}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,529 +0,0 @@
package snapshot
import (
"archive/tar"
"encoding/json"
"fmt"
"io"
"os"
"sort"
"time"
)
// manifestName is the name of the manifest file in the snapshot.
const manifestName = "manifest"
// Manifest represents a list of files in a snapshot.
type Manifest struct {
Files []File `json:"files"`
}
// Diff returns a Manifest of files that are newer in m than other.
func (m *Manifest) Diff(other *Manifest) *Manifest {
diff := &Manifest{}
// Find versions of files that are newer in m.
loop:
for _, a := range m.Files {
// Try to find a newer version of the file in other.
// If found then don't append this file and move to the next file.
for _, b := range other.Files {
if a.Name != b.Name {
continue
} else if !a.ModTime.After(b.ModTime) {
continue loop
} else {
break
}
}
// Append the newest version.
diff.Files = append(diff.Files, a)
}
// Sort files.
sort.Sort(Files(diff.Files))
return diff
}
// Merge returns a Manifest that combines m with other.
// Only the newest file between the two snapshots is returned.
func (m *Manifest) Merge(other *Manifest) *Manifest {
ret := &Manifest{}
ret.Files = make([]File, len(m.Files))
copy(ret.Files, m.Files)
// Update/insert versions of files that are newer in other.
loop:
for _, a := range other.Files {
for i, b := range ret.Files {
// Ignore if it doesn't match.
if a.Name != b.Name {
continue
}
// Update if it's newer and then start the next file.
if a.ModTime.After(b.ModTime) {
ret.Files[i] = a
}
continue loop
}
// If the file wasn't found then append it.
ret.Files = append(ret.Files, a)
}
// Sort files.
sort.Sort(Files(ret.Files))
return ret
}
// File represents a single file in a manifest.
type File struct {
Name string `json:"name"` // filename
Size int64 `json:"size"` // file size
ModTime time.Time `json:"lastModified"` // last modified time
}
// Files represents a sortable list of files.
type Files []File
func (p Files) Len() int { return len(p) }
func (p Files) Less(i, j int) bool { return p[i].Name < p[j].Name }
func (p Files) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
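// A hedged sketch of how Diff and Merge combine a base snapshot manifest
// with an incremental one; the file names and sizes are made up.
func exampleDiffMerge() (diff, merged *Manifest) {
	t0 := time.Now()
	base := &Manifest{Files: []File{
		{Name: "meta", Size: 10, ModTime: t0},
		{Name: "shards/1", Size: 20, ModTime: t0},
	}}
	incr := &Manifest{Files: []File{
		{Name: "shards/1", Size: 25, ModTime: t0.Add(time.Minute)}, // newer copy
		{Name: "shards/2", Size: 5, ModTime: t0.Add(time.Minute)},  // new file
	}}
	diff = incr.Diff(base)    // only shards/1 (the newer copy) and shards/2
	merged = base.Merge(incr) // meta, the newer shards/1, and shards/2
	return diff, merged
}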
// Reader reads a snapshot from a Reader.
// This type is not safe for concurrent use.
type Reader struct {
tr *tar.Reader
manifest *Manifest
}
// NewReader returns a new Reader reading from r.
func NewReader(r io.Reader) *Reader {
return &Reader{
tr: tar.NewReader(r),
}
}
// Manifest returns the snapshot manifest.
func (sr *Reader) Manifest() (*Manifest, error) {
if err := sr.readManifest(); err != nil {
return nil, err
}
return sr.manifest, nil
}
// readManifest reads the first entry from the snapshot and materializes the manifest.
// This is skipped if the snapshot manifest has already been read.
func (sr *Reader) readManifest() error {
// Already read, ignore.
if sr.manifest != nil {
return nil
}
// Read manifest header.
hdr, err := sr.tr.Next()
if err != nil {
return fmt.Errorf("snapshot header: %s", err)
} else if hdr.Name != manifestName {
return fmt.Errorf("invalid snapshot header: expected manifest")
}
// Materialize manifest.
var manifest Manifest
if err := json.NewDecoder(sr.tr).Decode(&manifest); err != nil {
return fmt.Errorf("decode manifest: %s", err)
}
sr.manifest = &manifest
return nil
}
// Next returns the next file in the snapshot.
func (sr *Reader) Next() (File, error) {
// Read manifest if it hasn't been read yet.
if err := sr.readManifest(); err != nil {
return File{}, err
}
// Read next header.
hdr, err := sr.tr.Next()
if err != nil {
return File{}, err
}
// Match header to file in snapshot.
for i := range sr.manifest.Files {
if sr.manifest.Files[i].Name == hdr.Name {
return sr.manifest.Files[i], nil
}
}
// Return error if file is not in the manifest.
return File{}, fmt.Errorf("snapshot entry not found in manifest: %s", hdr.Name)
}
// Read reads the current entry in the snapshot.
func (sr *Reader) Read(b []byte) (n int, err error) {
// Read manifest if it hasn't been read yet.
if err := sr.readManifest(); err != nil {
return 0, err
}
// Pass read through to the tar reader.
return sr.tr.Read(b)
}
// MultiReader reads from a collection of snapshots.
// Only files with the highest index are read from the reader.
// This type is not safe for concurrent use.
type MultiReader struct {
readers []*Reader // underlying snapshot readers
files []*File // current file for each reader
manifest *Manifest // combined manifest from all readers
index int // index of file in snapshot to read
curr *Reader // current reader
}
// NewMultiReader returns a new MultiReader reading from a list of readers.
func NewMultiReader(readers ...io.Reader) *MultiReader {
r := &MultiReader{
readers: make([]*Reader, len(readers)),
files: make([]*File, len(readers)),
index: -1,
}
for i := range readers {
r.readers[i] = NewReader(readers[i])
}
return r
}
// Manifest returns the combined manifest from all readers.
func (ssr *MultiReader) Manifest() (*Manifest, error) {
// Use manifest if it's already been calculated.
if ssr.manifest != nil {
return ssr.manifest, nil
}
// Build manifest from other readers.
ss := &Manifest{}
for i, sr := range ssr.readers {
other, err := sr.Manifest()
if err != nil {
return nil, fmt.Errorf("manifest: idx=%d, err=%s", i, err)
}
ss = ss.Merge(other)
}
// Cache manifest and return.
ssr.manifest = ss
return ss, nil
}
// Next returns the next file in the reader.
func (ssr *MultiReader) Next() (File, error) {
ss, err := ssr.Manifest()
if err != nil {
return File{}, fmt.Errorf("manifest: %s", err)
}
// Return EOF if there are no more files in snapshot.
if ssr.index == len(ss.Files)-1 {
ssr.curr = nil
return File{}, io.EOF
}
// Queue up next files.
if err := ssr.nextFiles(); err != nil {
return File{}, fmt.Errorf("next files: %s", err)
}
// Increment the file index.
ssr.index++
sf := ss.Files[ssr.index]
// Find the matching reader. Clear other readers.
var sr *Reader
for i, f := range ssr.files {
if f == nil || f.Name != sf.Name {
continue
}
// Set reader to the first match.
if sr == nil && *f == sf {
sr = ssr.readers[i]
}
ssr.files[i] = nil
}
// Return an error if file doesn't match.
// This shouldn't happen unless the underlying snapshot is altered.
if sr == nil {
return File{}, fmt.Errorf("snaphot file not found in readers: %s", sf.Name)
}
// Set current reader.
ssr.curr = sr
// Return file.
return sf, nil
}
// nextFiles queues up a next file for all readers.
func (ssr *MultiReader) nextFiles() error {
for i, sr := range ssr.readers {
if ssr.files[i] == nil {
// Read next file.
sf, err := sr.Next()
if err == io.EOF {
ssr.files[i] = nil
continue
} else if err != nil {
return fmt.Errorf("next: reader=%d, err=%s", i, err)
}
// Cache file.
ssr.files[i] = &sf
}
}
return nil
}
// nextIndex returns the index of the next reader to read from.
// Returns -1 if all readers are at EOF.
func (ssr *MultiReader) nextIndex() int {
// Find the next file by name and lowest index.
index := -1
for i, f := range ssr.files {
if f == nil {
continue
} else if index == -1 {
index = i
} else if f.Name < ssr.files[index].Name {
index = i
} else if f.Name == ssr.files[index].Name && f.ModTime.After(ssr.files[index].ModTime) {
index = i
}
}
return index
}
// Read reads the current entry in the reader.
func (ssr *MultiReader) Read(b []byte) (n int, err error) {
if ssr.curr == nil {
return 0, io.EOF
}
return ssr.curr.Read(b)
}
// OpenFileMultiReader returns a MultiReader based on the path of the base snapshot.
// Returns the underlying files which need to be closed separately.
func OpenFileMultiReader(path string) (*MultiReader, []io.Closer, error) {
var readers []io.Reader
var closers []io.Closer
if err := func() error {
// Open original snapshot file.
f, err := os.Open(path)
if os.IsNotExist(err) {
return err
} else if err != nil {
return fmt.Errorf("open snapshot: %s", err)
}
readers = append(readers, f)
closers = append(closers, f)
// Open all incremental snapshots.
for i := 0; ; i++ {
filename := path + fmt.Sprintf(".%d", i)
f, err := os.Open(filename)
if os.IsNotExist(err) {
break
} else if err != nil {
return fmt.Errorf("open incremental snapshot: file=%s, err=%s", filename, err)
}
readers = append(readers, f)
closers = append(closers, f)
}
return nil
}(); err != nil {
closeAll(closers)
return nil, nil, err
}
return NewMultiReader(readers...), closers, nil
}
// ReadFileManifest returns a Manifest for a given base snapshot path.
// This merges all incremental backup manifests as well.
func ReadFileManifest(path string) (*Manifest, error) {
// Open a multi-snapshot reader.
ssr, files, err := OpenFileMultiReader(path)
if os.IsNotExist(err) {
return nil, err
} else if err != nil {
return nil, fmt.Errorf("open file multi reader: %s", err)
}
defer closeAll(files)
// Read manifest.
ss, err := ssr.Manifest()
if err != nil {
return nil, fmt.Errorf("manifest: %s", err)
}
return ss, nil
}
func closeAll(a []io.Closer) {
for _, c := range a {
_ = c.Close()
}
}
// Writer writes a snapshot and the underlying files to disk as a tar archive.
type Writer struct {
// The manifest to write from.
// Removing files from the manifest after creation will cause those files to be ignored.
Manifest *Manifest
// Writers for each file by filename.
// Writers will be closed as they're processed, and all will be closed by the end of WriteTo().
FileWriters map[string]FileWriter
}
// NewWriter returns a new instance of Writer.
func NewWriter() *Writer {
return &Writer{
Manifest: &Manifest{},
FileWriters: make(map[string]FileWriter),
}
}
// Close closes all file writers on the snapshot.
func (sw *Writer) Close() error {
for _, fw := range sw.FileWriters {
_ = fw.Close()
}
return nil
}
// closeUnusedWriters closes all file writers not on the manifest.
// This allows transactions on these files to be short lived.
func (sw *Writer) closeUnusedWriters() {
loop:
for name, fw := range sw.FileWriters {
// Find writer in manifest.
for _, f := range sw.Manifest.Files {
if f.Name == name {
continue loop
}
}
// If not found then close it.
_ = fw.Close()
}
}
// WriteTo writes the snapshot to the writer.
// File writers are closed as they are written.
// This function will always return n == 0.
func (sw *Writer) WriteTo(w io.Writer) (n int64, err error) {
// Close any file writers that aren't required.
sw.closeUnusedWriters()
// Sort manifest files.
// This is required for combining multiple snapshots together.
sort.Sort(Files(sw.Manifest.Files))
// Begin writing a tar file to the output.
tw := tar.NewWriter(w)
defer tw.Close()
// Write manifest file.
if err := sw.writeManifestTo(tw); err != nil {
return 0, fmt.Errorf("write manifest: %s", err)
}
// Write each backup file.
for _, f := range sw.Manifest.Files {
if err := sw.writeFileTo(tw, &f); err != nil {
return 0, fmt.Errorf("write file: %s", err)
}
}
// Close tar writer and check error.
if err := tw.Close(); err != nil {
return 0, fmt.Errorf("tar close: %s", err)
}
return 0, nil
}
// writeManifestTo writes a manifest to the archive.
func (sw *Writer) writeManifestTo(tw *tar.Writer) error {
// Convert manifest to JSON.
b, err := json.Marshal(sw.Manifest)
if err != nil {
return fmt.Errorf("marshal json: %s", err)
}
// Write header & file.
if err := tw.WriteHeader(&tar.Header{
Name: manifestName,
Size: int64(len(b)),
Mode: 0666,
ModTime: time.Now(),
}); err != nil {
return fmt.Errorf("write header: %s", err)
}
if _, err := tw.Write(b); err != nil {
return fmt.Errorf("write: %s", err)
}
return nil
}
// writeFileTo writes a single file to the archive.
func (sw *Writer) writeFileTo(tw *tar.Writer, f *File) error {
// Retrieve the file writer by filename.
fw := sw.FileWriters[f.Name]
if fw == nil {
return fmt.Errorf("file writer not found: name=%s", f.Name)
}
// Write file header.
if err := tw.WriteHeader(&tar.Header{
Name: f.Name,
Size: f.Size,
Mode: 0666,
ModTime: time.Now(),
}); err != nil {
return fmt.Errorf("write header: file=%s, err=%s", f.Name, err)
}
// Copy the database to the writer.
if nn, err := fw.WriteTo(tw); err != nil {
return fmt.Errorf("write: file=%s, err=%s", f.Name, err)
} else if nn != f.Size {
return fmt.Errorf("short write: file=%s", f.Name)
}
// Close the writer.
if err := fw.Close(); err != nil {
return fmt.Errorf("close: file=%s, err=%s", f.Name, err)
}
return nil
}
// FileWriter is the interface used for writing a file to a snapshot.
type FileWriter interface {
io.WriterTo
io.Closer
}
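For a rough sense of how the Writer above is driven, here is a minimal, hypothetical sketch: it registers one in-memory file in the manifest and streams the resulting tar archive to stdout. The import path and the bufCloser helper are assumptions for illustration only.
```
package main

import (
	"bytes"
	"os"
	"time"

	"github.com/influxdb/influxdb/snapshot" // assumed vendored import path
)

// bufCloser wraps bytes.Buffer so it satisfies FileWriter (io.WriterTo + io.Closer);
// Close is a no-op for an in-memory buffer.
type bufCloser struct{ bytes.Buffer }

func (b *bufCloser) Close() error { return nil }

func main() {
	data := &bufCloser{}
	data.WriteString("hello snapshot")

	sw := snapshot.NewWriter()
	// Register the file in the manifest and attach its writer under the same name.
	sw.Manifest.Files = append(sw.Manifest.Files, snapshot.File{
		Name:    "db/meta",
		Size:    int64(data.Len()),
		ModTime: time.Now(),
	})
	sw.FileWriters["db/meta"] = data

	// WriteTo emits the manifest entry followed by each file as a tar archive.
	if _, err := sw.WriteTo(os.Stdout); err != nil {
		panic(err)
	}
}
```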

View File

@ -1,72 +0,0 @@
package toml
import (
"fmt"
"strconv"
"time"
)
// maxInt is the largest integer representable by a word (architecture dependent).
const maxInt = int64(^uint(0) >> 1)
// Duration is a TOML wrapper type for time.Duration.
type Duration time.Duration
func (d Duration) String() string {
return time.Duration(d).String()
}
// UnmarshalText parses a TOML value into a duration value.
func (d *Duration) UnmarshalText(text []byte) error {
// Ignore if there is no value set.
if len(text) == 0 {
return nil
}
// Otherwise parse as a duration formatted string.
duration, err := time.ParseDuration(string(text))
if err != nil {
return err
}
// Set duration and return.
*d = Duration(duration)
return nil
}
// MarshalText converts a duration to a string for decoding toml
func (d Duration) MarshalText() (text []byte, err error) {
return []byte(d.String()), nil
}
// Size represents a TOML parseable file size.
// Users can specify size using "m" for megabytes and "g" for gigabytes.
type Size int
// UnmarshalText parses a byte size from text.
func (s *Size) UnmarshalText(text []byte) error {
// Parse numeric portion of value.
length := len(string(text))
size, err := strconv.ParseInt(string(text[:length-1]), 10, 64)
if err != nil {
return err
}
// Parse unit of measure ("m", "g", etc).
switch suffix := text[len(text)-1]; suffix {
case 'm':
size *= 1 << 20 // MB
case 'g':
size *= 1 << 30 // GB
default:
return fmt.Errorf("unknown size suffix: %c", suffix)
}
// Check for overflow.
if size > maxInt {
return fmt.Errorf("size %d cannot be represented by an int", size)
}
*s = Size(size)
return nil
}
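A minimal sketch of how these wrapper types decode values via UnmarshalText; the values and the main scaffolding are invented for illustration, and the import path matches the vendored package referenced elsewhere in this change.
```
package main

import (
	"fmt"

	"github.com/influxdb/influxdb/toml"
)

func main() {
	// Duration accepts Go duration strings such as "10m".
	var d toml.Duration
	if err := d.UnmarshalText([]byte("10m")); err != nil {
		panic(err)
	}
	fmt.Println(d.String()) // 10m0s

	// Size accepts a number followed by "m" (megabytes) or "g" (gigabytes).
	var s toml.Size
	if err := s.UnmarshalText([]byte("100m")); err != nil {
		panic(err)
	}
	fmt.Println(int(s)) // 104857600
}
```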

View File

@ -1,85 +0,0 @@
# Line Protocol
The line protocol is a text based format for writing points to InfluxDB. Each line defines a single point.
Multiple lines must be separated by the newline character `\n`. The format of the line consists of three parts:
```
[key] [fields] [timestamp]
```
Each section is separated by spaces. The minimum required point consists of a measurement name and at least one field. Points without a specified timestamp will be written using the server's local timestamp. Timestamps are assumed to be in nanoseconds unless a `precision` value is passed in the query string.
## Key
The key is the measurement name and any optional tags separated by commas. Measurement names, tag keys, and tag values must escape any spaces or commas using a backslash (`\`). For example: `\ ` and `\,`. All tag values are stored as strings and should not be surrounded in quotes.
Tags should be sorted by key before being sent for best performance. The sort should match that from the Go `bytes.Compare` function (http://golang.org/pkg/bytes/#Compare).
### Examples
```
# measurement only
cpu
# measurement and tags
cpu,host=serverA,region=us-west
# measurement with commas
cpu\,01,host=serverA,region=us-west
# tag value with spaces
cpu,host=server\ A,region=us\ west
```
## Fields
Fields are key-value metrics associated with the measurement. Every line must have at least one field. Multiple fields must be separated with commas and not spaces.
Field keys are always strings and follow the same syntactical rules as described above for tag keys and values. Field values can be one of four types. The first value written for a given field on a given measurement defines the type of that field for all series under that measurement.
* _integer_ - Numeric values that do not include a decimal. (e.g. 1, 345, 2015, -10)
* _float_ - Numeric values that include a decimal. (e.g. 1.0, -3.14, 6.0e+5). Note that all values _must_ have a decimal even if the decimal value is zero (1 is an _integer_, 1.0 is a _float_).
* _boolean_ - A value indicating true or false. Valid boolean strings are (t, T, true, TRUE, f, F, false, and FALSE).
* _string_ - A text value. All string values _must_ be surrounded in double-quotes `"`. If the string contains
a double-quote, it must be escaped with a backslash, e.g. `\"`.
```
# integer value
cpu value=1
# float value
cpu_load value=1.2
# boolean value
error fatal=true
# string value
event msg="logged out"
# multiple values
cpu load=10.0,alert=true,reason="value above maximum threshold"
```
## Timestamp
The timestamp section is optional but should be specified if possible. The value is an integer representing nanoseconds since the epoch. If the timestamp is not provided the point will inherit the server's local timestamp.
Some write APIs allow passing a lower precision. If the API supports a lower precision, the timestamp may also be
an integer epoch in microseconds, milliseconds, seconds, minutes or hours.
## Full Example
A full example is shown below.
```
cpu,host=server01,region=uswest value=1.0 1434055562000000000
cpu,host=server02,region=uswest value=3.0 1434055562000010000
```
In this example, the first line shows a `measurement` of "cpu", two tags ("host" and "region"), a `value` of 1.0, and a `timestamp` of 1434055562000000000. Following this is a second line, also a point in the `measurement` "cpu", but belonging to a different "host".
```
cpu,host=server\ 01,region=uswest value=1.0,msg="all systems nominal"
cpu,host=server\ 01,region=us\,west value_int=1
```
In these examples, the "host" is set to `server 01`. The field value associated with field key `msg` is double-quoted, as it is a string. The second example shows a region of `us,west` with the comma properly escaped. In the first example `value` is written as a floating point number. In the second, `value_int` is an integer.
# Distributed Queries

View File

@ -1,142 +0,0 @@
package tsdb
import (
"sync"
"sync/atomic"
"time"
)
// PointBatcher accepts Points and will emit a batch of those points when either
// a) the batch reaches a certain size, or b) a certain time passes.
type PointBatcher struct {
size int
duration time.Duration
stop chan struct{}
in chan Point
out chan []Point
flush chan struct{}
stats PointBatcherStats
wg *sync.WaitGroup
}
// NewPointBatcher returns a new PointBatcher.
func NewPointBatcher(sz int, d time.Duration) *PointBatcher {
return &PointBatcher{
size: sz,
duration: d,
stop: make(chan struct{}),
in: make(chan Point),
out: make(chan []Point),
flush: make(chan struct{}),
}
}
// PointBatcherStats are the statistics each batcher tracks.
type PointBatcherStats struct {
BatchTotal uint64 // Total count of batches transmitted.
PointTotal uint64 // Total count of points processed.
SizeTotal uint64 // Number of batches that reached size threshold.
TimeoutTotal uint64 // Number of timeouts that occurred.
}
// Start starts the batching process. Returns the in and out channels for points
// and point-batches respectively.
func (b *PointBatcher) Start() {
// Already running?
if b.wg != nil {
return
}
var timer *time.Timer
var batch []Point
var timerCh <-chan time.Time
emit := func() {
b.out <- batch
atomic.AddUint64(&b.stats.BatchTotal, 1)
batch = nil
}
b.wg = &sync.WaitGroup{}
b.wg.Add(1)
go func() {
defer b.wg.Done()
for {
select {
case <-b.stop:
if len(batch) > 0 {
emit()
timerCh = nil
}
return
case p := <-b.in:
atomic.AddUint64(&b.stats.PointTotal, 1)
if batch == nil {
batch = make([]Point, 0, b.size)
if b.duration > 0 {
timer = time.NewTimer(b.duration)
timerCh = timer.C
}
}
batch = append(batch, p)
if len(batch) >= b.size { // 0 means send immediately.
atomic.AddUint64(&b.stats.SizeTotal, 1)
emit()
timerCh = nil
}
case <-b.flush:
if len(batch) > 0 {
emit()
timerCh = nil
}
case <-timerCh:
atomic.AddUint64(&b.stats.TimeoutTotal, 1)
emit()
}
}
}()
}
func (b *PointBatcher) Stop() {
// If not running, nothing to stop.
if b.wg == nil {
return
}
close(b.stop)
b.wg.Wait()
}
// In returns the channel to which points should be written.
func (b *PointBatcher) In() chan<- Point {
return b.in
}
// Out returns the channel from which batches should be read.
func (b *PointBatcher) Out() <-chan []Point {
return b.out
}
// Flush instructs the batcher to emit any pending points in a batch, regardless of batch size.
// If there are no pending points, no batch is emitted.
func (b *PointBatcher) Flush() {
b.flush <- struct{}{}
}
// Stats returns a PointBatcherStats object for the PointBatcher. While each statistic should be
// closely correlated with the others, this is not guaranteed.
func (b *PointBatcher) Stats() *PointBatcherStats {
stats := PointBatcherStats{}
stats.BatchTotal = atomic.LoadUint64(&b.stats.BatchTotal)
stats.PointTotal = atomic.LoadUint64(&b.stats.PointTotal)
stats.SizeTotal = atomic.LoadUint64(&b.stats.SizeTotal)
stats.TimeoutTotal = atomic.LoadUint64(&b.stats.TimeoutTotal)
return &stats
}
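The size-or-timeout pattern used by PointBatcher can be sketched in isolation as follows. This is a simplified, hypothetical stand-in using plain strings, not the tsdb implementation, since constructing real Points requires types defined elsewhere in the package.
```
package main

import (
	"fmt"
	"time"
)

// batcher groups incoming strings into slices, emitting when either the batch
// reaches size n or timeout elapses since the first item of the batch arrived.
func batcher(in <-chan string, out chan<- []string, n int, timeout time.Duration) {
	var batch []string
	var timerCh <-chan time.Time
	emit := func() {
		out <- batch
		batch = nil
		timerCh = nil
	}
	for {
		select {
		case s, ok := <-in:
			if !ok {
				if len(batch) > 0 {
					emit()
				}
				close(out)
				return
			}
			if batch == nil {
				timerCh = time.After(timeout)
			}
			batch = append(batch, s)
			if len(batch) >= n {
				emit()
			}
		case <-timerCh:
			emit()
		}
	}
}

func main() {
	in := make(chan string)
	out := make(chan []string)
	go batcher(in, out, 3, 50*time.Millisecond)

	go func() {
		for _, s := range []string{"a", "b", "c", "d"} {
			in <- s
		}
		close(in)
	}()

	for b := range out {
		fmt.Println(b) // [a b c] then [d]
	}
}
```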

View File

@ -1,34 +0,0 @@
package tsdb
import (
"time"
"github.com/influxdb/influxdb/toml"
)
const (
// DefaultMaxWALSize is the default size of the WAL before it is flushed.
DefaultMaxWALSize = 100 * 1024 * 1024 // 100MB
// DefaultWALFlushInterval is the frequency the WAL will get flushed if
// it doesn't reach its size threshold.
DefaultWALFlushInterval = 10 * time.Minute
// DefaultWALPartitionFlushDelay is the sleep time between WAL partition flushes.
DefaultWALPartitionFlushDelay = 2 * time.Second
)
type Config struct {
Dir string `toml:"dir"`
MaxWALSize int `toml:"max-wal-size"`
WALFlushInterval toml.Duration `toml:"wal-flush-interval"`
WALPartitionFlushDelay toml.Duration `toml:"wal-partition-flush-delay"`
}
func NewConfig() Config {
return Config{
MaxWALSize: DefaultMaxWALSize,
WALFlushInterval: toml.Duration(DefaultWALFlushInterval),
WALPartitionFlushDelay: toml.Duration(DefaultWALPartitionFlushDelay),
}
}
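A small sketch showing the defaults produced by NewConfig; the import path is assumed from the vendored layout, and Dir (which has no default) is set here only for illustration.
```
package main

import (
	"fmt"
	"time"

	"github.com/influxdb/influxdb/tsdb" // assumed vendored import path
)

func main() {
	c := tsdb.NewConfig()
	c.Dir = "/var/opt/influxdb/data" // Dir has no default and must be set by the caller.

	fmt.Println(c.MaxWALSize)                            // 104857600 (100MB)
	fmt.Println(time.Duration(c.WALFlushInterval))       // 10m0s
	fmt.Println(time.Duration(c.WALPartitionFlushDelay)) // 2s
}
```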

View File

@ -1,5 +0,0 @@
/*
Package tsdb implements a durable time series database.
*/
package tsdb

View File

@ -1,961 +0,0 @@
package tsdb
import (
"fmt"
"math"
"sort"
"time"
"github.com/influxdb/influxdb/influxql"
)
const (
// Return an error if the user is trying to select more than this number of points in a group by statement.
// Most likely they specified a group by interval without time boundaries.
MaxGroupByPoints = 100000
// Since time is always selected, the column count when selecting only a single other value will be 2
SelectColumnCountWithOneValue = 2
// IgnoredChunkSize is what gets passed into Mapper.Begin for aggregate queries as they don't chunk points out
IgnoredChunkSize = 0
)
// MapperResponse is the structure in which responses from mappers are sent over the network. TagSets
// is only set with the first response. Data will be nil when the Mapper has no more data.
type MapperResponse struct {
TagSets []string `json:"tagSets,omitempty"`
Data []byte `json:"data"`
}
// Mapper is the interface all Mapper types must implement.
type Mapper interface {
Open() error
TagSets() []string
NextChunk() (interface{}, error)
Close()
}
// StatefulMapper encapsulates a Mapper and some state that the executor needs to
// track for that mapper.
type StatefulMapper struct {
Mapper
bufferedChunk *mapperOutput // Last read chunk.
drained bool
}
// Executor is the interface all Executor types must implement.
type Executor interface {
Execute() <-chan *influxql.Row
}
// NextChunk returns the next chunk from the wrapped Mapper, cast to a *mapperOutput.
func (srm *StatefulMapper) NextChunk() (*mapperOutput, error) {
c, err := srm.Mapper.NextChunk()
if err != nil {
return nil, err
}
chunk, ok := c.(*mapperOutput)
if !ok {
if chunk == interface{}(nil) {
return nil, nil
}
}
return chunk, nil
}
// RawExecutor is an executor for RawMappers.
type RawExecutor struct {
stmt *influxql.SelectStatement
mappers []*StatefulMapper
chunkSize int
limitedTagSets map[string]struct{} // Set tagsets for which data has reached the LIMIT.
}
// NewRawExecutor returns a new RawExecutor.
func NewRawExecutor(stmt *influxql.SelectStatement, mappers []Mapper, chunkSize int) *RawExecutor {
a := []*StatefulMapper{}
for _, m := range mappers {
a = append(a, &StatefulMapper{m, nil, false})
}
return &RawExecutor{
stmt: stmt,
mappers: a,
chunkSize: chunkSize,
limitedTagSets: make(map[string]struct{}),
}
}
// Execute begins execution of the query and returns a channel to receive rows.
func (re *RawExecutor) Execute() <-chan *influxql.Row {
// Create output channel and stream data in a separate goroutine.
out := make(chan *influxql.Row, 0)
go re.execute(out)
return out
}
func (re *RawExecutor) execute(out chan *influxql.Row) {
// It's important that all resources are released when execution completes.
defer re.close()
// Open the mappers.
for _, m := range re.mappers {
if err := m.Open(); err != nil {
out <- &influxql.Row{Err: err}
return
}
}
// Used to read ahead chunks from mappers.
var rowWriter *limitedRowWriter
var currTagset string
// Keep looping until all mappers drained.
var err error
for {
// Get the next chunk from each Mapper.
for _, m := range re.mappers {
if m.drained {
continue
}
// Set the next buffered chunk on the mapper, or mark it drained.
for {
if m.bufferedChunk == nil {
m.bufferedChunk, err = m.NextChunk()
if err != nil {
out <- &influxql.Row{Err: err}
return
}
if m.bufferedChunk == nil {
// Mapper can do no more for us.
m.drained = true
break
}
}
if re.tagSetIsLimited(m.bufferedChunk.Name) {
// chunk's tagset is limited, so no good. Try again.
m.bufferedChunk = nil
continue
}
// This mapper has a chunk available, and it is not limited.
break
}
}
// All Mappers done?
if re.mappersDrained() {
rowWriter.Flush()
break
}
// Send out data for the next alphabetically-lowest tagset. All Mappers emit data in this order,
// so by always continuing with the lowest tagset until it is finished, we process all data in
// the required order, and don't "miss" any.
tagset := re.nextMapperTagSet()
if tagset != currTagset {
currTagset = tagset
// Tagset has changed, time for a new rowWriter. Be sure to kick out any residual values.
rowWriter.Flush()
rowWriter = nil
}
// Process the mapper outputs. We can send out everything up to the min of the last time
// of the chunks for the next tagset.
minTime := re.nextMapperLowestTime(tagset)
// Now empty out all the chunks up to the min time. Create new output struct for this data.
var chunkedOutput *mapperOutput
for _, m := range re.mappers {
if m.drained {
continue
}
// This mapper's next chunk is not for the next tagset, or the very first value of
// the chunk is at a timestamp higher than the acceptable maximum. Skip it.
if m.bufferedChunk.key() != tagset || m.bufferedChunk.Values[0].Time > minTime {
continue
}
// Find the index of the point up to the min.
ind := len(m.bufferedChunk.Values)
for i, mo := range m.bufferedChunk.Values {
if mo.Time > minTime {
ind = i
break
}
}
// Add up to the index to the values
if chunkedOutput == nil {
chunkedOutput = &mapperOutput{
Name: m.bufferedChunk.Name,
Tags: m.bufferedChunk.Tags,
}
chunkedOutput.Values = m.bufferedChunk.Values[:ind]
} else {
chunkedOutput.Values = append(chunkedOutput.Values, m.bufferedChunk.Values[:ind]...)
}
// Clear out the values being sent out, keep the remainder.
m.bufferedChunk.Values = m.bufferedChunk.Values[ind:]
// If we emptied out all the values, clear the mapper's buffered chunk.
if len(m.bufferedChunk.Values) == 0 {
m.bufferedChunk = nil
}
}
// Sort the values by time first so we can then handle offset and limit
sort.Sort(mapperValues(chunkedOutput.Values))
// Now that we have full name and tag details, initialize the rowWriter.
// The Name and Tags will be the same for all mappers.
if rowWriter == nil {
rowWriter = &limitedRowWriter{
limit: re.stmt.Limit,
offset: re.stmt.Offset,
chunkSize: re.chunkSize,
name: chunkedOutput.Name,
tags: chunkedOutput.Tags,
selectNames: re.stmt.NamesInSelect(),
fields: re.stmt.Fields,
c: out,
}
}
if re.stmt.HasDerivative() {
interval, err := derivativeInterval(re.stmt)
if err != nil {
out <- &influxql.Row{Err: err}
return
}
rowWriter.transformer = &rawQueryDerivativeProcessor{
isNonNegative: re.stmt.FunctionCalls()[0].Name == "non_negative_derivative",
derivativeInterval: interval,
}
}
// Emit the data via the limiter.
if limited := rowWriter.Add(chunkedOutput.Values); limited {
// Limit for this tagset was reached, mark it and start draining a new tagset.
re.limitTagSet(chunkedOutput.key())
continue
}
}
close(out)
}
// mappersDrained returns whether all the executor's Mappers have been drained of data.
func (re *RawExecutor) mappersDrained() bool {
for _, m := range re.mappers {
if !m.drained {
return false
}
}
return true
}
// nextMapperTagset returns the alphabetically lowest tagset across all Mappers.
func (re *RawExecutor) nextMapperTagSet() string {
tagset := ""
for _, m := range re.mappers {
if m.bufferedChunk != nil {
if tagset == "" {
tagset = m.bufferedChunk.key()
} else if m.bufferedChunk.key() < tagset {
tagset = m.bufferedChunk.key()
}
}
}
return tagset
}
// nextMapperLowestTime returns the lowest minimum time across all Mappers, for the given tagset.
func (re *RawExecutor) nextMapperLowestTime(tagset string) int64 {
minTime := int64(math.MaxInt64)
for _, m := range re.mappers {
if !m.drained && m.bufferedChunk != nil {
if m.bufferedChunk.key() != tagset {
continue
}
t := m.bufferedChunk.Values[len(m.bufferedChunk.Values)-1].Time
if t < minTime {
minTime = t
}
}
}
return minTime
}
// tagSetIsLimited returns whether data for the given tagset has been LIMITed.
func (re *RawExecutor) tagSetIsLimited(tagset string) bool {
_, ok := re.limitedTagSets[tagset]
return ok
}
// limitTagSet marks the given tagset as LIMITed.
func (re *RawExecutor) limitTagSet(tagset string) {
re.limitedTagSets[tagset] = struct{}{}
}
// Close closes the executor such that all resources are released. Once closed,
// an executor may not be re-used.
func (re *RawExecutor) close() {
if re != nil {
for _, m := range re.mappers {
m.Close()
}
}
}
// AggregateExecutor is an executor for AggregateMappers.
type AggregateExecutor struct {
stmt *influxql.SelectStatement
queryTMin int64 // Needed?
queryTMax int64 // Needed?
mappers []*StatefulMapper
}
// NewAggregateExecutor returns a new AggregateExecutor.
func NewAggregateExecutor(stmt *influxql.SelectStatement, mappers []Mapper) *AggregateExecutor {
a := []*StatefulMapper{}
for _, m := range mappers {
a = append(a, &StatefulMapper{m, nil, false})
}
return &AggregateExecutor{
stmt: stmt,
mappers: a,
}
}
// Execute begins execution of the query and returns a channel to receive rows.
func (ae *AggregateExecutor) Execute() <-chan *influxql.Row {
// Create output channel and stream data in a separate goroutine.
out := make(chan *influxql.Row, 0)
go ae.execute(out)
return out
}
func (ae *AggregateExecutor) execute(out chan *influxql.Row) {
// It's important to close all resources when execution completes.
defer ae.close()
// Create the functions which will reduce values from mappers for
// a given interval. The function offsets within this slice match
// the offsets within the value slices that are returned by the
// mapper.
aggregates := ae.stmt.FunctionCalls()
reduceFuncs := make([]influxql.ReduceFunc, len(aggregates))
for i, c := range aggregates {
reduceFunc, err := influxql.InitializeReduceFunc(c)
if err != nil {
out <- &influxql.Row{Err: err}
return
}
reduceFuncs[i] = reduceFunc
}
// Put together the rows to return, starting with columns.
columnNames := make([]string, len(ae.stmt.Fields)+1)
columnNames[0] = "time"
for i, f := range ae.stmt.Fields {
columnNames[i+1] = f.Name()
}
// Open the mappers.
for _, m := range ae.mappers {
if err := m.Open(); err != nil {
out <- &influxql.Row{Err: err}
return
}
}
// Build the set of available tagsets across all mappers. This is used for
// later checks.
availTagSets := newStringSet()
for _, m := range ae.mappers {
for _, t := range m.TagSets() {
availTagSets.add(t)
}
}
// Prime each mapper's chunk buffer.
var err error
for _, m := range ae.mappers {
m.bufferedChunk, err = m.NextChunk()
if err != nil {
out <- &influxql.Row{Err: err}
return
}
if m.bufferedChunk == nil {
m.drained = true
}
}
// Keep looping until all mappers drained.
for !ae.mappersDrained() {
// Send out data for the next alphabetically-lowest tagset. All Mappers send out in this order
// so collect data for this tagset, ignoring all others.
tagset := ae.nextMapperTagSet()
chunks := []*mapperOutput{}
// Pull as much as possible from each mapper. Stop when a mapper offers
// data for a new tagset, or empties completely.
for _, m := range ae.mappers {
if m.drained {
continue
}
for {
if m.bufferedChunk == nil {
m.bufferedChunk, err = m.NextChunk()
if err != nil {
out <- &influxql.Row{Err: err}
return
}
if m.bufferedChunk == nil {
m.drained = true
break
}
}
// Got a chunk. Can we use it?
if m.bufferedChunk.key() != tagset {
// No, so just leave it in the buffer.
break
}
// We can, take it.
chunks = append(chunks, m.bufferedChunk)
m.bufferedChunk = nil
}
}
// Prep a row, ready for kicking out.
var row *influxql.Row
// Prep for bucketing data by start time of the interval.
buckets := map[int64][][]interface{}{}
for _, chunk := range chunks {
if row == nil {
row = &influxql.Row{
Name: chunk.Name,
Tags: chunk.Tags,
Columns: columnNames,
}
}
startTime := chunk.Values[0].Time
_, ok := buckets[startTime]
values := chunk.Values[0].Value.([]interface{})
if !ok {
buckets[startTime] = make([][]interface{}, len(values))
}
for i, v := range values {
buckets[startTime][i] = append(buckets[startTime][i], v)
}
}
// After the loop above, each time bucket holds one slice per aggregate function. Each of those
// slices contains the interface{} values for that bucket, ready for passing to the reducer functions.
// Work each bucket of time, in time ascending order.
tMins := make(int64arr, 0, len(buckets))
for k := range buckets {
tMins = append(tMins, k)
}
sort.Sort(tMins)
values := make([][]interface{}, len(tMins))
for i, t := range tMins {
values[i] = make([]interface{}, 0, len(columnNames))
values[i] = append(values[i], time.Unix(0, t).UTC()) // Time value is always first.
for j, f := range reduceFuncs {
reducedVal := f(buckets[t][j])
values[i] = append(values[i], reducedVal)
}
}
// Perform any mathematics.
values = processForMath(ae.stmt.Fields, values)
// Handle any fill options
values = ae.processFill(values)
// process derivatives
values = ae.processDerivative(values)
// If we have multiple tag sets we'll want to filter out the empty ones
if len(availTagSets.list()) > 1 && resultsEmpty(values) {
continue
}
row.Values = values
out <- row
}
close(out)
}
// processFill will take the results and return new results (or the same if no fill modifications are needed)
// with whatever fill options the query has.
func (ae *AggregateExecutor) processFill(results [][]interface{}) [][]interface{} {
// don't do anything if we're supposed to leave the nulls
if ae.stmt.Fill == influxql.NullFill {
return results
}
if ae.stmt.Fill == influxql.NoFill {
// remove any rows that have even one nil value. This one is tricky because they could have multiple
// aggregates, but this option means that any row that has even one nil gets purged.
newResults := make([][]interface{}, 0, len(results))
for _, vals := range results {
hasNil := false
// start at 1 because the first value is always time
for j := 1; j < len(vals); j++ {
if vals[j] == nil {
hasNil = true
break
}
}
if !hasNil {
newResults = append(newResults, vals)
}
}
return newResults
}
// They're either filling with previous values or a specific number
for i, vals := range results {
// start at 1 because the first value is always time
for j := 1; j < len(vals); j++ {
if vals[j] == nil {
switch ae.stmt.Fill {
case influxql.PreviousFill:
if i != 0 {
vals[j] = results[i-1][j]
}
case influxql.NumberFill:
vals[j] = ae.stmt.FillValue
}
}
}
}
return results
}
// processDerivative returns the derivatives of the results
func (ae *AggregateExecutor) processDerivative(results [][]interface{}) [][]interface{} {
// Return early if we're not supposed to process the derivatives
if ae.stmt.HasDerivative() {
interval, err := derivativeInterval(ae.stmt)
if err != nil {
return results // XXX need to handle this better.
}
// Determines whether to drop negative differences
isNonNegative := ae.stmt.FunctionCalls()[0].Name == "non_negative_derivative"
return processAggregateDerivative(results, isNonNegative, interval)
}
return results
}
// mappersDrained returns whether all the executor's Mappers have been drained of data.
func (ae *AggregateExecutor) mappersDrained() bool {
for _, m := range ae.mappers {
if !m.drained {
return false
}
}
return true
}
// nextMapperTagset returns the alphabetically lowest tagset across all Mappers.
func (ae *AggregateExecutor) nextMapperTagSet() string {
tagset := ""
for _, m := range ae.mappers {
if m.bufferedChunk != nil {
if tagset == "" {
tagset = m.bufferedChunk.key()
} else if m.bufferedChunk.key() < tagset {
tagset = m.bufferedChunk.key()
}
}
}
return tagset
}
// Close closes the executor such that all resources are released. Once closed,
// an executor may not be re-used.
func (ae *AggregateExecutor) close() {
for _, m := range ae.mappers {
m.Close()
}
}
// limitedRowWriter accepts raw mapper values, and will emit those values as rows in chunks
// of the given size. If the chunk size is 0, no chunking will be performed. In addition, if
// limit is reached, outstanding values will be emitted. If limit is zero, no limit is enforced.
type limitedRowWriter struct {
chunkSize int
limit int
offset int
name string
tags map[string]string
selectNames []string
fields influxql.Fields
c chan *influxql.Row
currValues []*mapperValue
totalOffSet int
totalSent int
transformer interface {
process(input []*mapperValue) []*mapperValue
}
}
// Add accepts a slice of values, and will emit those values as per chunking requirements.
// If limited is returned as true, the limit was also reached and no more values should be
// added. In that case only up to the limit of values are emitted.
func (r *limitedRowWriter) Add(values []*mapperValue) (limited bool) {
if r.currValues == nil {
r.currValues = make([]*mapperValue, 0, r.chunkSize)
}
// Enforce offset.
if r.totalOffSet < r.offset {
// Still some offsetting to do.
offsetRequired := r.offset - r.totalOffSet
if offsetRequired >= len(values) {
r.totalOffSet += len(values)
return false
} else {
// Drop leading values and keep going.
values = values[offsetRequired:]
r.totalOffSet += offsetRequired
}
}
r.currValues = append(r.currValues, values...)
// Check limit.
limitReached := r.limit > 0 && r.totalSent+len(r.currValues) >= r.limit
if limitReached {
// Limit will be satisfied with current values. Truncate 'em.
r.currValues = r.currValues[:r.limit-r.totalSent]
}
// Is chunking in effect?
if r.chunkSize != IgnoredChunkSize {
// Chunking level reached?
for len(r.currValues) >= r.chunkSize {
index := len(r.currValues) - (len(r.currValues) - r.chunkSize)
r.c <- r.processValues(r.currValues[:index])
r.currValues = r.currValues[index:]
}
// After values have been sent out by chunking, there may still be some
// values left, if the remainder is less than the chunk size. But if the
// limit has been reached, kick them out.
if len(r.currValues) > 0 && limitReached {
r.c <- r.processValues(r.currValues)
r.currValues = nil
}
} else if limitReached {
// No chunking in effect, but the limit has been reached.
r.c <- r.processValues(r.currValues)
r.currValues = nil
}
return limitReached
}
// Flush instructs the limitedRowWriter to emit any pending values as a single row,
// adhering to any limits. Chunking is not enforced.
func (r *limitedRowWriter) Flush() {
if r == nil {
return
}
// If at least some rows were sent, and no values are pending, then don't
// emit anything, since at least 1 row was previously emitted. This ensures
// that if no rows were ever sent, at least 1 will be emitted, even an empty row.
if r.totalSent != 0 && len(r.currValues) == 0 {
return
}
if r.limit > 0 && len(r.currValues) > r.limit {
r.currValues = r.currValues[:r.limit]
}
r.c <- r.processValues(r.currValues)
r.currValues = nil
}
// processValues emits the given values in a single row.
func (r *limitedRowWriter) processValues(values []*mapperValue) *influxql.Row {
defer func() {
r.totalSent += len(values)
}()
selectNames := r.selectNames
if r.transformer != nil {
values = r.transformer.process(values)
}
// ensure that time is in the select names and in the first position
hasTime := false
for i, n := range selectNames {
if n == "time" {
// Swap time to the first argument for names
if i != 0 {
selectNames[0], selectNames[i] = selectNames[i], selectNames[0]
}
hasTime = true
break
}
}
// time should always be in the list of names they get back
if !hasTime {
selectNames = append([]string{"time"}, selectNames...)
}
// since selectNames can contain tags, we need to strip them out
selectFields := make([]string, 0, len(selectNames))
for _, n := range selectNames {
if _, found := r.tags[n]; !found {
selectFields = append(selectFields, n)
}
}
row := &influxql.Row{
Name: r.name,
Tags: r.tags,
Columns: selectFields,
}
// Kick out an empty row if no results are available.
if len(values) == 0 {
return row
}
// if they've selected only a single value we have to handle things a little differently
singleValue := len(selectFields) == SelectColumnCountWithOneValue
// the results will have all of the raw mapper results, convert into the row
for _, v := range values {
vals := make([]interface{}, len(selectFields))
if singleValue {
vals[0] = time.Unix(0, v.Time).UTC()
vals[1] = v.Value.(interface{})
} else {
fields := v.Value.(map[string]interface{})
// time is always the first value
vals[0] = time.Unix(0, v.Time).UTC()
// populate the other values
for i := 1; i < len(selectFields); i++ {
vals[i] = fields[selectFields[i]]
}
}
row.Values = append(row.Values, vals)
}
// Perform any mathematical post-processing.
row.Values = processForMath(r.fields, row.Values)
return row
}
type rawQueryDerivativeProcessor struct {
lastValueFromPreviousChunk *mapperValue
isNonNegative bool // Whether to drop negative differences
derivativeInterval time.Duration
}
func (rqdp *rawQueryDerivativeProcessor) process(input []*mapperValue) []*mapperValue {
if len(input) == 0 {
return input
}
// If we only have 1 value, then the value did not change, so return
// a single row with 0.0
if len(input) == 1 {
return []*mapperValue{
&mapperValue{
Time: input[0].Time,
Value: 0.0,
},
}
}
if rqdp.lastValueFromPreviousChunk == nil {
rqdp.lastValueFromPreviousChunk = input[0]
}
derivativeValues := []*mapperValue{}
for i := 1; i < len(input); i++ {
v := input[i]
// Calculate the derivative of successive points by dividing the difference
// of each value by the elapsed time normalized to the interval
diff := int64toFloat64(v.Value) - int64toFloat64(rqdp.lastValueFromPreviousChunk.Value)
elapsed := v.Time - rqdp.lastValueFromPreviousChunk.Time
value := 0.0
if elapsed > 0 {
value = diff / (float64(elapsed) / float64(rqdp.derivativeInterval))
}
rqdp.lastValueFromPreviousChunk = v
// Drop negative values for non-negative derivatives
if rqdp.isNonNegative && diff < 0 {
continue
}
derivativeValues = append(derivativeValues, &mapperValue{
Time: v.Time,
Value: value,
})
}
return derivativeValues
}
// processForMath will apply any math that was specified in the select statement
// against the passed in results
func processForMath(fields influxql.Fields, results [][]interface{}) [][]interface{} {
hasMath := false
for _, f := range fields {
if _, ok := f.Expr.(*influxql.BinaryExpr); ok {
hasMath = true
} else if _, ok := f.Expr.(*influxql.ParenExpr); ok {
hasMath = true
}
}
if !hasMath {
return results
}
processors := make([]influxql.Processor, len(fields))
startIndex := 1
for i, f := range fields {
processors[i], startIndex = influxql.GetProcessor(f.Expr, startIndex)
}
mathResults := make([][]interface{}, len(results))
for i := range mathResults {
mathResults[i] = make([]interface{}, len(fields)+1)
// put the time in
mathResults[i][0] = results[i][0]
for j, p := range processors {
mathResults[i][j+1] = p(results[i])
}
}
return mathResults
}
// processAggregateDerivative returns the derivatives of an aggregate result set
func processAggregateDerivative(results [][]interface{}, isNonNegative bool, interval time.Duration) [][]interface{} {
// Return early if we can't calculate derivatives
if len(results) == 0 {
return results
}
// If we only have 1 value, then the value did not change, so return
// a single row w/ 0.0
if len(results) == 1 {
return [][]interface{}{
[]interface{}{results[0][0], 0.0},
}
}
// Otherwise calculate the derivatives as the difference between consecutive
// points divided by the elapsed time. Then normalize to the requested
// interval.
derivatives := [][]interface{}{}
for i := 1; i < len(results); i++ {
prev := results[i-1]
cur := results[i]
if cur[1] == nil || prev[1] == nil {
continue
}
elapsed := cur[0].(time.Time).Sub(prev[0].(time.Time))
diff := int64toFloat64(cur[1]) - int64toFloat64(prev[1])
value := 0.0
if elapsed > 0 {
value = float64(diff) / (float64(elapsed) / float64(interval))
}
// Drop negative values for non-negative derivatives
if isNonNegative && diff < 0 {
continue
}
val := []interface{}{
cur[0],
value,
}
derivatives = append(derivatives, val)
}
return derivatives
}
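To make the normalization above concrete, here is a small standalone example of the same arithmetic with invented values: two aggregate values one minute apart, normalized to a one-minute interval.
```
package main

import (
	"fmt"
	"time"
)

func main() {
	// Two consecutive aggregate values, one minute apart, normalized to a 1m interval.
	prev, cur := 10.0, 16.0
	elapsed := time.Minute
	interval := time.Minute

	value := (cur - prev) / (float64(elapsed) / float64(interval))
	fmt.Println(value) // 6: the series grew by 6 units per interval
}
```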
// derivativeInterval returns the time interval for the one (and only) derivative func
func derivativeInterval(stmt *influxql.SelectStatement) (time.Duration, error) {
if len(stmt.FunctionCalls()[0].Args) == 2 {
return stmt.FunctionCalls()[0].Args[1].(*influxql.DurationLiteral).Val, nil
}
interval, err := stmt.GroupByInterval()
if err != nil {
return 0, err
}
if interval > 0 {
return interval, nil
}
return time.Second, nil
}
// resultsEmpty will return true if all the result values are empty or contain only nulls
func resultsEmpty(resultValues [][]interface{}) bool {
for _, vals := range resultValues {
// start the loop at 1 because we want to skip over the time value
for i := 1; i < len(vals); i++ {
if vals[i] != nil {
return false
}
}
}
return true
}
func int64toFloat64(v interface{}) float64 {
switch v.(type) {
case int64:
return float64(v.(int64))
case float64:
return v.(float64)
}
panic(fmt.Sprintf("expected either int64 or float64, got %v", v))
}
type int64arr []int64
func (a int64arr) Len() int { return len(a) }
func (a int64arr) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a int64arr) Less(i, j int) bool { return a[i] < a[j] }

View File

@ -1,123 +0,0 @@
// Code generated by protoc-gen-go.
// source: meta.proto
// DO NOT EDIT!
/*
Package internal is a generated protocol buffer package.
It is generated from these files:
meta.proto
It has these top-level messages:
Series
Tag
MeasurementFields
Field
*/
package internal
import proto "github.com/golang/protobuf/proto"
import math "math"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = math.Inf
type Series struct {
Key *string `protobuf:"bytes,1,req" json:"Key,omitempty"`
Tags []*Tag `protobuf:"bytes,2,rep" json:"Tags,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *Series) Reset() { *m = Series{} }
func (m *Series) String() string { return proto.CompactTextString(m) }
func (*Series) ProtoMessage() {}
func (m *Series) GetKey() string {
if m != nil && m.Key != nil {
return *m.Key
}
return ""
}
func (m *Series) GetTags() []*Tag {
if m != nil {
return m.Tags
}
return nil
}
type Tag struct {
Key *string `protobuf:"bytes,1,req" json:"Key,omitempty"`
Value *string `protobuf:"bytes,2,req" json:"Value,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *Tag) Reset() { *m = Tag{} }
func (m *Tag) String() string { return proto.CompactTextString(m) }
func (*Tag) ProtoMessage() {}
func (m *Tag) GetKey() string {
if m != nil && m.Key != nil {
return *m.Key
}
return ""
}
func (m *Tag) GetValue() string {
if m != nil && m.Value != nil {
return *m.Value
}
return ""
}
type MeasurementFields struct {
Fields []*Field `protobuf:"bytes,1,rep" json:"Fields,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *MeasurementFields) Reset() { *m = MeasurementFields{} }
func (m *MeasurementFields) String() string { return proto.CompactTextString(m) }
func (*MeasurementFields) ProtoMessage() {}
func (m *MeasurementFields) GetFields() []*Field {
if m != nil {
return m.Fields
}
return nil
}
type Field struct {
ID *int32 `protobuf:"varint,1,req" json:"ID,omitempty"`
Name *string `protobuf:"bytes,2,req" json:"Name,omitempty"`
Type *int32 `protobuf:"varint,3,req" json:"Type,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *Field) Reset() { *m = Field{} }
func (m *Field) String() string { return proto.CompactTextString(m) }
func (*Field) ProtoMessage() {}
func (m *Field) GetID() int32 {
if m != nil && m.ID != nil {
return *m.ID
}
return 0
}
func (m *Field) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func (m *Field) GetType() int32 {
if m != nil && m.Type != nil {
return *m.Type
}
return 0
}
func init() {
}

View File

@ -1,27 +0,0 @@
package internal;
//========================================================================
//
// Metadata
//
//========================================================================
message Series {
required string Key = 1;
repeated Tag Tags = 2;
}
message Tag {
required string Key = 1;
required string Value = 2;
}
message MeasurementFields {
repeated Field Fields = 1;
}
message Field {
required int32 ID = 1;
required string Name = 2;
required int32 Type = 3;
}

View File

@ -1,751 +0,0 @@
package tsdb
import (
"encoding/binary"
"errors"
"fmt"
"math"
"sort"
"strings"
"github.com/boltdb/bolt"
"github.com/influxdb/influxdb/influxql"
)
// mapperValue is a complex type, which can encapsulate data from both raw and aggregate
// mappers. This currently allows the marshalling and network code to remain simpler. For
// aggregate output, Time is ignored, and actual Time-Value pairs are contained solely
// within the Value field.
type mapperValue struct {
Time int64 `json:"time,omitempty"` // Ignored for aggregate output.
Value interface{} `json:"value,omitempty"` // For aggregate, contains interval time multiple values.
}
type mapperValues []*mapperValue
func (a mapperValues) Len() int { return len(a) }
func (a mapperValues) Less(i, j int) bool { return a[i].Time < a[j].Time }
func (a mapperValues) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
type mapperOutput struct {
Name string `json:"name,omitempty"`
Tags map[string]string `json:"tags,omitempty"`
Values []*mapperValue `json:"values,omitempty"` // For aggregates contains a single value at [0]
}
func (mo *mapperOutput) key() string {
return formMeasurementTagSetKey(mo.Name, mo.Tags)
}
// RawMapper is for retrieving data, for a raw query, for a single shard.
type RawMapper struct {
shard *Shard
stmt *influxql.SelectStatement
chunkSize int
tx *bolt.Tx // Read transaction for this shard.
queryTMin int64
queryTMax int64
whereFields []string // field names that occur in the where clause
selectFields []string // field names that occur in the select clause
selectTags []string // tag keys that occur in the select clause
fieldName string // the field name being read.
decoders map[string]*FieldCodec // byte decoder per measurement
cursors []*tagSetCursor // Cursors per tag sets.
currCursorIndex int // Current tagset cursor being drained.
}
// NewRawMapper returns a mapper for the given shard, which will return data for the SELECT statement.
func NewRawMapper(shard *Shard, stmt *influxql.SelectStatement, chunkSize int) *RawMapper {
return &RawMapper{
shard: shard,
stmt: stmt,
chunkSize: chunkSize,
cursors: make([]*tagSetCursor, 0),
}
}
// Open opens the raw mapper.
func (rm *RawMapper) Open() error {
// Get a read-only transaction.
tx, err := rm.shard.DB().Begin(false)
if err != nil {
return err
}
rm.tx = tx
// Set all time-related parameters on the mapper.
rm.queryTMin, rm.queryTMax = influxql.TimeRangeAsEpochNano(rm.stmt.Condition)
// Create the TagSet cursors for the Mapper.
for _, src := range rm.stmt.Sources {
mm, ok := src.(*influxql.Measurement)
if !ok {
return fmt.Errorf("invalid source type: %#v", src)
}
m := rm.shard.index.Measurement(mm.Name)
if m == nil {
// This shard has never received data for the measurement. No Mapper
// required.
return nil
}
// Create tagset cursors and determine various field types within SELECT statement.
tsf, err := createTagSetsAndFields(m, rm.stmt)
if err != nil {
return err
}
tagSets := tsf.tagSets
rm.selectFields = tsf.selectFields
rm.selectTags = tsf.selectTags
rm.whereFields = tsf.whereFields
if len(rm.selectFields) == 0 {
return fmt.Errorf("select statement must include at least one field")
}
// SLIMIT and SOFFSET the unique series
if rm.stmt.SLimit > 0 || rm.stmt.SOffset > 0 {
if rm.stmt.SOffset > len(tagSets) {
tagSets = nil
} else {
if rm.stmt.SOffset+rm.stmt.SLimit > len(tagSets) {
rm.stmt.SLimit = len(tagSets) - rm.stmt.SOffset
}
tagSets = tagSets[rm.stmt.SOffset : rm.stmt.SOffset+rm.stmt.SLimit]
}
}
// Create all cursors for reading the data from this shard.
for _, t := range tagSets {
cursors := []*seriesCursor{}
for i, key := range t.SeriesKeys {
c := createCursorForSeries(rm.tx, rm.shard, key)
if c == nil {
// No data exists for this key.
continue
}
cm := newSeriesCursor(c, t.Filters[i])
cursors = append(cursors, cm)
}
tsc := newTagSetCursor(m.Name, t.Tags, cursors, rm.shard.FieldCodec(m.Name))
// Prime the buffers.
for i := 0; i < len(tsc.cursors); i++ {
k, v := tsc.cursors[i].SeekTo(rm.queryTMin)
tsc.keyBuffer[i] = k
tsc.valueBuffer[i] = v
}
rm.cursors = append(rm.cursors, tsc)
}
sort.Sort(tagSetCursors(rm.cursors))
}
return nil
}
// TagSets returns the list of TagSets for which this mapper has data.
func (rm *RawMapper) TagSets() []string {
return tagSetCursors(rm.cursors).Keys()
}
// NextChunk returns the next chunk of data. Data comes in the same order as the
// tags returned by TagSets. A chunk never contains data for more than 1 tagset.
// If there is no more data for any tagset, nil will be returned.
func (rm *RawMapper) NextChunk() (interface{}, error) {
var output *mapperOutput
for {
if rm.currCursorIndex == len(rm.cursors) {
// All tagset cursors processed. NextChunk'ing complete.
return nil, nil
}
cursor := rm.cursors[rm.currCursorIndex]
k, v := cursor.Next(rm.queryTMin, rm.queryTMax, rm.selectFields, rm.whereFields)
if v == nil {
// Tagset cursor is empty, move to next one.
rm.currCursorIndex++
if output != nil {
// There is data, so return it and continue when next called.
return output, nil
} else {
// Just go straight to the next cursor.
continue
}
}
if output == nil {
output = &mapperOutput{
Name: cursor.measurement,
Tags: cursor.tags,
}
}
value := &mapperValue{Time: k, Value: v}
output.Values = append(output.Values, value)
if len(output.Values) == rm.chunkSize {
return output, nil
}
}
}
// Close closes the mapper.
func (rm *RawMapper) Close() {
if rm != nil && rm.tx != nil {
_ = rm.tx.Rollback()
}
}
// AggMapper is for retrieving data, for an aggregate query, from a given shard.
type AggMapper struct {
shard *Shard
stmt *influxql.SelectStatement
tx *bolt.Tx // Read transaction for this shard.
queryTMin int64 // Minimum time of the query.
queryTMinWindow int64 // Minimum time of the query floored to start of interval.
queryTMax int64 // Maximum time of the query.
intervalSize int64 // Size of each interval.
mapFuncs []influxql.MapFunc // The mapping functions.
fieldNames []string // the field name being read for mapping.
whereFields []string // field names that occur in the where clause
selectFields []string // field names that occur in the select clause
selectTags []string // tag keys that occur in the select clause
numIntervals int // Maximum number of intervals to return.
currInterval int // Current interval for which data is being fetched.
cursors []*tagSetCursor // Cursors per tag sets.
currCursorIndex int // Current tagset cursor being drained.
}
// NewAggMapper returns a mapper for the given shard, which will return data for the SELECT statement.
func NewAggMapper(shard *Shard, stmt *influxql.SelectStatement) *AggMapper {
return &AggMapper{
shard: shard,
stmt: stmt,
cursors: make([]*tagSetCursor, 0),
}
}
// Open opens the aggregate mapper.
func (am *AggMapper) Open() error {
var err error
// Get a read-only transaction.
tx, err := am.shard.DB().Begin(false)
if err != nil {
return err
}
am.tx = tx
// Set up each mapping function for this statement.
aggregates := am.stmt.FunctionCalls()
am.mapFuncs = make([]influxql.MapFunc, len(aggregates))
am.fieldNames = make([]string, len(am.mapFuncs))
for i, c := range aggregates {
am.mapFuncs[i], err = influxql.InitializeMapFunc(c)
if err != nil {
return err
}
// Check for calls like `derivative(mean(value), 1d)`
var nested *influxql.Call = c
if fn, ok := c.Args[0].(*influxql.Call); ok {
nested = fn
}
switch lit := nested.Args[0].(type) {
case *influxql.VarRef:
am.fieldNames[i] = lit.Val
case *influxql.Distinct:
if c.Name != "count" {
return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
}
am.fieldNames[i] = lit.Val
default:
return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
}
}
// Set all time-related parameters on the mapper.
am.queryTMin, am.queryTMax = influxql.TimeRangeAsEpochNano(am.stmt.Condition)
// For GROUP BY time queries, limit the number of data points returned by the limit and offset
d, err := am.stmt.GroupByInterval()
if err != nil {
return err
}
am.intervalSize = d.Nanoseconds()
if am.queryTMin == 0 || am.intervalSize == 0 {
am.numIntervals = 1
am.intervalSize = am.queryTMax - am.queryTMin
} else {
intervalTop := am.queryTMax/am.intervalSize*am.intervalSize + am.intervalSize
intervalBottom := am.queryTMin / am.intervalSize * am.intervalSize
am.numIntervals = int((intervalTop - intervalBottom) / am.intervalSize)
}
if am.stmt.Limit > 0 || am.stmt.Offset > 0 {
// ensure that the offset isn't higher than the number of points we'd get
if am.stmt.Offset > am.numIntervals {
return nil
}
// Take the lesser of either the pre computed number of GROUP BY buckets that
// will be in the result or the limit passed in by the user
if am.stmt.Limit < am.numIntervals {
am.numIntervals = am.stmt.Limit
}
}
// If we are exceeding our MaxGroupByPoints error out
if am.numIntervals > MaxGroupByPoints {
return errors.New("too many points in the group by interval. maybe you forgot to specify a where time clause?")
}
// Ensure that the start time for the results is on the start of the window.
am.queryTMinWindow = am.queryTMin
if am.intervalSize > 0 && am.numIntervals > 1 {
am.queryTMinWindow = am.queryTMinWindow / am.intervalSize * am.intervalSize
}
// Create the TagSet cursors for the Mapper.
for _, src := range am.stmt.Sources {
mm, ok := src.(*influxql.Measurement)
if !ok {
return fmt.Errorf("invalid source type: %#v", src)
}
m := am.shard.index.Measurement(mm.Name)
if m == nil {
// This shard has never received data for the measurement. No Mapper
// required.
return nil
}
// Create tagset cursors and determine various field types within SELECT statement.
tsf, err := createTagSetsAndFields(m, am.stmt)
if err != nil {
return err
}
tagSets := tsf.tagSets
am.selectFields = tsf.selectFields
am.selectTags = tsf.selectTags
am.whereFields = tsf.whereFields
// Validate that group by is not a field
if err := m.ValidateGroupBy(am.stmt); err != nil {
return err
}
// SLIMIT and SOFFSET the unique series
if am.stmt.SLimit > 0 || am.stmt.SOffset > 0 {
if am.stmt.SOffset > len(tagSets) {
tagSets = nil
} else {
if am.stmt.SOffset+am.stmt.SLimit > len(tagSets) {
am.stmt.SLimit = len(tagSets) - am.stmt.SOffset
}
tagSets = tagSets[am.stmt.SOffset : am.stmt.SOffset+am.stmt.SLimit]
}
}
// Create all cursors for reading the data from this shard.
for _, t := range tagSets {
cursors := []*seriesCursor{}
for i, key := range t.SeriesKeys {
c := createCursorForSeries(am.tx, am.shard, key)
if c == nil {
// No data exists for this key.
continue
}
cm := newSeriesCursor(c, t.Filters[i])
cursors = append(cursors, cm)
}
tsc := newTagSetCursor(m.Name, t.Tags, cursors, am.shard.FieldCodec(m.Name))
am.cursors = append(am.cursors, tsc)
}
sort.Sort(tagSetCursors(am.cursors))
}
return nil
}
// NextChunk returns the next chunk of data, which is the next interval of data
// for the current tagset. Tagsets are always processed in the same order as that
// returned by TagSets(). When there is no more data for any tagset, nil
// is returned.
func (am *AggMapper) NextChunk() (interface{}, error) {
var output *mapperOutput
for {
if am.currCursorIndex == len(am.cursors) {
// All tagset cursors processed. NextChunk'ing complete.
return nil, nil
}
tsc := am.cursors[am.currCursorIndex]
tmin, tmax := am.nextInterval()
if tmin < 0 {
// All intervals complete for this tagset. Move to the next tagset.
am.resetIntervals()
am.currCursorIndex++
continue
}
// Prep the return data for this tagset. This will hold data for a single interval
// for a single tagset.
if output == nil {
output = &mapperOutput{
Name: tsc.measurement,
Tags: tsc.tags,
Values: make([]*mapperValue, 1),
}
// Aggregate values only use the first entry in the Values field. Set the time
// to the start of the interval.
output.Values[0] = &mapperValue{
Time: tmin,
Value: make([]interface{}, 0)}
}
// Always clamp tmin. This can happen as bucket-times are bucketed to the nearest
// interval, and this can be less than the times in the query.
qmin := tmin
if qmin < am.queryTMin {
qmin = am.queryTMin
}
for i := range am.mapFuncs {
// Prime the tagset cursor for the start of the interval. This is not ideal, as
// it should really calculate the values all in 1 pass, but that would require
// changes to the mapper functions, which can come later.
// Prime the buffers.
for i := 0; i < len(tsc.cursors); i++ {
k, v := tsc.cursors[i].SeekTo(tmin)
tsc.keyBuffer[i] = k
tsc.valueBuffer[i] = v
}
// Wrap the tagset cursor so it implements the mapping functions interface.
f := func() (time int64, value interface{}) {
return tsc.Next(qmin, tmax, []string{am.fieldNames[i]}, am.whereFields)
}
tagSetCursor := &aggTagSetCursor{
nextFunc: f,
}
// Execute the map function which walks the entire interval, and aggregates
// the result.
values := output.Values[0].Value.([]interface{})
output.Values[0].Value = append(values, am.mapFuncs[i](tagSetCursor))
}
return output, nil
}
}
// nextInterval returns the next interval for which to return data. If start is less than 0
// there are no more intervals.
func (am *AggMapper) nextInterval() (start, end int64) {
t := am.queryTMinWindow + int64(am.currInterval+am.stmt.Offset)*am.intervalSize
// Onto next interval.
am.currInterval++
if t > am.queryTMax || am.currInterval > am.numIntervals {
start, end = -1, 1
} else {
start, end = t, t+am.intervalSize
}
return
}
// resetIntervals starts the Mapper at the first interval. Subsequent intervals
// should be retrieved via nextInterval().
func (am *AggMapper) resetIntervals() {
am.currInterval = 0
}
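// Illustrative sketch (not part of the original file): the window arithmetic used by
// queryTMinWindow and nextInterval above, shown with plain int64 values. For example,
// with a query minimum of 27 and an interval size of 10, the first window is [20, 30)
// and subsequent windows advance by the interval size.
func exampleIntervalWindows(queryTMin, queryTMax, intervalSize int64) [][2]int64 {
	if intervalSize <= 0 {
		return nil
	}
	// Round the query start down to the start of its interval window.
	tMinWindow := queryTMin / intervalSize * intervalSize
	var windows [][2]int64
	for t := tMinWindow; t <= queryTMax; t += intervalSize {
		windows = append(windows, [2]int64{t, t + intervalSize})
	}
	return windows
}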
// TagSets returns the list of TagSets for which this mapper has data.
func (am *AggMapper) TagSets() []string {
return tagSetCursors(am.cursors).Keys()
}
// Close closes the mapper.
func (am *AggMapper) Close() {
if am != nil && am.tx != nil {
_ = am.tx.Rollback()
}
}
// aggTagSetCursor wraps a standard tagSetCursor, such that the values it emits are aggregated
// by intervals.
type aggTagSetCursor struct {
nextFunc func() (time int64, value interface{})
}
// Next returns the next value for the aggTagSetCursor. It implements the interface expected
// by the mapping functions.
func (a *aggTagSetCursor) Next() (time int64, value interface{}) {
return a.nextFunc()
}
// tagSetCursor is a virtual cursor that iterates over multiple series cursors, as though it were
// a single series.
type tagSetCursor struct {
measurement string // Measurement name
tags map[string]string // Tag key-value pairs
cursors []*seriesCursor // Underlying series cursors.
decoder *FieldCodec // decoder for the raw data bytes
// Lookahead buffers for the cursors. Performance analysis shows that it is critical
// that these buffers are part of the tagSetCursor type and not part of the
// cursors type.
keyBuffer []int64 // The current timestamp key for each cursor
valueBuffer [][]byte // The current value for each cursor
}
// tagSetCursors represents a sortable slice of tagSetCursors.
type tagSetCursors []*tagSetCursor
func (a tagSetCursors) Len() int { return len(a) }
func (a tagSetCursors) Less(i, j int) bool { return a[i].key() < a[j].key() }
func (a tagSetCursors) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a tagSetCursors) Keys() []string {
keys := []string{}
for i := range a {
keys = append(keys, a[i].key())
}
sort.Strings(keys)
return keys
}
// newTagSetCursor returns a tagSetCursor
func newTagSetCursor(m string, t map[string]string, c []*seriesCursor, d *FieldCodec) *tagSetCursor {
return &tagSetCursor{
measurement: m,
tags: t,
cursors: c,
decoder: d,
keyBuffer: make([]int64, len(c)),
valueBuffer: make([][]byte, len(c)),
}
}
func (tsc *tagSetCursor) key() string {
return formMeasurementTagSetKey(tsc.measurement, tsc.tags)
}
// Next returns the next matching timestamp and decoded value for the tagset. Filtering
// is enforced on the values. If there is no matching value, a nil result is returned.
func (tsc *tagSetCursor) Next(tmin, tmax int64, selectFields, whereFields []string) (int64, interface{}) {
for {
// Find the next lowest timestamp
min := -1
minKey := int64(math.MaxInt64)
for i, k := range tsc.keyBuffer {
if k != -1 && (k == tmin) || k < minKey && k >= tmin && k < tmax {
min = i
minKey = k
}
}
// Return if there is no more data for this tagset.
if min == -1 {
return -1, nil
}
// set the current timestamp for the selected cursor
timestamp := tsc.keyBuffer[min]
var value interface{}
if len(selectFields) > 1 {
if fieldsWithNames, err := tsc.decoder.DecodeFieldsWithNames(tsc.valueBuffer[min]); err == nil {
value = fieldsWithNames
// if there's a where clause, make sure we don't need to filter this value
if tsc.cursors[min].filter != nil && !matchesWhere(tsc.cursors[min].filter, fieldsWithNames) {
value = nil
}
}
} else {
// With only 1 field SELECTed, decoding all fields may be avoidable, which is faster.
var err error
value, err = tsc.decoder.DecodeByName(selectFields[0], tsc.valueBuffer[min])
if err != nil {
value = nil
} else {
// If there's a WHERE clause, see if we need to filter
if tsc.cursors[min].filter != nil {
// See if the WHERE is only on this field or on one or more other fields.
// If the latter, we'll have to decode everything
if len(whereFields) == 1 && whereFields[0] == selectFields[0] {
if !matchesWhere(tsc.cursors[min].filter, map[string]interface{}{selectFields[0]: value}) {
value = nil
}
} else { // Decode everything
fieldsWithNames, err := tsc.decoder.DecodeFieldsWithNames(tsc.valueBuffer[min])
if err != nil || !matchesWhere(tsc.cursors[min].filter, fieldsWithNames) {
value = nil
}
}
}
}
}
// Advance the cursor
nextKey, nextVal := tsc.cursors[min].Next()
tsc.keyBuffer[min] = nextKey
tsc.valueBuffer[min] = nextVal
// Value didn't match, look for the next one.
if value == nil {
continue
}
return timestamp, value
}
}
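// Illustrative sketch (not part of the original file): the selection step in Next above
// picks, among the per-series lookahead buffers, the cursor whose buffered timestamp is
// the lowest one still inside [tmin, tmax). Shown here over a plain []int64 buffer,
// where -1 marks an exhausted cursor; the original additionally special-cases k == tmin.
func exampleNextLowestBuffer(keyBuffer []int64, tmin, tmax int64) int {
	min := -1
	minKey := int64(math.MaxInt64)
	for i, k := range keyBuffer {
		if k != -1 && k >= tmin && k < tmax && k < minKey {
			min = i
			minKey = k
		}
	}
	return min // -1 means no cursor has data left in the window
}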
// seriesCursor is a cursor that walks a single series. It provides lookahead functionality.
type seriesCursor struct {
cursor *shardCursor // BoltDB cursor for a series
filter influxql.Expr
}
// newSeriesCursor returns a new instance of a series cursor.
func newSeriesCursor(b *shardCursor, filter influxql.Expr) *seriesCursor {
return &seriesCursor{
cursor: b,
filter: filter,
}
}
// SeekTo positions the cursor at or after the given key, returning the timestamp and value found there.
func (sc *seriesCursor) SeekTo(key int64) (timestamp int64, value []byte) {
k, v := sc.cursor.Seek(u64tob(uint64(key)))
if k == nil {
timestamp = -1
} else {
timestamp, value = int64(btou64(k)), v
}
return
}
// Next returns the next timestamp and value from the cursor.
func (sc *seriesCursor) Next() (key int64, value []byte) {
k, v := sc.cursor.Next()
if k == nil {
key = -1
} else {
key, value = int64(btou64(k)), v
}
return
}
// createCursorForSeries creates a cursor for walking the given series key. The cursor
// consolidates both the Bolt store and any WAL cache.
func createCursorForSeries(tx *bolt.Tx, shard *Shard, key string) *shardCursor {
// Retrieve key bucket.
b := tx.Bucket([]byte(key))
// Ignore if there is no bucket or points in the cache.
partitionID := WALPartition([]byte(key))
if b == nil && len(shard.cache[partitionID][key]) == 0 {
return nil
}
// Retrieve a copy of the in-cache points for the key.
cache := make([][]byte, len(shard.cache[partitionID][key]))
copy(cache, shard.cache[partitionID][key])
// Build a cursor that merges the bucket and cache together.
cur := &shardCursor{cache: cache}
if b != nil {
cur.cursor = b.Cursor()
}
return cur
}
type tagSetsAndFields struct {
tagSets []*influxql.TagSet
selectFields []string
selectTags []string
whereFields []string
}
// createTagSetsAndFields returns the tagsets and various fields given a measurement and
// SELECT statement. It also ensures that the fields and tags exist.
func createTagSetsAndFields(m *Measurement, stmt *influxql.SelectStatement) (*tagSetsAndFields, error) {
_, tagKeys, err := stmt.Dimensions.Normalize()
if err != nil {
return nil, err
}
sfs := newStringSet()
sts := newStringSet()
wfs := newStringSet()
// Validate that the requested fields and tags exist, and keep track of which appear in the SELECT vs the WHERE clause
for _, n := range stmt.NamesInSelect() {
if m.HasField(n) {
sfs.add(n)
continue
}
if !m.HasTagKey(n) {
return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
}
sts.add(n)
tagKeys = append(tagKeys, n)
}
for _, n := range stmt.NamesInWhere() {
if n == "time" {
continue
}
if m.HasField(n) {
wfs.add(n)
continue
}
if !m.HasTagKey(n) {
return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
}
}
// Get the sorted unique tag sets for this statement.
tagSets, err := m.TagSets(stmt, tagKeys)
if err != nil {
return nil, err
}
return &tagSetsAndFields{
tagSets: tagSets,
selectFields: sfs.list(),
selectTags: sts.list(),
whereFields: wfs.list(),
}, nil
}
// matchesWhere returns true if the given fields satisfy the WHERE clause expression
func matchesWhere(f influxql.Expr, fields map[string]interface{}) bool {
if ok, _ := influxql.Eval(f, fields).(bool); !ok {
return false
}
return true
}
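// Illustrative sketch (not part of the original file): evaluating a WHERE expression
// against a decoded row, assuming influxql.ParseExpr is available to parse a bare
// expression. The field name "value" and the literal row below are made-up examples.
func exampleMatchesWhere() bool {
	expr, err := influxql.ParseExpr("value > 10")
	if err != nil {
		return false
	}
	return matchesWhere(expr, map[string]interface{}{"value": float64(42)})
}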
func formMeasurementTagSetKey(name string, tags map[string]string) string {
if len(tags) == 0 {
return name
}
return strings.Join([]string{name, string(marshalTags(tags))}, "|")
}
// btou64 converts an 8-byte slice into a uint64.
func btou64(b []byte) uint64 { return binary.BigEndian.Uint64(b) }
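// Illustrative sketch (not part of the original file): timestamps are stored as
// big-endian 8-byte Bolt keys so that byte-wise ordering matches numeric ordering for
// non-negative timestamps; u64tob (defined elsewhere in this package) is the inverse of btou64.
func exampleTimeKeyRoundTrip(ts int64) bool {
	key := u64tob(uint64(ts)) // encode for storage and seeking
	return int64(btou64(key)) == ts
}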

File diff suppressed because it is too large

View File

@ -1,83 +0,0 @@
package tsdb
// Monitor represents a TSDB monitoring service.
type Monitor struct {
Store interface{}
}
func (m *Monitor) Open() error { return nil }
func (m *Monitor) Close() error { return nil }
// StartSelfMonitoring starts a goroutine which monitors the InfluxDB server
// itself and stores the results in the specified database at a given interval.
/*
func (s *Server) StartSelfMonitoring(database, retention string, interval time.Duration) error {
if interval == 0 {
return fmt.Errorf("statistics check interval must be non-zero")
}
go func() {
tick := time.NewTicker(interval)
for {
<-tick.C
// Create the batch and tags
tags := map[string]string{"serverID": strconv.FormatUint(s.ID(), 10)}
if h, err := os.Hostname(); err == nil {
tags["host"] = h
}
batch := pointsFromStats(s.stats, tags)
// Shard-level stats.
tags["shardID"] = strconv.FormatUint(s.id, 10)
s.mu.RLock()
for _, sh := range s.shards {
if !sh.HasDataNodeID(s.id) {
// No stats for non-local shards.
continue
}
batch = append(batch, pointsFromStats(sh.stats, tags)...)
}
s.mu.RUnlock()
// Server diagnostics.
for _, row := range s.DiagnosticsAsRows() {
points, err := s.convertRowToPoints(row.Name, row)
if err != nil {
s.Logger.Printf("failed to write diagnostic row for %s: %s", row.Name, err.Error())
continue
}
for _, p := range points {
p.AddTag("serverID", strconv.FormatUint(s.ID(), 10))
}
batch = append(batch, points...)
}
s.WriteSeries(database, retention, batch)
}
}()
return nil
}
// pointsFromStats turns server stats into a slice of points, for local use
func pointsFromStats(st *Stats, tags map[string]string) []tsdb.Point {
var points []tsdb.Point
now := time.Now()
st.Walk(func(k string, v int64) {
point := tsdb.NewPoint(
st.name+"_"+k,
make(map[string]string),
map[string]interface{}{"value": int(v)},
now,
)
// Specifically create a new map.
for k, v := range tags {
tags[k] = v
point.AddTag(k, v)
}
points = append(points, point)
})
return points
}
*/

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,124 +0,0 @@
package tsdb
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"time"
"github.com/boltdb/bolt"
"github.com/influxdb/influxdb/snapshot"
)
// NewSnapshotWriter returns a new snapshot.Writer that will write
// metadata and the store's shards to an archive.
func NewSnapshotWriter(meta []byte, store *Store) (*snapshot.Writer, error) {
// Create snapshot writer.
sw := snapshot.NewWriter()
if err := func() error {
// Create meta file.
f := &snapshot.File{
Name: "meta",
Size: int64(len(meta)),
ModTime: time.Now(),
}
sw.Manifest.Files = append(sw.Manifest.Files, *f)
sw.FileWriters[f.Name] = NopWriteToCloser(bytes.NewReader(meta))
// Create files for each shard.
if err := appendShardSnapshotFiles(sw, store); err != nil {
return fmt.Errorf("create shard snapshot files: %s", err)
}
return nil
}(); err != nil {
_ = sw.Close()
return nil, err
}
return sw, nil
}
// appendShardSnapshotFiles adds snapshot files for each shard in the store.
func appendShardSnapshotFiles(sw *snapshot.Writer, store *Store) error {
// Calculate absolute path of store to use for relative shard paths.
storePath, err := filepath.Abs(store.Path())
if err != nil {
return fmt.Errorf("store abs path: %s", err)
}
// Create files for each shard.
for _, shardID := range store.ShardIDs() {
// Retrieve shard.
sh := store.Shard(shardID)
if sh == nil {
return fmt.Errorf("shard not found: %d", shardID)
}
// Calculate relative path from store.
shardPath, err := filepath.Abs(sh.Path())
if err != nil {
return fmt.Errorf("shard abs path: %s", err)
}
name, err := filepath.Rel(storePath, shardPath)
if err != nil {
return fmt.Errorf("shard rel path: %s", err)
}
if err := appendShardSnapshotFile(sw, sh, name); err != nil {
return fmt.Errorf("append shard: name=%s, err=%s", name, err)
}
}
return nil
}
func appendShardSnapshotFile(sw *snapshot.Writer, sh *Shard, name string) error {
// Stat the underlying data file to retrieve last modified date.
fi, err := os.Stat(sh.Path())
if err != nil {
return fmt.Errorf("stat shard data file: %s", err)
}
// Begin transaction.
tx, err := sh.db.Begin(false)
if err != nil {
return fmt.Errorf("begin: %s", err)
}
// Create file.
f := snapshot.File{
Name: name,
Size: tx.Size(),
ModTime: fi.ModTime(),
}
// Append to snapshot writer.
sw.Manifest.Files = append(sw.Manifest.Files, f)
sw.FileWriters[f.Name] = &boltTxCloser{tx}
return nil
}
// boltTxCloser wraps a Bolt transaction to implement io.Closer.
type boltTxCloser struct {
*bolt.Tx
}
// Close rolls back the transaction.
func (tx *boltTxCloser) Close() error { return tx.Rollback() }
// NopWriteToCloser wraps an io.WriterTo so that it also implements io.Closer with a no-op Close.
func NopWriteToCloser(w io.WriterTo) interface {
io.WriterTo
io.Closer
} {
return &nopWriteToCloser{w}
}
type nopWriteToCloser struct {
io.WriterTo
}
func (w *nopWriteToCloser) Close() error { return nil }
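// Illustrative sketch (not part of the original file): how NewSnapshotWriter above uses
// NopWriteToCloser. An in-memory byte slice (here, hypothetical metadata) is exposed as a
// closable file writer because *bytes.Reader already implements io.WriterTo.
func exampleNopWriteToCloser(meta []byte, dst io.Writer) error {
	wtc := NopWriteToCloser(bytes.NewReader(meta))
	defer wtc.Close() // no-op, but satisfies callers expecting an io.Closer
	_, err := wtc.WriteTo(dst)
	return err
}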

View File

@ -1,343 +0,0 @@
package tsdb
import (
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/influxdb/influxdb/influxql"
)
func NewStore(path string) *Store {
return &Store{
path: path,
MaxWALSize: DefaultMaxWALSize,
WALFlushInterval: DefaultWALFlushInterval,
WALPartitionFlushDelay: DefaultWALPartitionFlushDelay,
Logger: log.New(os.Stderr, "[store] ", log.LstdFlags),
}
}
var (
ErrShardNotFound = fmt.Errorf("shard not found")
)
type Store struct {
mu sync.RWMutex
path string
databaseIndexes map[string]*DatabaseIndex
shards map[uint64]*Shard
MaxWALSize int
WALFlushInterval time.Duration
WALPartitionFlushDelay time.Duration
Logger *log.Logger
}
// Path returns the store's root path.
func (s *Store) Path() string { return s.path }
func (s *Store) CreateShard(database, retentionPolicy string, shardID uint64) error {
s.mu.Lock()
defer s.mu.Unlock()
// shard already exists
if _, ok := s.shards[shardID]; ok {
return nil
}
// create the db and retention policy dirs if they don't exist
if err := os.MkdirAll(filepath.Join(s.path, database, retentionPolicy), 0700); err != nil {
return err
}
// create the database index if it does not exist
db, ok := s.databaseIndexes[database]
if !ok {
db = NewDatabaseIndex()
s.databaseIndexes[database] = db
}
shardPath := filepath.Join(s.path, database, retentionPolicy, strconv.FormatUint(shardID, 10))
shard := s.newShard(db, shardPath)
if err := shard.Open(); err != nil {
return err
}
s.shards[shardID] = shard
return nil
}
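// Illustrative sketch (not part of the original file): the on-disk layout produced by
// CreateShard above is <store path>/<database>/<retention policy>/<shardID>.
func exampleShardPath(storePath, database, retentionPolicy string, shardID uint64) string {
	return filepath.Join(storePath, database, retentionPolicy, strconv.FormatUint(shardID, 10))
}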
// DeleteShard removes a shard from disk.
func (s *Store) DeleteShard(shardID uint64) error {
s.mu.Lock()
defer s.mu.Unlock()
// ensure shard exists
sh, ok := s.shards[shardID]
if !ok {
return nil
}
if err := sh.Close(); err != nil {
return err
}
if err := os.Remove(sh.path); err != nil {
return err
}
delete(s.shards, shardID)
return nil
}
// newShard returns a shard and copies configuration settings from the store.
func (s *Store) newShard(index *DatabaseIndex, path string) *Shard {
sh := NewShard(index, path)
sh.MaxWALSize = s.MaxWALSize
sh.WALFlushInterval = s.WALFlushInterval
sh.WALPartitionFlushDelay = s.WALPartitionFlushDelay
return sh
}
// DeleteDatabase will close all shards associated with a database and remove the directory and files from disk.
func (s *Store) DeleteDatabase(name string, shardIDs []uint64) error {
s.mu.Lock()
defer s.mu.Unlock()
for _, id := range shardIDs {
shard := s.shards[id]
if shard != nil {
shard.Close()
}
}
if err := os.RemoveAll(filepath.Join(s.path, name)); err != nil {
return err
}
delete(s.databaseIndexes, name)
return nil
}
func (s *Store) Shard(shardID uint64) *Shard {
s.mu.RLock()
defer s.mu.RUnlock()
return s.shards[shardID]
}
// ShardIDs returns a slice of all ShardIDs under management.
func (s *Store) ShardIDs() []uint64 {
ids := make([]uint64, 0, len(s.shards))
for i := range s.shards {
ids = append(ids, i)
}
return ids
}
func (s *Store) ValidateAggregateFieldsInStatement(shardID uint64, measurementName string, stmt *influxql.SelectStatement) error {
s.mu.RLock()
shard := s.shards[shardID]
s.mu.RUnlock()
if shard == nil {
return ErrShardNotFound
}
return shard.ValidateAggregateFieldsInStatement(measurementName, stmt)
}
func (s *Store) DatabaseIndex(name string) *DatabaseIndex {
s.mu.RLock()
defer s.mu.RUnlock()
return s.databaseIndexes[name]
}
func (s *Store) Measurement(database, name string) *Measurement {
s.mu.RLock()
db := s.databaseIndexes[database]
s.mu.RUnlock()
if db == nil {
return nil
}
return db.Measurement(name)
}
// deleteSeries loops through the local shards and deletes the series data and metadata for the passed in series keys
func (s *Store) deleteSeries(keys []string) error {
s.mu.RLock()
defer s.mu.RUnlock()
for _, sh := range s.shards {
if err := sh.deleteSeries(keys); err != nil {
return err
}
}
return nil
}
// deleteMeasurement loops through the local shards and removes the measurement field encodings from each shard
func (s *Store) deleteMeasurement(name string, seriesKeys []string) error {
s.mu.RLock()
defer s.mu.RUnlock()
for _, sh := range s.shards {
if err := sh.deleteMeasurement(name, seriesKeys); err != nil {
return err
}
}
return nil
}
func (s *Store) loadIndexes() error {
dbs, err := ioutil.ReadDir(s.path)
if err != nil {
return err
}
for _, db := range dbs {
if !db.IsDir() {
s.Logger.Printf("Skipping database dir: %s. Not a directory", db.Name())
continue
}
s.databaseIndexes[db.Name()] = NewDatabaseIndex()
}
return nil
}
func (s *Store) loadShards() error {
// loop through the current database indexes
for db := range s.databaseIndexes {
rps, err := ioutil.ReadDir(filepath.Join(s.path, db))
if err != nil {
return err
}
for _, rp := range rps {
// retention policies should be directories. Skip anything that is not a dir.
if !rp.IsDir() {
s.Logger.Printf("Skipping retention policy dir: %s. Not a directory", rp.Name())
continue
}
shards, err := ioutil.ReadDir(filepath.Join(s.path, db, rp.Name()))
if err != nil {
return err
}
for _, sh := range shards {
path := filepath.Join(s.path, db, rp.Name(), sh.Name())
// Shard file names are numeric shardIDs
shardID, err := strconv.ParseUint(sh.Name(), 10, 64)
if err != nil {
s.Logger.Printf("Skipping shard: %s. Not a valid path", rp.Name())
continue
}
shard := s.newShard(s.databaseIndexes[db], path)
shard.Open()
s.shards[shardID] = shard
}
}
}
return nil
}
func (s *Store) Open() error {
s.mu.Lock()
defer s.mu.Unlock()
s.shards = map[uint64]*Shard{}
s.databaseIndexes = map[string]*DatabaseIndex{}
// Create directory.
if err := os.MkdirAll(s.path, 0777); err != nil {
return err
}
// TODO: Start AE for Node
if err := s.loadIndexes(); err != nil {
return err
}
if err := s.loadShards(); err != nil {
return err
}
return nil
}
func (s *Store) WriteToShard(shardID uint64, points []Point) error {
s.mu.RLock()
defer s.mu.RUnlock()
sh, ok := s.shards[shardID]
if !ok {
return ErrShardNotFound
}
return sh.WritePoints(points)
}
// Flush forces all shards to write their WAL data to the index.
func (s *Store) Flush() error {
s.mu.RLock()
defer s.mu.RUnlock()
for shardID, sh := range s.shards {
if err := sh.Flush(s.WALPartitionFlushDelay); err != nil {
return fmt.Errorf("flush: shard=%d, err=%s", shardID, err)
}
}
return nil
}
func (s *Store) CreateMapper(shardID uint64, query string, chunkSize int) (Mapper, error) {
q, err := influxql.NewParser(strings.NewReader(query)).ParseStatement()
if err != nil {
return nil, err
}
stmt, ok := q.(*influxql.SelectStatement)
if !ok {
return nil, fmt.Errorf("query is not a SELECT statement: %s", err.Error())
}
shard := s.Shard(shardID)
if shard == nil {
// This can happen if the shard has been assigned, but hasn't actually been created yet.
return nil, nil
}
if (stmt.IsRawQuery && !stmt.HasDistinct()) || stmt.IsSimpleDerivative() {
return NewRawMapper(shard, stmt, chunkSize), nil
}
return NewAggMapper(shard, stmt), nil
}
func (s *Store) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
for _, sh := range s.shards {
if err := sh.Close(); err != nil {
return err
}
}
s.shards = nil
s.databaseIndexes = nil
return nil
}
// IsRetryable returns true if this error is temporary and could be retried
func IsRetryable(err error) bool {
if err == nil {
return true
}
if strings.Contains(err.Error(), "field type conflict") {
return false
}
return true
}
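// Illustrative sketch (not part of the original file): a caller-side retry loop built on
// IsRetryable. The attempt count and backoff below are arbitrary illustration values.
func exampleWriteWithRetry(s *Store, shardID uint64, points []Point) error {
	var err error
	for attempt := 0; attempt < 3; attempt++ {
		if err = s.WriteToShard(shardID, points); err == nil {
			return nil
		}
		if !IsRetryable(err) {
			return err // e.g. a field type conflict will never succeed on retry
		}
		time.Sleep(100 * time.Millisecond)
	}
	return err
}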

Some files were not shown because too many files have changed in this diff