diff --git a/cluster/kube-up.sh b/cluster/kube-up.sh index bb5d6223166..b3e799f6640 100755 --- a/cluster/kube-up.sh +++ b/cluster/kube-up.sh @@ -127,7 +127,7 @@ echo " This might loop forever if there was some uncaught error during start" echo " up." echo -until $(curl --insecure --user ${user}:${passwd} --max-time 1 \ +until $(curl --insecure --user ${user}:${passwd} --max-time 5 \ --fail --output /dev/null --silent https://${KUBE_MASTER_IP}/api/v1beta1/pods); do printf "." sleep 2 diff --git a/cluster/saltbase/salt/cadvisor/cadvisor.manifest b/cluster/saltbase/salt/cadvisor/cadvisor.manifest index b3a70a308f2..5ca966523e1 100644 --- a/cluster/saltbase/salt/cadvisor/cadvisor.manifest +++ b/cluster/saltbase/salt/cadvisor/cadvisor.manifest @@ -2,7 +2,7 @@ version: v1beta2 id: cadvisor-agent containers: - name: cadvisor - image: google/cadvisor + image: google/cadvisor:latest ports: - name: http containerPort: 8080 diff --git a/pkg/api/types.go b/pkg/api/types.go index 61ac813efe1..bdaec3756ae 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -147,20 +147,6 @@ type Container struct { LivenessProbe *LivenessProbe `yaml:"livenessProbe,omitempty" json:"livenessProbe,omitempty"` } -// Percentile represents a pair which contains a percentage from 0 to 100 and -// its corresponding value. -type Percentile struct { - Percentage int `json:"percentage,omitempty"` - Value uint64 `json:"value,omitempty"` -} - -// ContainerStats represents statistical information of a container -type ContainerStats struct { - CPUUsagePercentiles []Percentile `json:"cpu_usage_percentiles,omitempty"` - MemoryUsagePercentiles []Percentile `json:"memory_usage_percentiles,omitempty"` - MaxMemoryUsage uint64 `json:"max_memory_usage,omitempty"` -} - // Event is the representation of an event logged to etcd backends type Event struct { Event string `json:"event,omitempty"` diff --git a/pkg/apiserver/apiserver.go b/pkg/apiserver/apiserver.go index d423c5b7e7f..3e0bbadb2b7 100644 --- a/pkg/apiserver/apiserver.go +++ b/pkg/apiserver/apiserver.go @@ -20,12 +20,14 @@ import ( "fmt" "io/ioutil" "net/http" - "net/url" + "path" "runtime/debug" "strings" "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/healthz" + "github.com/GoogleCloudPlatform/kubernetes/pkg/httplog" "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" "github.com/GoogleCloudPlatform/kubernetes/pkg/tools" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" @@ -83,25 +85,37 @@ func MakeAsync(fn WorkFunc) <-chan interface{} { // // TODO: consider migrating this to go-restful which is a more full-featured version of the same thing. type APIServer struct { - prefix string - storage map[string]RESTStorage - ops *Operations - logserver http.Handler + prefix string + storage map[string]RESTStorage + ops *Operations + mux *http.ServeMux } // New creates a new APIServer object. // 'storage' contains a map of handlers. // 'prefix' is the hosting path prefix. func New(storage map[string]RESTStorage, prefix string) *APIServer { - return &APIServer{ - storage: storage, - prefix: prefix, - ops: NewOperations(), - logserver: http.StripPrefix("/logs/", http.FileServer(http.Dir("/var/log/"))), + s := &APIServer{ + storage: storage, + prefix: strings.TrimRight(prefix, "/"), + ops: NewOperations(), + mux: http.NewServeMux(), } + + s.mux.Handle("/logs/", http.StripPrefix("/logs/", http.FileServer(http.Dir("/var/log/")))) + s.mux.HandleFunc(s.prefix+"/", s.ServeREST) + healthz.InstallHandler(s.mux) + s.mux.HandleFunc("/index.html", s.handleIndex) + + // Handle both operations and operations/* with the same handler + opPrefix := path.Join(s.prefix, "operations") + s.mux.HandleFunc(opPrefix, s.handleOperationRequest) + s.mux.HandleFunc(opPrefix+"/", s.handleOperationRequest) + + return s } -func (server *APIServer) handleIndex(w http.ResponseWriter) { +func (server *APIServer) handleIndex(w http.ResponseWriter, req *http.Request) { w.WriteHeader(http.StatusOK) // TODO: serve this out of a file? data := "Welcome to Kubernetes" @@ -117,47 +131,37 @@ func (server *APIServer) ServeHTTP(w http.ResponseWriter, req *http.Request) { glog.Infof("APIServer panic'd on %v %v: %#v\n%s\n", req.Method, req.RequestURI, x, debug.Stack()) } }() - defer MakeLogged(req, &w).StacktraceWhen( - StatusIsNot( + defer httplog.MakeLogged(req, &w).StacktraceWhen( + httplog.StatusIsNot( http.StatusOK, http.StatusAccepted, http.StatusConflict, ), ).Log() - url, err := url.ParseRequestURI(req.RequestURI) - if err != nil { - server.error(err, w) - return - } - if url.Path == "/index.html" || url.Path == "/" || url.Path == "" { - server.handleIndex(w) - return - } - if strings.HasPrefix(url.Path, "/logs/") { - server.logserver.ServeHTTP(w, req) - return - } - if !strings.HasPrefix(url.Path, server.prefix) { + + // Dispatch via our mux. + server.mux.ServeHTTP(w, req) +} + +// ServeREST handles requests to all our RESTStorage objects. +func (server *APIServer) ServeREST(w http.ResponseWriter, req *http.Request) { + if !strings.HasPrefix(req.URL.Path, server.prefix) { server.notFound(req, w) return } - requestParts := strings.Split(url.Path[len(server.prefix):], "/")[1:] + requestParts := strings.Split(req.URL.Path[len(server.prefix):], "/")[1:] if len(requestParts) < 1 { server.notFound(req, w) return } - if requestParts[0] == "operations" { - server.handleOperationRequest(requestParts[1:], w, req) - return - } storage := server.storage[requestParts[0]] if storage == nil { - LogOf(w).Addf("'%v' has no storage object", requestParts[0]) + httplog.LogOf(w).Addf("'%v' has no storage object", requestParts[0]) server.notFound(req, w) return } - server.handleREST(requestParts, url, req, w, storage) + server.handleREST(requestParts, req, w, storage) } func (server *APIServer) notFound(req *http.Request, w http.ResponseWriter) { @@ -197,7 +201,7 @@ func (server *APIServer) finishReq(out <-chan interface{}, sync bool, timeout ti status := http.StatusOK switch stat := obj.(type) { case api.Status: - LogOf(w).Addf("programmer error: use *api.Status as a result, not api.Status.") + httplog.LogOf(w).Addf("programmer error: use *api.Status as a result, not api.Status.") if stat.Code != 0 { status = stat.Code } @@ -236,14 +240,14 @@ func parseTimeout(str string) time.Duration { // sync=[false|true] Synchronous request (only applies to create, update, delete operations) // timeout= Timeout for synchronous requests, only applies if sync=true // labels= Used for filtering list operations -func (server *APIServer) handleREST(parts []string, requestURL *url.URL, req *http.Request, w http.ResponseWriter, storage RESTStorage) { - sync := requestURL.Query().Get("sync") == "true" - timeout := parseTimeout(requestURL.Query().Get("timeout")) +func (server *APIServer) handleREST(parts []string, req *http.Request, w http.ResponseWriter, storage RESTStorage) { + sync := req.URL.Query().Get("sync") == "true" + timeout := parseTimeout(req.URL.Query().Get("timeout")) switch req.Method { case "GET": switch len(parts) { case 1: - selector, err := labels.ParseSelector(requestURL.Query().Get("labels")) + selector, err := labels.ParseSelector(req.URL.Query().Get("labels")) if err != nil { server.error(err, w) return @@ -325,7 +329,18 @@ func (server *APIServer) handleREST(parts []string, requestURL *url.URL, req *ht } } -func (server *APIServer) handleOperationRequest(parts []string, w http.ResponseWriter, req *http.Request) { +func (server *APIServer) handleOperationRequest(w http.ResponseWriter, req *http.Request) { + opPrefix := path.Join(server.prefix, "operations") + if !strings.HasPrefix(req.URL.Path, opPrefix) { + server.notFound(req, w) + return + } + trimmed := strings.TrimLeft(req.URL.Path[len(opPrefix):], "/") + parts := strings.Split(trimmed, "/") + if len(parts) > 1 { + server.notFound(req, w) + return + } if req.Method != "GET" { server.notFound(req, w) } diff --git a/pkg/apiserver/apiserver_test.go b/pkg/apiserver/apiserver_test.go index f3371300c3a..f7d47ce1d4b 100644 --- a/pkg/apiserver/apiserver_test.go +++ b/pkg/apiserver/apiserver_test.go @@ -409,3 +409,37 @@ func TestSyncCreateTimeout(t *testing.T) { t.Errorf("Unexpected status: %d, Expected: %d, %#v", response.StatusCode, 202, response) } } + +func TestOpGet(t *testing.T) { + simpleStorage := &SimpleRESTStorage{} + handler := New(map[string]RESTStorage{ + "foo": simpleStorage, + }, "/prefix/version") + server := httptest.NewServer(handler) + client := http.Client{} + + simple := Simple{Name: "foo"} + data, _ := api.Encode(simple) + request, err := http.NewRequest("POST", server.URL+"/prefix/version/foo", bytes.NewBuffer(data)) + expectNoError(t, err) + response, err := client.Do(request) + expectNoError(t, err) + if response.StatusCode != http.StatusAccepted { + t.Errorf("Unexpected response %#v", response) + } + + var itemOut api.Status + body, err := extractBody(response, &itemOut) + expectNoError(t, err) + if itemOut.Status != api.StatusWorking || itemOut.Details == "" { + t.Errorf("Unexpected status: %#v (%s)", itemOut, string(body)) + } + + req2, err := http.NewRequest("GET", server.URL+"/prefix/version/operations/"+itemOut.Details, nil) + expectNoError(t, err) + _, err = client.Do(req2) + expectNoError(t, err) + if response.StatusCode != http.StatusAccepted { + t.Errorf("Unexpected response %#v", response) + } +} diff --git a/pkg/apiserver/operation.go b/pkg/apiserver/operation.go index ed25857c0cf..7b88531d214 100644 --- a/pkg/apiserver/operation.go +++ b/pkg/apiserver/operation.go @@ -34,7 +34,7 @@ type Operation struct { awaiting <-chan interface{} finished *time.Time lock sync.Mutex - notify chan bool + notify chan struct{} } // Operations tracks all the ongoing operations. @@ -62,7 +62,7 @@ func (ops *Operations) NewOperation(from <-chan interface{}) *Operation { op := &Operation{ ID: strconv.FormatInt(id, 10), awaiting: from, - notify: make(chan bool, 1), + notify: make(chan struct{}), } go op.wait() go ops.insert(op) @@ -116,7 +116,7 @@ func (ops *Operations) expire(maxAge time.Duration) { // Waits forever for the operation to complete; call via go when // the operation is created. Sets op.finished when the operation -// does complete, and sends on the notify channel, in case there +// does complete, and closes the notify channel, in case there // are any WaitFor() calls in progress. // Does not keep op locked while waiting. func (op *Operation) wait() { @@ -128,7 +128,7 @@ func (op *Operation) wait() { op.result = result finished := time.Now() op.finished = &finished - op.notify <- true + close(op.notify) } // WaitFor waits for the specified duration, or until the operation finishes, @@ -137,9 +137,6 @@ func (op *Operation) WaitFor(timeout time.Duration) { select { case <-time.After(timeout): case <-op.notify: - // Re-send on this channel in case there are others - // waiting for notification. - op.notify <- true } } diff --git a/pkg/health/doc.go b/pkg/health/doc.go new file mode 100644 index 00000000000..6c5580a1301 --- /dev/null +++ b/pkg/health/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package health contains utilities for health checking, as well as health status information. +package health diff --git a/pkg/health/health.go b/pkg/health/health.go new file mode 100644 index 00000000000..9d986d36771 --- /dev/null +++ b/pkg/health/health.go @@ -0,0 +1,51 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package health + +import ( + "net/http" + + "github.com/golang/glog" +) + +type Status int + +const ( + Healthy Status = iota + Unhealthy + Unknown +) + +type HTTPGetInterface interface { + Get(url string) (*http.Response, error) +} + +func Check(url string, client HTTPGetInterface) (Status, error) { + res, err := client.Get(url) + if res.Body != nil { + defer res.Body.Close() + } + if err != nil { + return Unknown, err + } + if res.StatusCode >= http.StatusOK && res.StatusCode < http.StatusBadRequest { + return Healthy, nil + } else { + glog.V(1).Infof("Health check failed for %s, Response: %v", url, *res) + return Unhealthy, nil + } +} diff --git a/pkg/kubelet/health_check.go b/pkg/health/health_check.go similarity index 61% rename from pkg/kubelet/health_check.go rename to pkg/health/health_check.go index f784b13979a..695559e755c 100644 --- a/pkg/kubelet/health_check.go +++ b/pkg/health/health_check.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package kubelet +package health import ( "fmt" @@ -25,23 +25,8 @@ import ( "github.com/golang/glog" ) -// HealthCheckStatus is an enum type which describes a status of health check. -type HealthCheckStatus int - -// These are the valid values of HealthCheckStatus. -const ( - CheckHealthy HealthCheckStatus = 0 - CheckUnhealthy HealthCheckStatus = 1 - CheckUnknown HealthCheckStatus = 2 -) - -// HealthChecker hides implementation details of checking health of containers. type HealthChecker interface { - HealthCheck(container api.Container) (HealthCheckStatus, error) -} - -type httpDoInterface interface { - Get(string) (*http.Response, error) + HealthCheck(container api.Container) (Status, error) } // MakeHealthChecker creates a new HealthChecker. @@ -60,20 +45,18 @@ type MuxHealthChecker struct { checkers map[string]HealthChecker } -// HealthCheck delegates the health-checking of the container to one of the bundled implementations. -// It chooses an implementation according to container.LivenessProbe.Type. -func (m *MuxHealthChecker) HealthCheck(container api.Container) (HealthCheckStatus, error) { +func (m *MuxHealthChecker) HealthCheck(container api.Container) (Status, error) { checker, ok := m.checkers[container.LivenessProbe.Type] if !ok || checker == nil { glog.Warningf("Failed to find health checker for %s %s", container.Name, container.LivenessProbe.Type) - return CheckUnknown, nil + return Unknown, nil } return checker.HealthCheck(container) } // HTTPHealthChecker is an implementation of HealthChecker which checks container health by sending HTTP Get requests. type HTTPHealthChecker struct { - client httpDoInterface + client HTTPGetInterface } func (h *HTTPHealthChecker) findPort(container api.Container, portName string) int64 { @@ -86,18 +69,17 @@ func (h *HTTPHealthChecker) findPort(container api.Container, portName string) i return -1 } -// HealthCheck checks if the container is healthy by trying sending HTTP Get requests to the container. -func (h *HTTPHealthChecker) HealthCheck(container api.Container) (HealthCheckStatus, error) { +func (h *HTTPHealthChecker) HealthCheck(container api.Container) (Status, error) { params := container.LivenessProbe.HTTPGet if params == nil { - return CheckUnknown, fmt.Errorf("Error, no HTTP parameters specified: %v", container) + return Unknown, fmt.Errorf("Error, no HTTP parameters specified: %v", container) } port := h.findPort(container, params.Port) if port == -1 { var err error port, err = strconv.ParseInt(params.Port, 10, 0) if err != nil { - return CheckUnknown, err + return Unknown, err } } var host string @@ -107,17 +89,5 @@ func (h *HTTPHealthChecker) HealthCheck(container api.Container) (HealthCheckSta host = "localhost" } url := fmt.Sprintf("http://%s:%d%s", host, port, params.Path) - res, err := h.client.Get(url) - if res != nil && res.Body != nil { - defer res.Body.Close() - } - if err != nil { - // At this point, if it fails, its either a policy (unlikely) or HTTP protocol (likely) error. - return CheckUnhealthy, nil - } - if res.StatusCode == http.StatusOK { - return CheckHealthy, nil - } - glog.V(1).Infof("Health check failed for %v, Response: %v", container, *res) - return CheckUnhealthy, nil + return Check(url, h.client) } diff --git a/pkg/kubelet/health_check_test.go b/pkg/health/health_check_test.go similarity index 97% rename from pkg/kubelet/health_check_test.go rename to pkg/health/health_check_test.go index 557f34702b8..17dc3612466 100644 --- a/pkg/kubelet/health_check_test.go +++ b/pkg/health/health_check_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package kubelet +package health import ( "net/http" @@ -56,7 +56,7 @@ func TestHttpHealth(t *testing.T) { } ok, err := check.HealthCheck(container) - if ok != CheckHealthy { + if ok != Healthy { t.Error("Unexpected unhealthy") } if err != nil { diff --git a/pkg/healthz/healthz.go b/pkg/healthz/healthz.go index 8ae6b97f73f..1f71bb88579 100644 --- a/pkg/healthz/healthz.go +++ b/pkg/healthz/healthz.go @@ -29,3 +29,7 @@ func handleHealthz(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) w.Write([]byte("ok")) } + +func InstallHandler(mux *http.ServeMux) { + mux.HandleFunc("/healthz", handleHealthz) +} diff --git a/pkg/httplog/doc.go b/pkg/httplog/doc.go new file mode 100644 index 00000000000..872879cadf3 --- /dev/null +++ b/pkg/httplog/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package httplog contains a helper object and functions to maintain a log +// along with an http response. +package httplog diff --git a/pkg/apiserver/logger.go b/pkg/httplog/log.go similarity index 85% rename from pkg/apiserver/logger.go rename to pkg/httplog/log.go index 8690f80ae17..c65af57a2b6 100644 --- a/pkg/apiserver/logger.go +++ b/pkg/httplog/log.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package apiserver +package httplog import ( "fmt" @@ -25,6 +25,18 @@ import ( "github.com/golang/glog" ) +// Handler wraps all HTTP calls to delegate with nice logging. +// delegate may use LogOf(w).Addf(...) to write additional info to +// the per-request log message. +// +// Intended to wrap calls to your ServeMux. +func Handler(delegate http.Handler, pred StacktracePred) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + defer MakeLogged(req, &w).StacktraceWhen(pred).Log() + delegate.ServeHTTP(w, req) + }) +} + // StacktracePred returns true if a stacktrace should be logged for this status type StacktracePred func(httpStatus int) (logStacktrace bool) @@ -44,7 +56,7 @@ type respLogger struct { // DefaultStacktracePred is the default implementation of StacktracePred. func DefaultStacktracePred(status int) bool { - return status != http.StatusOK && status != http.StatusAccepted + return status < http.StatusOK || status >= http.StatusBadRequest } // MakeLogged turns a normal response writer into a logged response writer. @@ -60,6 +72,10 @@ func DefaultStacktracePred(status int) bool { // // Use LogOf(w).Addf(...) to log something along with the response result. func MakeLogged(req *http.Request, w *http.ResponseWriter) *respLogger { + if _, ok := (*w).(*respLogger); ok { + // Don't double-wrap! + panic("multiple MakeLogged calls!") + } rl := &respLogger{ startTime: time.Now(), req: req, diff --git a/pkg/kubelet/docker.go b/pkg/kubelet/docker.go new file mode 100644 index 00000000000..33b26e6b7c1 --- /dev/null +++ b/pkg/kubelet/docker.go @@ -0,0 +1,138 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "fmt" + "math/rand" + "strings" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/fsouza/go-dockerclient" +) + +// DockerContainerData is the structured representation of the JSON object returned by Docker inspect +type DockerContainerData struct { + state struct { + Running bool + } +} + +// DockerInterface is an abstract interface for testability. It abstracts the interface of docker.Client. +type DockerInterface interface { + ListContainers(options docker.ListContainersOptions) ([]docker.APIContainers, error) + InspectContainer(id string) (*docker.Container, error) + CreateContainer(docker.CreateContainerOptions) (*docker.Container, error) + StartContainer(id string, hostConfig *docker.HostConfig) error + StopContainer(id string, timeout uint) error + PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error +} + +// DockerID is an ID of docker container. It is a type to make it clear when we're working with docker container Ids +type DockerID string + +// DockerPuller is an abstract interface for testability. It abstracts image pull operations. +type DockerPuller interface { + Pull(image string) error +} + +// dockerPuller is the default implementation of DockerPuller. +type dockerPuller struct { + client DockerInterface +} + +// NewDockerPuller creates a new instance of the default implementation of DockerPuller. +func NewDockerPuller(client DockerInterface) DockerPuller { + return dockerPuller{ + client: client, + } +} + +func (p dockerPuller) Pull(image string) error { + image, tag := parseImageName(image) + opts := docker.PullImageOptions{ + Repository: image, + Tag: tag, + } + return p.client.PullImage(opts, docker.AuthConfiguration{}) +} + +// Converts "-" to "_-_" and "_" to "___" so that we can use "--" to meaningfully separate parts of a docker name. +func escapeDash(in string) (out string) { + out = strings.Replace(in, "_", "___", -1) + out = strings.Replace(out, "-", "_-_", -1) + return +} + +// Reverses the transformation of escapeDash. +func unescapeDash(in string) (out string) { + out = strings.Replace(in, "_-_", "-", -1) + out = strings.Replace(out, "___", "_", -1) + return +} + +const containerNamePrefix = "k8s" + +// Creates a name which can be reversed to identify both manifest id and container name. +func buildDockerName(manifest *api.ContainerManifest, container *api.Container) string { + // Note, manifest.ID could be blank. + return fmt.Sprintf("%s--%s--%s--%08x", containerNamePrefix, escapeDash(container.Name), escapeDash(manifest.ID), rand.Uint32()) +} + +// Upacks a container name, returning the manifest id and container name we would have used to +// construct the docker name. If the docker name isn't one we created, we may return empty strings. +func parseDockerName(name string) (manifestID, containerName string) { + // For some reason docker appears to be appending '/' to names. + // If its there, strip it. + if name[0] == '/' { + name = name[1:] + } + parts := strings.Split(name, "--") + if len(parts) == 0 || parts[0] != containerNamePrefix { + return + } + if len(parts) > 1 { + containerName = unescapeDash(parts[1]) + } + if len(parts) > 2 { + manifestID = unescapeDash(parts[2]) + } + return +} + +// Parses image name including an tag and returns image name and tag +// TODO: Future Docker versions can parse the tag on daemon side, see: +// https://github.com/dotcloud/docker/issues/6876 +// So this can be deprecated at some point. +func parseImageName(image string) (string, string) { + tag := "" + parts := strings.SplitN(image, "/", 2) + repo := "" + if len(parts) == 2 { + repo = parts[0] + image = parts[1] + } + parts = strings.SplitN(image, ":", 2) + if len(parts) == 2 { + image = parts[0] + tag = parts[1] + } + if repo != "" { + image = fmt.Sprintf("%s/%s", repo, image) + } + return image, tag +} diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 9a89f129931..9561537666b 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -21,7 +21,6 @@ import ( "errors" "fmt" "io/ioutil" - "math/rand" "net" "net/http" "os" @@ -34,6 +33,7 @@ import ( "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/health" "github.com/GoogleCloudPlatform/kubernetes/pkg/tools" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" "github.com/coreos/go-etcd/etcd" @@ -46,34 +46,14 @@ import ( const defaultChanSize = 1024 -// DockerContainerData is the structured representation of the JSON object returned by Docker inspect -type DockerContainerData struct { - state struct { - Running bool - } -} - -// DockerInterface is an abstract interface for testability. It abstracts the interface of docker.Client. -type DockerInterface interface { - ListContainers(options docker.ListContainersOptions) ([]docker.APIContainers, error) - InspectContainer(id string) (*docker.Container, error) - CreateContainer(docker.CreateContainerOptions) (*docker.Container, error) - StartContainer(id string, hostConfig *docker.HostConfig) error - StopContainer(id string, timeout uint) error - PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error -} - -// DockerID is an ID of docker container. It is a type to make it clear when we're working with docker container Ids -type DockerID string - -// DockerPuller is an abstract interface for testability. It abstracts image pull operations. -type DockerPuller interface { - Pull(image string) error -} +// taken from lmctfy https://github.com/google/lmctfy/blob/master/lmctfy/controllers/cpu_controller.cc +const minShares = 2 +const sharesPerCpu = 1024 +const milliCpuToCpu = 1000 // CadvisorInterface is an abstract interface for testability. It abstracts the interface of "github.com/google/cadvisor/client".Client. type CadvisorInterface interface { - ContainerInfo(name string) (*info.ContainerInfo, error) + ContainerInfo(name string, req *info.ContainerInfoRequest) (*info.ContainerInfo, error) MachineInfo() (*info.MachineInfo, error) } @@ -93,7 +73,7 @@ type Kubelet struct { SyncFrequency time.Duration HTTPCheckFrequency time.Duration pullLock sync.Mutex - HealthChecker HealthChecker + HealthChecker health.HealthChecker } type manifestUpdate struct { @@ -119,7 +99,7 @@ func (kl *Kubelet) RunKubelet(dockerEndpoint, configPath, manifestURL, etcdServe } } if kl.DockerPuller == nil { - kl.DockerPuller = kl.MakeDockerPuller() + kl.DockerPuller = NewDockerPuller(kl.DockerClient) } updateChannel := make(chan manifestUpdate) if configPath != "" { @@ -158,7 +138,7 @@ func (kl *Kubelet) RunKubelet(dockerEndpoint, configPath, manifestURL, etcdServe } go util.Forever(func() { s.ListenAndServe() }, 0) } - kl.HealthChecker = MakeHealthChecker() + kl.HealthChecker = health.MakeHealthChecker() kl.syncLoop(updateChannel, kl) } @@ -236,70 +216,6 @@ func (kl *Kubelet) getContainer(ID DockerID) (*docker.APIContainers, error) { return nil, nil } -// MakeDockerPuller creates a new instance of the default implementation of DockerPuller. -func (kl *Kubelet) MakeDockerPuller() DockerPuller { - return dockerPuller{ - client: kl.DockerClient, - } -} - -// dockerPuller is the default implementation of DockerPuller. -type dockerPuller struct { - client DockerInterface -} - -func (p dockerPuller) Pull(image string) error { - image, tag := parseImageName(image) - opts := docker.PullImageOptions{ - Repository: image, - Tag: tag, - } - return p.client.PullImage(opts, docker.AuthConfiguration{}) -} - -// Converts "-" to "_-_" and "_" to "___" so that we can use "--" to meaningfully separate parts of a docker name. -func escapeDash(in string) (out string) { - out = strings.Replace(in, "_", "___", -1) - out = strings.Replace(out, "-", "_-_", -1) - return -} - -// Reverses the transformation of escapeDash. -func unescapeDash(in string) (out string) { - out = strings.Replace(in, "_-_", "-", -1) - out = strings.Replace(out, "___", "_", -1) - return -} - -const containerNamePrefix = "k8s" - -// Creates a name which can be reversed to identify both manifest id and container name. -func buildDockerName(manifest *api.ContainerManifest, container *api.Container) string { - // Note, manifest.ID could be blank. - return fmt.Sprintf("%s--%s--%s--%08x", containerNamePrefix, escapeDash(container.Name), escapeDash(manifest.ID), rand.Uint32()) -} - -// Upacks a container name, returning the manifest id and container name we would have used to -// construct the docker name. If the docker name isn't one we created, we may return empty strings. -func parseDockerName(name string) (manifestID, containerName string) { - // For some reason docker appears to be appending '/' to names. - // If its there, strip it. - if name[0] == '/' { - name = name[1:] - } - parts := strings.Split(name, "--") - if len(parts) == 0 || parts[0] != containerNamePrefix { - return - } - if len(parts) > 1 { - containerName = unescapeDash(parts[1]) - } - if len(parts) > 2 { - manifestID = unescapeDash(parts[2]) - } - return -} - func makeEnvironmentVariables(container *api.Container) []string { var result []string for _, value := range container.Env { @@ -358,27 +274,13 @@ func makePortsAndBindings(container *api.Container) (map[docker.Port]struct{}, m return exposedPorts, portBindings } -// Parses image name including an tag and returns image name and tag -// TODO: Future Docker versions can parse the tag on daemon side, see: -// https://github.com/dotcloud/docker/issues/6876 -// So this can be deprecated at some point. -func parseImageName(image string) (string, string) { - tag := "" - parts := strings.SplitN(image, "/", 2) - repo := "" - if len(parts) == 2 { - repo = parts[0] - image = parts[1] +func milliCpuToShares(milliCpu int) int { + // Conceptually (milliCpu / milliCpuToCpu) * sharesPerCpu, but factored to improve rounding. + shares := (milliCpu * sharesPerCpu) / milliCpuToCpu + if shares < minShares { + return minShares } - parts = strings.SplitN(image, ":", 2) - if len(parts) == 2 { - image = parts[0] - tag = parts[1] - } - if repo != "" { - image = fmt.Sprintf("%s/%s", repo, image) - } - return image, tag + return shares } // Run a single container from a manifest. Returns the docker container ID @@ -390,13 +292,15 @@ func (kl *Kubelet) runContainer(manifest *api.ContainerManifest, container *api. opts := docker.CreateContainerOptions{ Name: buildDockerName(manifest, container), Config: &docker.Config{ + Cmd: container.Command, + Env: envVariables, + ExposedPorts: exposedPorts, Hostname: container.Name, Image: container.Image, - ExposedPorts: exposedPorts, - Env: envVariables, + Memory: int64(container.Memory), + CpuShares: int64(milliCpuToShares(container.CPU)), Volumes: volumes, WorkingDir: container.WorkingDir, - Cmd: container.Command, }, } dockerContainer, err := kl.DockerClient.CreateContainer(opts) @@ -735,7 +639,7 @@ func (kl *Kubelet) syncManifest(manifest *api.ContainerManifest, keepChannel cha glog.V(1).Infof("health check errored: %v", err) continue } - if healthy != CheckHealthy { + if healthy != health.Healthy { glog.V(1).Infof("manifest %s container %s is unhealthy.", manifest.ID, container.Name) if err != nil { glog.V(1).Infof("Failed to get container info %v, for %s", err, containerID) @@ -939,44 +843,29 @@ func (kl *Kubelet) getDockerIDFromPodIDAndContainerName(podID, containerName str return "", errors.New("couldn't find container") } +func getCadvisorContainerInfoRequest(req *info.ContainerInfoRequest) *info.ContainerInfoRequest { + ret := &info.ContainerInfoRequest{ + NumStats: req.NumStats, + CpuUsagePercentiles: req.CpuUsagePercentiles, + MemoryUsagePercentages: req.MemoryUsagePercentages, + } + return ret +} + // This method takes a container's absolute path and returns the stats for the // container. The container's absolute path refers to its hierarchy in the // cgroup file system. e.g. The root container, which represents the whole // machine, has path "/"; all docker containers have path "/docker/" -func (kl *Kubelet) statsFromContainerPath(containerPath string) (*api.ContainerStats, error) { - info, err := kl.CadvisorClient.ContainerInfo(containerPath) - +func (kl *Kubelet) statsFromContainerPath(containerPath string, req *info.ContainerInfoRequest) (*info.ContainerInfo, error) { + cinfo, err := kl.CadvisorClient.ContainerInfo(containerPath, getCadvisorContainerInfoRequest(req)) if err != nil { return nil, err } - // When the stats data for the container is not available yet. - if info.StatsPercentiles == nil { - return nil, nil - } - - ret := new(api.ContainerStats) - ret.MaxMemoryUsage = info.StatsPercentiles.MaxMemoryUsage - if len(info.StatsPercentiles.CpuUsagePercentiles) > 0 { - percentiles := make([]api.Percentile, len(info.StatsPercentiles.CpuUsagePercentiles)) - for i, p := range info.StatsPercentiles.CpuUsagePercentiles { - percentiles[i].Percentage = p.Percentage - percentiles[i].Value = p.Value - } - ret.CPUUsagePercentiles = percentiles - } - if len(info.StatsPercentiles.MemoryUsagePercentiles) > 0 { - percentiles := make([]api.Percentile, len(info.StatsPercentiles.MemoryUsagePercentiles)) - for i, p := range info.StatsPercentiles.MemoryUsagePercentiles { - percentiles[i].Percentage = p.Percentage - percentiles[i].Value = p.Value - } - ret.MemoryUsagePercentiles = percentiles - } - return ret, nil + return cinfo, nil } // GetContainerStats returns stats (from Cadvisor) for a container. -func (kl *Kubelet) GetContainerStats(podID, containerName string) (*api.ContainerStats, error) { +func (kl *Kubelet) GetContainerInfo(podID, containerName string, req *info.ContainerInfoRequest) (*info.ContainerInfo, error) { if kl.CadvisorClient == nil { return nil, nil } @@ -984,24 +873,24 @@ func (kl *Kubelet) GetContainerStats(podID, containerName string) (*api.Containe if err != nil || len(dockerID) == 0 { return nil, err } - return kl.statsFromContainerPath(fmt.Sprintf("/docker/%s", string(dockerID))) + return kl.statsFromContainerPath(fmt.Sprintf("/docker/%s", string(dockerID)), req) } // GetMachineStats returns stats (from Cadvisor) of current machine. -func (kl *Kubelet) GetMachineStats() (*api.ContainerStats, error) { - return kl.statsFromContainerPath("/") +func (kl *Kubelet) GetMachineStats(req *info.ContainerInfoRequest) (*info.ContainerInfo, error) { + return kl.statsFromContainerPath("/", req) } -func (kl *Kubelet) healthy(container api.Container, dockerContainer *docker.APIContainers) (HealthCheckStatus, error) { +func (kl *Kubelet) healthy(container api.Container, dockerContainer *docker.APIContainers) (health.Status, error) { // Give the container 60 seconds to start up. if container.LivenessProbe == nil { - return CheckHealthy, nil + return health.Healthy, nil } if time.Now().Unix()-dockerContainer.Created < container.LivenessProbe.InitialDelaySeconds { - return CheckHealthy, nil + return health.Healthy, nil } if kl.HealthChecker == nil { - return CheckHealthy, nil + return health.Healthy, nil } return kl.HealthChecker.HealthCheck(container) } diff --git a/pkg/kubelet/kubelet_server.go b/pkg/kubelet/kubelet_server.go index 8dfd68bb909..5789df17fc0 100644 --- a/pkg/kubelet/kubelet_server.go +++ b/pkg/kubelet/kubelet_server.go @@ -20,6 +20,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "io/ioutil" "net/http" "net/url" @@ -27,7 +28,8 @@ import ( "strings" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" - "github.com/GoogleCloudPlatform/kubernetes/pkg/apiserver" + "github.com/GoogleCloudPlatform/kubernetes/pkg/httplog" + "github.com/google/cadvisor/info" "gopkg.in/v1/yaml" ) @@ -41,8 +43,8 @@ type Server struct { // kubeletInterface contains all the kubelet methods required by the server. // For testablitiy. type kubeletInterface interface { - GetContainerStats(podID, containerName string) (*api.ContainerStats, error) - GetMachineStats() (*api.ContainerStats, error) + GetContainerInfo(podID, containerName string, req *info.ContainerInfoRequest) (*info.ContainerInfo, error) + GetMachineStats(req *info.ContainerInfoRequest) (*info.ContainerInfo, error) GetPodInfo(name string) (api.PodInfo, error) } @@ -51,13 +53,14 @@ func (s *Server) error(w http.ResponseWriter, err error) { } func (s *Server) ServeHTTP(w http.ResponseWriter, req *http.Request) { - defer apiserver.MakeLogged(req, &w).Log() + defer httplog.MakeLogged(req, &w).Log() u, err := url.ParseRequestURI(req.RequestURI) if err != nil { s.error(w, err) return } + // TODO: use an http.ServeMux instead of a switch. switch { case u.Path == "/container" || u.Path == "/containers": defer req.Body.Close() @@ -113,18 +116,25 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, req *http.Request) { func (s *Server) serveStats(w http.ResponseWriter, req *http.Request) { // /stats// components := strings.Split(strings.TrimPrefix(path.Clean(req.URL.Path), "/"), "/") - var stats *api.ContainerStats + var stats *info.ContainerInfo var err error + var query info.ContainerInfoRequest + decoder := json.NewDecoder(req.Body) + err = decoder.Decode(&query) + if err != nil && err != io.EOF { + s.error(w, err) + return + } switch len(components) { case 1: // Machine stats - stats, err = s.Kubelet.GetMachineStats() + stats, err = s.Kubelet.GetMachineStats(&query) case 2: // pod stats // TODO(monnand) Implement this errors.New("pod level status currently unimplemented") case 3: - stats, err = s.Kubelet.GetContainerStats(components[1], components[2]) + stats, err = s.Kubelet.GetContainerInfo(components[1], components[2], &query) default: http.Error(w, "unknown resource.", http.StatusNotFound) return diff --git a/pkg/kubelet/kubelet_server_test.go b/pkg/kubelet/kubelet_server_test.go index 38ad6261392..fce6913be8e 100644 --- a/pkg/kubelet/kubelet_server_test.go +++ b/pkg/kubelet/kubelet_server_test.go @@ -29,24 +29,25 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/api" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" "github.com/fsouza/go-dockerclient" + "github.com/google/cadvisor/info" ) type fakeKubelet struct { infoFunc func(name string) (api.PodInfo, error) - containerStatsFunc func(podID, containerName string) (*api.ContainerStats, error) - machineStatsFunc func() (*api.ContainerStats, error) + containerStatsFunc func(podID, containerName string, req *info.ContainerInfoRequest) (*info.ContainerInfo, error) + machineStatsFunc func(query *info.ContainerInfoRequest) (*info.ContainerInfo, error) } func (fk *fakeKubelet) GetPodInfo(name string) (api.PodInfo, error) { return fk.infoFunc(name) } -func (fk *fakeKubelet) GetContainerStats(podID, containerName string) (*api.ContainerStats, error) { - return fk.containerStatsFunc(podID, containerName) +func (fk *fakeKubelet) GetContainerInfo(podID, containerName string, req *info.ContainerInfoRequest) (*info.ContainerInfo, error) { + return fk.containerStatsFunc(podID, containerName, req) } -func (fk *fakeKubelet) GetMachineStats() (*api.ContainerStats, error) { - return fk.machineStatsFunc() +func (fk *fakeKubelet) GetMachineStats(req *info.ContainerInfoRequest) (*info.ContainerInfo, error) { + return fk.machineStatsFunc(req) } type serverTestFramework struct { @@ -148,22 +149,24 @@ func TestPodInfo(t *testing.T) { func TestContainerStats(t *testing.T) { fw := makeServerTest() - expectedStats := &api.ContainerStats{ - MaxMemoryUsage: 1024001, - CPUUsagePercentiles: []api.Percentile{ - {50, 150}, - {80, 180}, - {90, 190}, - }, - MemoryUsagePercentiles: []api.Percentile{ - {50, 150}, - {80, 180}, - {90, 190}, + expectedStats := &info.ContainerInfo{ + StatsPercentiles: &info.ContainerStatsPercentiles{ + MaxMemoryUsage: 1024001, + CpuUsagePercentiles: []info.Percentile{ + {50, 150}, + {80, 180}, + {90, 190}, + }, + MemoryUsagePercentiles: []info.Percentile{ + {50, 150}, + {80, 180}, + {90, 190}, + }, }, } expectedPodID := "somepod" expectedContainerName := "goodcontainer" - fw.fakeKubelet.containerStatsFunc = func(podID, containerName string) (*api.ContainerStats, error) { + fw.fakeKubelet.containerStatsFunc = func(podID, containerName string, req *info.ContainerInfoRequest) (*info.ContainerInfo, error) { if podID != expectedPodID || containerName != expectedContainerName { return nil, fmt.Errorf("bad podID or containerName: podID=%v; containerName=%v", podID, containerName) } @@ -175,7 +178,7 @@ func TestContainerStats(t *testing.T) { t.Fatalf("Got error GETing: %v", err) } defer resp.Body.Close() - var receivedStats api.ContainerStats + var receivedStats info.ContainerInfo decoder := json.NewDecoder(resp.Body) err = decoder.Decode(&receivedStats) if err != nil { @@ -188,20 +191,22 @@ func TestContainerStats(t *testing.T) { func TestMachineStats(t *testing.T) { fw := makeServerTest() - expectedStats := &api.ContainerStats{ - MaxMemoryUsage: 1024001, - CPUUsagePercentiles: []api.Percentile{ - {50, 150}, - {80, 180}, - {90, 190}, - }, - MemoryUsagePercentiles: []api.Percentile{ - {50, 150}, - {80, 180}, - {90, 190}, + expectedStats := &info.ContainerInfo{ + StatsPercentiles: &info.ContainerStatsPercentiles{ + MaxMemoryUsage: 1024001, + CpuUsagePercentiles: []info.Percentile{ + {50, 150}, + {80, 180}, + {90, 190}, + }, + MemoryUsagePercentiles: []info.Percentile{ + {50, 150}, + {80, 180}, + {90, 190}, + }, }, } - fw.fakeKubelet.machineStatsFunc = func() (*api.ContainerStats, error) { + fw.fakeKubelet.machineStatsFunc = func(req *info.ContainerInfoRequest) (*info.ContainerInfo, error) { return expectedStats, nil } @@ -210,7 +215,7 @@ func TestMachineStats(t *testing.T) { t.Fatalf("Got error GETing: %v", err) } defer resp.Body.Close() - var receivedStats api.ContainerStats + var receivedStats info.ContainerInfo decoder := json.NewDecoder(resp.Body) err = decoder.Decode(&receivedStats) if err != nil { diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 9f9e9a902b7..0d32062f734 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -26,6 +26,7 @@ import ( "testing" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/health" "github.com/GoogleCloudPlatform/kubernetes/pkg/tools" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" "github.com/coreos/go-etcd/etcd" @@ -324,9 +325,7 @@ func TestGetKubeletStateFromEtcdNotFound(t *testing.T) { reader := startReading(channel) fakeClient.Data["/registry/hosts/machine/kubelet"] = tools.EtcdResponseWithError{ R: &etcd.Response{}, - E: &etcd.EtcdError{ - ErrorCode: 100, - }, + E: tools.EtcdErrorNotFound, } err := kubelet.getKubeletStateFromEtcd("/registry/hosts/machine/kubelet", channel) expectNoError(t, err) @@ -424,8 +423,8 @@ func TestSyncManifestsDeletes(t *testing.T) { type FalseHealthChecker struct{} -func (f *FalseHealthChecker) HealthCheck(container api.Container) (HealthCheckStatus, error) { - return CheckUnhealthy, nil +func (f *FalseHealthChecker) HealthCheck(container api.Container) (health.Status, error) { + return health.Unhealthy, nil } func TestSyncManifestsUnhealthy(t *testing.T) { @@ -913,8 +912,8 @@ type mockCadvisorClient struct { } // ContainerInfo is a mock implementation of CadvisorInterface.ContainerInfo. -func (c *mockCadvisorClient) ContainerInfo(name string) (*info.ContainerInfo, error) { - args := c.Called(name) +func (c *mockCadvisorClient) ContainerInfo(name string, req *info.ContainerInfoRequest) (*info.ContainerInfo, error) { + args := c.Called(name, req) return args.Get(0).(*info.ContainerInfo), args.Error(1) } @@ -926,7 +925,7 @@ func (c *mockCadvisorClient) MachineInfo() (*info.MachineInfo, error) { func areSamePercentiles( cadvisorPercentiles []info.Percentile, - kubePercentiles []api.Percentile, + kubePercentiles []info.Percentile, t *testing.T, ) { if len(cadvisorPercentiles) != len(kubePercentiles) { @@ -966,7 +965,7 @@ func TestGetContainerStats(t *testing.T) { {80, 180}, {90, 190}, }, - CPUUsagePercentiles: []info.Percentile{ + CpuUsagePercentiles: []info.Percentile{ {51, 101}, {81, 181}, {91, 191}, @@ -975,7 +974,9 @@ func TestGetContainerStats(t *testing.T) { } mockCadvisor := &mockCadvisorClient{} - mockCadvisor.On("ContainerInfo", containerPath).Return(containerInfo, nil) + req := &info.ContainerInfoRequest{} + cadvisorReq := getCadvisorContainerInfoRequest(req) + mockCadvisor.On("ContainerInfo", containerPath, cadvisorReq).Return(containerInfo, nil) kubelet, _, fakeDocker := makeTestKubelet(t) kubelet.CadvisorClient = mockCadvisor @@ -988,15 +989,15 @@ func TestGetContainerStats(t *testing.T) { }, } - stats, err := kubelet.GetContainerStats("qux", "foo") + stats, err := kubelet.GetContainerInfo("qux", "foo", req) if err != nil { t.Errorf("unexpected error: %v", err) } - if stats.MaxMemoryUsage != containerInfo.StatsPercentiles.MaxMemoryUsage { + if stats.StatsPercentiles.MaxMemoryUsage != containerInfo.StatsPercentiles.MaxMemoryUsage { t.Errorf("wrong max memory usage") } - areSamePercentiles(containerInfo.StatsPercentiles.CpuUsagePercentiles, stats.CPUUsagePercentiles, t) - areSamePercentiles(containerInfo.StatsPercentiles.MemoryUsagePercentiles, stats.MemoryUsagePercentiles, t) + areSamePercentiles(containerInfo.StatsPercentiles.CpuUsagePercentiles, stats.StatsPercentiles.CpuUsagePercentiles, t) + areSamePercentiles(containerInfo.StatsPercentiles.MemoryUsagePercentiles, stats.StatsPercentiles.MemoryUsagePercentiles, t) mockCadvisor.AssertExpectations(t) } @@ -1020,7 +1021,9 @@ func TestGetMachineStats(t *testing.T) { } mockCadvisor := &mockCadvisorClient{} - mockCadvisor.On("ContainerInfo", containerPath).Return(containerInfo, nil) + req := &info.ContainerInfoRequest{} + cadvisorReq := getCadvisorContainerInfoRequest(req) + mockCadvisor.On("ContainerInfo", containerPath, cadvisorReq).Return(containerInfo, nil) kubelet := Kubelet{ DockerClient: &fakeDocker, @@ -1029,15 +1032,15 @@ func TestGetMachineStats(t *testing.T) { } // If the container name is an empty string, then it means the root container. - stats, err := kubelet.GetMachineStats() + stats, err := kubelet.GetMachineStats(req) if err != nil { t.Errorf("unexpected error: %v", err) } - if stats.MaxMemoryUsage != containerInfo.StatsPercentiles.MaxMemoryUsage { + if stats.StatsPercentiles.MaxMemoryUsage != containerInfo.StatsPercentiles.MaxMemoryUsage { t.Errorf("wrong max memory usage") } - areSamePercentiles(containerInfo.StatsPercentiles.CpuUsagePercentiles, stats.CPUUsagePercentiles, t) - areSamePercentiles(containerInfo.StatsPercentiles.MemoryUsagePercentiles, stats.MemoryUsagePercentiles, t) + areSamePercentiles(containerInfo.StatsPercentiles.CpuUsagePercentiles, stats.StatsPercentiles.CpuUsagePercentiles, t) + areSamePercentiles(containerInfo.StatsPercentiles.MemoryUsagePercentiles, stats.StatsPercentiles.MemoryUsagePercentiles, t) mockCadvisor.AssertExpectations(t) } @@ -1052,19 +1055,19 @@ func TestGetContainerStatsWithoutCadvisor(t *testing.T) { }, } - stats, _ := kubelet.GetContainerStats("qux", "foo") + stats, _ := kubelet.GetContainerInfo("qux", "foo", nil) // When there's no cAdvisor, the stats should be either nil or empty if stats == nil { return } - if stats.MaxMemoryUsage != 0 { - t.Errorf("MaxMemoryUsage is %v even if there's no cadvisor", stats.MaxMemoryUsage) + if stats.StatsPercentiles.MaxMemoryUsage != 0 { + t.Errorf("MaxMemoryUsage is %v even if there's no cadvisor", stats.StatsPercentiles.MaxMemoryUsage) } - if len(stats.CPUUsagePercentiles) > 0 { - t.Errorf("Cpu usage percentiles is not empty (%+v) even if there's no cadvisor", stats.CPUUsagePercentiles) + if len(stats.StatsPercentiles.CpuUsagePercentiles) > 0 { + t.Errorf("Cpu usage percentiles is not empty (%+v) even if there's no cadvisor", stats.StatsPercentiles.CpuUsagePercentiles) } - if len(stats.MemoryUsagePercentiles) > 0 { - t.Errorf("Memory usage percentiles is not empty (%+v) even if there's no cadvisor", stats.MemoryUsagePercentiles) + if len(stats.StatsPercentiles.MemoryUsagePercentiles) > 0 { + t.Errorf("Memory usage percentiles is not empty (%+v) even if there's no cadvisor", stats.StatsPercentiles.MemoryUsagePercentiles) } } @@ -1074,8 +1077,10 @@ func TestGetContainerStatsWhenCadvisorFailed(t *testing.T) { containerInfo := &info.ContainerInfo{} mockCadvisor := &mockCadvisorClient{} + req := &info.ContainerInfoRequest{} + cadvisorReq := getCadvisorContainerInfoRequest(req) expectedErr := fmt.Errorf("some error") - mockCadvisor.On("ContainerInfo", containerPath).Return(containerInfo, expectedErr) + mockCadvisor.On("ContainerInfo", containerPath, cadvisorReq).Return(containerInfo, expectedErr) kubelet, _, fakeDocker := makeTestKubelet(t) kubelet.CadvisorClient = mockCadvisor @@ -1088,7 +1093,7 @@ func TestGetContainerStatsWhenCadvisorFailed(t *testing.T) { }, } - stats, err := kubelet.GetContainerStats("qux", "foo") + stats, err := kubelet.GetContainerInfo("qux", "foo", req) if stats != nil { t.Errorf("non-nil stats on error") } @@ -1109,7 +1114,7 @@ func TestGetContainerStatsOnNonExistContainer(t *testing.T) { kubelet.CadvisorClient = mockCadvisor fakeDocker.containerList = []docker.APIContainers{} - stats, _ := kubelet.GetContainerStats("qux", "foo") + stats, _ := kubelet.GetContainerInfo("qux", "foo", nil) if stats != nil { t.Errorf("non-nil stats on non exist container") } diff --git a/pkg/labels/labels.go b/pkg/labels/labels.go index 351289ea639..8c1e050f876 100644 --- a/pkg/labels/labels.go +++ b/pkg/labels/labels.go @@ -23,14 +23,15 @@ import ( // Labels allows you to present labels independently from their storage. type Labels interface { + // Get returns the value for the provided label. Get(label string) (value string) } -// A map of label:value. Implements Labels. +// Set is a map of label:value. It implements Labels. type Set map[string]string -// All labels listed as a human readable string. Conveniently, exactly the format -// that ParseSelector takes. +// String returns all labels listed as a human readable string. +// Conveniently, exactly the format that ParseSelector takes. func (ls Set) String() string { selector := make([]string, 0, len(ls)) for key, value := range ls { @@ -41,12 +42,12 @@ func (ls Set) String() string { return strings.Join(selector, ",") } -// Implement Labels interface. +// Get returns the value in the map for the provided label. func (ls Set) Get(label string) string { return ls[label] } -// Convenience function: convert these labels to a selector. +// AsSelector converts labels into a selectors. func (ls Set) AsSelector() Selector { return SelectorFromSet(ls) } diff --git a/pkg/labels/selector.go b/pkg/labels/selector.go index ac9c534d921..42adf008acd 100644 --- a/pkg/labels/selector.go +++ b/pkg/labels/selector.go @@ -22,12 +22,12 @@ import ( "strings" ) -// Represents a selector. +// Selector represents a label selector. type Selector interface { - // Returns true if this selector matches the given set of labels. + // Matches returns true if this selector matches the given set of labels. Matches(Labels) bool - // Prints a human readable version of this selector. + // String returns a human readable string that represents this selector. String() string } @@ -87,7 +87,7 @@ func try(selectorPiece, op string) (lhs, rhs string, ok bool) { return "", "", false } -// Given a Set, return a Selector which will match exactly that Set. +// SelectorFromSet returns a Selector which will match exactly the given Set. func SelectorFromSet(ls Set) Selector { items := make([]Selector, 0, len(ls)) for label, value := range ls { @@ -99,7 +99,8 @@ func SelectorFromSet(ls Set) Selector { return andTerm(items) } -// Takes a string repsenting a selector and returns an object suitable for matching, or an error. +// ParseSelector takes a string repsenting a selector and returns an +// object suitable for matching, or an error. func ParseSelector(selector string) (Selector, error) { parts := strings.Split(selector, ",") sort.StringSlice(parts).Sort() diff --git a/pkg/master/master.go b/pkg/master/master.go index 74c40d0081b..e12fc7f28ec 100644 --- a/pkg/master/master.go +++ b/pkg/master/master.go @@ -43,7 +43,7 @@ type Master struct { storage map[string]apiserver.RESTStorage } -// Returns a memory (not etcd) backed apiserver. +// NewMemoryServer returns a new instance of Master backed with memory (not etcd). func NewMemoryServer(minions []string, podInfoGetter client.PodInfoGetter, cloud cloudprovider.Interface) *Master { m := &Master{ podRegistry: registry.MakeMemoryRegistry(), @@ -55,7 +55,7 @@ func NewMemoryServer(minions []string, podInfoGetter client.PodInfoGetter, cloud return m } -// Returns a new apiserver. +// New returns a new instance of Master connected to the given etcdServer. func New(etcdServers, minions []string, podInfoGetter client.PodInfoGetter, cloud cloudprovider.Interface, minionRegexp string) *Master { etcdClient := etcd.NewClient(etcdServers) minionRegistry := minionRegistryMaker(minions, cloud, minionRegexp) @@ -84,7 +84,7 @@ func (m *Master) init(cloud cloudprovider.Interface, podInfoGetter client.PodInf m.random = rand.New(rand.NewSource(int64(time.Now().Nanosecond()))) podCache := NewPodCache(podInfoGetter, m.podRegistry, time.Second*30) go podCache.Loop() - s := scheduler.MakeRandomFitScheduler(m.podRegistry, m.random) + s := scheduler.NewRandomFitScheduler(m.podRegistry, m.random) m.storage = map[string]apiserver.RESTStorage{ "pods": registry.MakePodRegistryStorage(m.podRegistry, podInfoGetter, s, m.minionRegistry, cloud, podCache), "replicationControllers": registry.NewControllerRegistryStorage(m.controllerRegistry, m.podRegistry), @@ -94,7 +94,7 @@ func (m *Master) init(cloud cloudprovider.Interface, podInfoGetter client.PodInf } -// Runs master. Never returns. +// Run begins serving the Kubernetes API. It never returns. func (m *Master) Run(myAddress, apiPrefix string) error { endpoints := registry.MakeEndpointController(m.serviceRegistry, m.podRegistry) go util.Forever(func() { endpoints.SyncServiceEndpoints() }, time.Second*10) @@ -109,8 +109,9 @@ func (m *Master) Run(myAddress, apiPrefix string) error { return s.ListenAndServe() } -// Instead of calling Run, call ConstructHandler to get a handler for your own -// server. Intended for testing. Only call once. +// ConstructHandler returns an http.Handler which serves the Kubernetes API. +// Instead of calling Run, you can call this function to get a handler for your own server. +// It is intended for testing. Only call once. func (m *Master) ConstructHandler(apiPrefix string) http.Handler { endpoints := registry.MakeEndpointController(m.serviceRegistry, m.podRegistry) go util.Forever(func() { endpoints.SyncServiceEndpoints() }, time.Second*10) diff --git a/pkg/master/pod_cache.go b/pkg/master/pod_cache.go index 14416af2ba5..c6be9a9f9dd 100644 --- a/pkg/master/pod_cache.go +++ b/pkg/master/pod_cache.go @@ -40,6 +40,7 @@ type PodCache struct { podLock sync.Mutex } +// NewPodCache returns a new PodCache which watches container information registered in the given PodRegistry. func NewPodCache(info client.PodInfoGetter, pods registry.PodRegistry, period time.Duration) *PodCache { return &PodCache{ containerInfo: info, @@ -49,7 +50,7 @@ func NewPodCache(info client.PodInfoGetter, pods registry.PodRegistry, period ti } } -// Implements the PodInfoGetter interface. +// GetPodInfo Implements the PodInfoGetter.GetPodInfo. // The returned value should be treated as read-only. func (p *PodCache) GetPodInfo(host, podID string) (api.PodInfo, error) { p.podLock.Lock() @@ -57,9 +58,8 @@ func (p *PodCache) GetPodInfo(host, podID string) (api.PodInfo, error) { value, ok := p.podInfo[podID] if !ok { return nil, errors.New("no cached pod info") - } else { - return value, nil } + return value, nil } func (p *PodCache) updatePodInfo(host, id string) error { @@ -73,7 +73,7 @@ func (p *PodCache) updatePodInfo(host, id string) error { return nil } -// Update information about all containers. Either called by Loop() below, or one-off. +// UpdateAllContainers updates information about all containers. Either called by Loop() below, or one-off. func (p *PodCache) UpdateAllContainers() { pods, err := p.pods.ListPods(labels.Everything()) if err != nil { @@ -88,7 +88,8 @@ func (p *PodCache) UpdateAllContainers() { } } -// Loop runs forever, it is expected to be placed in a go routine. +// Loop begins watching updates of container information. +// It runs forever, and is expected to be placed in a go routine. func (p *PodCache) Loop() { util.Forever(func() { p.UpdateAllContainers() }, p.period) } diff --git a/pkg/proxy/config/config.go b/pkg/proxy/config/config.go index 99db95f9b06..7f17d71d6f5 100644 --- a/pkg/proxy/config/config.go +++ b/pkg/proxy/config/config.go @@ -24,40 +24,44 @@ import ( "github.com/golang/glog" ) +// Operation is a type of operation of services or endpoints. type Operation int +// These are the available operation types. const ( SET Operation = iota ADD REMOVE ) -// Defines an operation sent on the channel. You can add or remove single services by -// sending an array of size one and Op == ADD|REMOVE. For setting the state of the system -// to a given state for this source configuration, set Services as desired and Op to SET, -// which will reset the system state to that specified in this operation for this source -// channel. To remove all services, set Services to empty array and Op to SET +// ServiceUpdate describes an operation of services, sent on the channel. +// You can add or remove single services by sending an array of size one and Op == ADD|REMOVE. +// For setting the state of the system to a given state for this source configuration, set Services as desired and Op to SET, +// which will reset the system state to that specified in this operation for this source channel. +// To remove all services, set Services to empty array and Op to SET type ServiceUpdate struct { Services []api.Service Op Operation } -// Defines an operation sent on the channel. You can add or remove single endpoints by -// sending an array of size one and Op == ADD|REMOVE. For setting the state of the system -// to a given state for this source configuration, set Endpoints as desired and Op to SET, -// which will reset the system state to that specified in this operation for this source -// channel. To remove all endpoints, set Endpoints to empty array and Op to SET +// EndpointsUpdate describes an operation of endpoints, sent on the channel. +// You can add or remove single endpoints by sending an array of size one and Op == ADD|REMOVE. +// For setting the state of the system to a given state for this source configuration, set Endpoints as desired and Op to SET, +// which will reset the system state to that specified in this operation for this source channel. +// To remove all endpoints, set Endpoints to empty array and Op to SET type EndpointsUpdate struct { Endpoints []api.Endpoints Op Operation } +// ServiceConfigHandler is an abstract interface of objects which receive update notifications for the set of services. type ServiceConfigHandler interface { - // Sent when a configuration has been changed by one of the sources. This is the - // union of all the configuration sources. + // OnUpdate gets called when a configuration has been changed by one of the sources. + // This is the union of all the configuration sources. OnUpdate(services []api.Service) } +// EndpointsConfigHandler is an abstract interface of objects which receive update notifications for the set of endpoints. type EndpointsConfigHandler interface { // OnUpdate gets called when endpoints configuration is changed for a given // service on any of the configuration sources. An example is when a new @@ -65,6 +69,8 @@ type EndpointsConfigHandler interface { OnUpdate(endpoints []api.Endpoints) } +// ServiceConfig tracks a set of service configurations and their endpoint configurations. +// It accepts "set", "add" and "remove" operations of services and endpoints via channels, and invokes registered handlers on change. type ServiceConfig struct { // Configuration sources and their lock. configSourceLock sync.RWMutex @@ -94,6 +100,8 @@ type ServiceConfig struct { endpointsNotifyChannel chan string } +// NewServiceConfig creates a new ServiceConfig. +// It immediately runs the created ServiceConfig. func NewServiceConfig() *ServiceConfig { config := &ServiceConfig{ serviceConfigSources: make(map[string]chan ServiceUpdate), @@ -109,22 +117,26 @@ func NewServiceConfig() *ServiceConfig { return config } +// Run begins a loop to accept new service configurations and new endpoint configurations. +// It never returns. func (impl *ServiceConfig) Run() { glog.Infof("Starting the config Run loop") for { select { case source := <-impl.serviceNotifyChannel: glog.Infof("Got new service configuration from source %s", source) - impl.NotifyServiceUpdate() + impl.notifyServiceUpdate() case source := <-impl.endpointsNotifyChannel: glog.Infof("Got new endpoint configuration from source %s", source) - impl.NotifyEndpointsUpdate() + impl.notifyEndpointsUpdate() case <-time.After(1 * time.Second): } } } -func (impl *ServiceConfig) ServiceChannelListener(source string, listenChannel chan ServiceUpdate) { +// serviceChannelListener begins a loop to handle incoming ServiceUpdate notifications from the channel. +// It never returns. +func (impl *ServiceConfig) serviceChannelListener(source string, listenChannel chan ServiceUpdate) { // Represents the current services configuration for this channel. serviceMap := make(map[string]api.Service) for { @@ -160,7 +172,9 @@ func (impl *ServiceConfig) ServiceChannelListener(source string, listenChannel c } } -func (impl *ServiceConfig) EndpointsChannelListener(source string, listenChannel chan EndpointsUpdate) { +// endpointsChannelListener begins a loop to handle incoming EndpointsUpdate notifications from the channel. +// It never returns. +func (impl *ServiceConfig) endpointsChannelListener(source string, listenChannel chan EndpointsUpdate) { endpointMap := make(map[string]api.Endpoints) for { select { @@ -214,11 +228,11 @@ func (impl *ServiceConfig) GetServiceConfigurationChannel(source string) chan Se } newChannel := make(chan ServiceUpdate) impl.serviceConfigSources[source] = newChannel - go impl.ServiceChannelListener(source, newChannel) + go impl.serviceChannelListener(source, newChannel) return newChannel } -// GetEndpointConfigurationChannel returns a channel where a configuration source +// GetEndpointsConfigurationChannel returns a channel where a configuration source // can send updates of new endpoint configurations. Multiple calls with the same // source will return the same channel. This allows change and state based sources // to use the same channel. Difference source names however will be treated as a @@ -235,11 +249,11 @@ func (impl *ServiceConfig) GetEndpointsConfigurationChannel(source string) chan } newChannel := make(chan EndpointsUpdate) impl.endpointsConfigSources[source] = newChannel - go impl.EndpointsChannelListener(source, newChannel) + go impl.endpointsChannelListener(source, newChannel) return newChannel } -// Register ServiceConfigHandler to receive updates of changes to services. +// RegisterServiceHandler registers the ServiceConfigHandler to receive updates of changes to services. func (impl *ServiceConfig) RegisterServiceHandler(handler ServiceConfigHandler) { impl.handlerLock.Lock() defer impl.handlerLock.Unlock() @@ -255,7 +269,7 @@ func (impl *ServiceConfig) RegisterServiceHandler(handler ServiceConfigHandler) panic("Only up to 10 service handlers supported for now") } -// Register ServiceConfigHandler to receive updates of changes to services. +// RegisterEndpointsHandler registers the EndpointsConfigHandler to receive updates of changes to services. func (impl *ServiceConfig) RegisterEndpointsHandler(handler EndpointsConfigHandler) { impl.handlerLock.Lock() defer impl.handlerLock.Unlock() @@ -271,8 +285,9 @@ func (impl *ServiceConfig) RegisterEndpointsHandler(handler EndpointsConfigHandl panic("Only up to 10 endpoint handlers supported for now") } -func (impl *ServiceConfig) NotifyServiceUpdate() { - services := make([]api.Service, 0) +// notifyServiceUpdate calls the registered ServiceConfigHandlers with the current states of services. +func (impl *ServiceConfig) notifyServiceUpdate() { + services := []api.Service{} impl.configLock.RLock() for _, sourceServices := range impl.serviceConfig { for _, value := range sourceServices { @@ -291,8 +306,9 @@ func (impl *ServiceConfig) NotifyServiceUpdate() { } } -func (impl *ServiceConfig) NotifyEndpointsUpdate() { - endpoints := make([]api.Endpoints, 0) +// notifyEndpointsUpdate calls the registered EndpointsConfigHandlers with the current states of endpoints. +func (impl *ServiceConfig) notifyEndpointsUpdate() { + endpoints := []api.Endpoints{} impl.configLock.RLock() for _, sourceEndpoints := range impl.endpointConfig { for _, value := range sourceEndpoints { diff --git a/pkg/proxy/config/etcd.go b/pkg/proxy/config/etcd.go index 5b536f4e9ca..89488fca4f2 100644 --- a/pkg/proxy/config/etcd.go +++ b/pkg/proxy/config/etcd.go @@ -15,7 +15,7 @@ limitations under the License. */ // Watches etcd and gets the full configuration on preset intervals. -// Expects the list of exposed services to live under: +// It expects the list of exposed services to live under: // registry/services // which in etcd is exposed like so: // http:///v2/keys/registry/services @@ -30,7 +30,7 @@ limitations under the License. // '[ { "machine": , "name": }, // { "machine": , "name": } // ]', -// + package config import ( @@ -44,14 +44,17 @@ import ( "github.com/golang/glog" ) -const RegistryRoot = "registry/services" +// registryRoot is the key prefix for service configs in etcd. +const registryRoot = "registry/services" +// ConfigSourceEtcd communicates with a etcd via the client, and sends the change notification of services and endpoints to the specified channels. type ConfigSourceEtcd struct { client *etcd.Client serviceChannel chan ServiceUpdate endpointsChannel chan EndpointsUpdate } +// NewConfigSourceEtcd creates a new ConfigSourceEtcd and immediately runs the created ConfigSourceEtcd in a goroutine. func NewConfigSourceEtcd(client *etcd.Client, serviceChannel chan ServiceUpdate, endpointsChannel chan EndpointsUpdate) ConfigSourceEtcd { config := ConfigSourceEtcd{ client: client, @@ -62,13 +65,14 @@ func NewConfigSourceEtcd(client *etcd.Client, serviceChannel chan ServiceUpdate, return config } +// Run begins watching for new services and their endpoints on etcd. func (impl ConfigSourceEtcd) Run() { // Initially, just wait for the etcd to come up before doing anything more complicated. var services []api.Service var endpoints []api.Endpoints var err error for { - services, endpoints, err = impl.GetServices() + services, endpoints, err = impl.getServices() if err == nil { break } @@ -87,10 +91,10 @@ func (impl ConfigSourceEtcd) Run() { // Ok, so we got something back from etcd. Let's set up a watch for new services, and // their endpoints - go impl.WatchForChanges() + go impl.watchForChanges() for { - services, endpoints, err = impl.GetServices() + services, endpoints, err = impl.getServices() if err != nil { glog.Errorf("ConfigSourceEtcd: Failed to get services: %v", err) } else { @@ -107,12 +111,12 @@ func (impl ConfigSourceEtcd) Run() { } } -// Finds the list of services and their endpoints from etcd. +// getServices finds the list of services and their endpoints from etcd. // This operation is akin to a set a known good at regular intervals. -func (impl ConfigSourceEtcd) GetServices() ([]api.Service, []api.Endpoints, error) { - response, err := impl.client.Get(RegistryRoot+"/specs", true, false) +func (impl ConfigSourceEtcd) getServices() ([]api.Service, []api.Endpoints, error) { + response, err := impl.client.Get(registryRoot+"/specs", true, false) if err != nil { - glog.Errorf("Failed to get the key %s: %v", RegistryRoot, err) + glog.Errorf("Failed to get the key %s: %v", registryRoot, err) return make([]api.Service, 0), make([]api.Endpoints, 0), err } if response.Node.Dir == true { @@ -129,7 +133,7 @@ func (impl ConfigSourceEtcd) GetServices() ([]api.Service, []api.Endpoints, erro continue } retServices[i] = svc - endpoints, err := impl.GetEndpoints(svc.ID) + endpoints, err := impl.getEndpoints(svc.ID) if err != nil { glog.Errorf("Couldn't get endpoints for %s : %v skipping", svc.ID, err) } @@ -138,23 +142,23 @@ func (impl ConfigSourceEtcd) GetServices() ([]api.Service, []api.Endpoints, erro } return retServices, retEndpoints, err } - return nil, nil, fmt.Errorf("did not get the root of the registry %s", RegistryRoot) + return nil, nil, fmt.Errorf("did not get the root of the registry %s", registryRoot) } -func (impl ConfigSourceEtcd) GetEndpoints(service string) (api.Endpoints, error) { - key := fmt.Sprintf(RegistryRoot + "/endpoints/" + service) +// getEndpoints finds the list of endpoints of the service from etcd. +func (impl ConfigSourceEtcd) getEndpoints(service string) (api.Endpoints, error) { + key := fmt.Sprintf(registryRoot + "/endpoints/" + service) response, err := impl.client.Get(key, true, false) if err != nil { glog.Errorf("Failed to get the key: %s %v", key, err) return api.Endpoints{}, err } // Parse all the endpoint specifications in this value. - return ParseEndpoints(response.Node.Value) + return parseEndpoints(response.Node.Value) } -// EtcdResponseToServiceAndLocalport takes an etcd response and pulls it apart to find -// service -func EtcdResponseToService(response *etcd.Response) (*api.Service, error) { +// etcdResponseToService takes an etcd response and pulls it apart to find service. +func etcdResponseToService(response *etcd.Response) (*api.Service, error) { if response.Node == nil { return nil, fmt.Errorf("invalid response from etcd: %#v", response) } @@ -166,31 +170,31 @@ func EtcdResponseToService(response *etcd.Response) (*api.Service, error) { return &svc, err } -func ParseEndpoints(jsonString string) (api.Endpoints, error) { +func parseEndpoints(jsonString string) (api.Endpoints, error) { var e api.Endpoints err := json.Unmarshal([]byte(jsonString), &e) return e, err } -func (impl ConfigSourceEtcd) WatchForChanges() { +func (impl ConfigSourceEtcd) watchForChanges() { glog.Info("Setting up a watch for new services") watchChannel := make(chan *etcd.Response) go impl.client.Watch("/registry/services/", 0, true, watchChannel, nil) for { watchResponse := <-watchChannel - impl.ProcessChange(watchResponse) + impl.processChange(watchResponse) } } -func (impl ConfigSourceEtcd) ProcessChange(response *etcd.Response) { +func (impl ConfigSourceEtcd) processChange(response *etcd.Response) { glog.Infof("Processing a change in service configuration... %s", *response) // If it's a new service being added (signified by a localport being added) // then process it as such if strings.Contains(response.Node.Key, "/endpoints/") { - impl.ProcessEndpointResponse(response) + impl.processEndpointResponse(response) } else if response.Action == "set" { - service, err := EtcdResponseToService(response) + service, err := etcdResponseToService(response) if err != nil { glog.Errorf("Failed to parse %s Port: %s", response, err) return @@ -208,13 +212,12 @@ func (impl ConfigSourceEtcd) ProcessChange(response *etcd.Response) { serviceUpdate := ServiceUpdate{Op: REMOVE, Services: []api.Service{{JSONBase: api.JSONBase{ID: parts[3]}}}} impl.serviceChannel <- serviceUpdate return - } else { - glog.Infof("Unknown service delete: %#v", parts) } + glog.Infof("Unknown service delete: %#v", parts) } } -func (impl ConfigSourceEtcd) ProcessEndpointResponse(response *etcd.Response) { +func (impl ConfigSourceEtcd) processEndpointResponse(response *etcd.Response) { glog.Infof("Processing a change in endpoint configuration... %s", *response) var endpoints api.Endpoints err := json.Unmarshal([]byte(response.Node.Value), &endpoints) diff --git a/pkg/proxy/config/etcd_test.go b/pkg/proxy/config/etcd_test.go index 7c7a832f0df..9b1dafbcccb 100644 --- a/pkg/proxy/config/etcd_test.go +++ b/pkg/proxy/config/etcd_test.go @@ -26,15 +26,15 @@ import ( const TomcatContainerEtcdKey = "/registry/services/tomcat/endpoints/tomcat-3bd5af34" const TomcatService = "tomcat" -const TomcatContainerId = "tomcat-3bd5af34" +const TomcatContainerID = "tomcat-3bd5af34" -func ValidateJsonParsing(t *testing.T, jsonString string, expectedEndpoints api.Endpoints, expectError bool) { - endpoints, err := ParseEndpoints(jsonString) +func validateJSONParsing(t *testing.T, jsonString string, expectedEndpoints api.Endpoints, expectError bool) { + endpoints, err := parseEndpoints(jsonString) if err == nil && expectError { - t.Errorf("ValidateJsonParsing did not get expected error when parsing %s", jsonString) + t.Errorf("validateJSONParsing did not get expected error when parsing %s", jsonString) } if err != nil && !expectError { - t.Errorf("ValidateJsonParsing got unexpected error %+v when parsing %s", err, jsonString) + t.Errorf("validateJSONParsing got unexpected error %+v when parsing %s", err, jsonString) } if !reflect.DeepEqual(expectedEndpoints, endpoints) { t.Errorf("Didn't get expected endpoints %+v got: %+v", expectedEndpoints, endpoints) @@ -42,7 +42,7 @@ func ValidateJsonParsing(t *testing.T, jsonString string, expectedEndpoints api. } func TestParseJsonEndpoints(t *testing.T) { - ValidateJsonParsing(t, "", api.Endpoints{}, true) + validateJSONParsing(t, "", api.Endpoints{}, true) endpoints := api.Endpoints{ Name: "foo", Endpoints: []string{"foo", "bar", "baz"}, @@ -51,6 +51,6 @@ func TestParseJsonEndpoints(t *testing.T) { if err != nil { t.Errorf("Unexpected error: %#v", err) } - ValidateJsonParsing(t, string(data), endpoints, false) - // ValidateJsonParsing(t, "[{\"port\":8000,\"name\":\"mysql\",\"machine\":\"foo\"},{\"port\":9000,\"name\":\"mysql\",\"machine\":\"bar\"}]", []string{"foo:8000", "bar:9000"}, false) + validateJSONParsing(t, string(data), endpoints, false) + // validateJSONParsing(t, "[{\"port\":8000,\"name\":\"mysql\",\"machine\":\"foo\"},{\"port\":9000,\"name\":\"mysql\",\"machine\":\"bar\"}]", []string{"foo:8000", "bar:9000"}, false) } diff --git a/pkg/proxy/config/file.go b/pkg/proxy/config/file.go index 65a1b9705bf..ac6810bdec3 100644 --- a/pkg/proxy/config/file.go +++ b/pkg/proxy/config/file.go @@ -28,6 +28,7 @@ limitations under the License. // } //] //} + package config import ( @@ -41,22 +42,23 @@ import ( "github.com/golang/glog" ) -// TODO: kill this struct. -type ServiceJSON struct { - Name string - Port int - Endpoints []string -} -type ConfigFile struct { - Services []ServiceJSON +// serviceConfig is a deserialized form of the config file format which ConfigSourceFile accepts. +type serviceConfig struct { + Services []struct { + Name string `json: "name"` + Port int `json: "port"` + Endpoints []string `json: "endpoints"` + } `json: "service"` } +// ConfigSourceFile periodically reads service configurations in JSON from a file, and sends the services and endpoints defined in the file to the specified channels. type ConfigSourceFile struct { serviceChannel chan ServiceUpdate endpointsChannel chan EndpointsUpdate filename string } +// NewConfigSourceFile creates a new ConfigSourceFile and let it immediately runs the created ConfigSourceFile in a goroutine. func NewConfigSourceFile(filename string, serviceChannel chan ServiceUpdate, endpointsChannel chan EndpointsUpdate) ConfigSourceFile { config := ConfigSourceFile{ filename: filename, @@ -67,6 +69,7 @@ func NewConfigSourceFile(filename string, serviceChannel chan ServiceUpdate, end return config } +// Run begins watching the config file. func (impl ConfigSourceFile) Run() { glog.Infof("Watching file %s", impl.filename) var lastData []byte @@ -85,7 +88,7 @@ func (impl ConfigSourceFile) Run() { } lastData = data - config := new(ConfigFile) + config := &serviceConfig{} if err = json.Unmarshal(data, config); err != nil { glog.Errorf("Couldn't unmarshal configuration from file : %s %v", data, err) continue diff --git a/pkg/proxy/doc.go b/pkg/proxy/doc.go index e29caf01efe..7e209b66503 100644 --- a/pkg/proxy/doc.go +++ b/pkg/proxy/doc.go @@ -14,5 +14,5 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package proxy implements the layer-3 network proxy +// Package proxy implements the layer-3 network proxy. package proxy diff --git a/pkg/proxy/loadbalancer.go b/pkg/proxy/loadbalancer.go index 6481ee30bfd..6771d8bfcc6 100644 --- a/pkg/proxy/loadbalancer.go +++ b/pkg/proxy/loadbalancer.go @@ -22,6 +22,8 @@ import ( "net" ) +// LoadBalancer represents a load balancer that decides where to route +// the incoming services for a particular service to. type LoadBalancer interface { // LoadBalance takes an incoming request and figures out where to route it to. // Determination is based on destination service (for example, 'mysql') as diff --git a/pkg/proxy/proxier.go b/pkg/proxy/proxier.go index f83f3f9ba4e..a7822e56a29 100644 --- a/pkg/proxy/proxier.go +++ b/pkg/proxy/proxier.go @@ -33,11 +33,12 @@ type Proxier struct { serviceMap map[string]int } +// NewProxier returns a newly created and correctly initialized instance of Proxier. func NewProxier(loadBalancer LoadBalancer) *Proxier { return &Proxier{loadBalancer: loadBalancer, serviceMap: make(map[string]int)} } -func CopyBytes(in, out *net.TCPConn) { +func copyBytes(in, out *net.TCPConn) { glog.Infof("Copying from %v <-> %v <-> %v <-> %v", in.RemoteAddr(), in.LocalAddr(), out.LocalAddr(), out.RemoteAddr()) _, err := io.Copy(in, out) @@ -49,14 +50,17 @@ func CopyBytes(in, out *net.TCPConn) { out.CloseWrite() } -// Create a bidirectional byte shuffler. Copies bytes to/from each connection. -func ProxyConnection(in, out *net.TCPConn) { +// proxyConnection creates a bidirectional byte shuffler. +// It copies bytes to/from each connection. +func proxyConnection(in, out *net.TCPConn) { glog.Infof("Creating proxy between %v <-> %v <-> %v <-> %v", in.RemoteAddr(), in.LocalAddr(), out.LocalAddr(), out.RemoteAddr()) - go CopyBytes(in, out) - go CopyBytes(out, in) + go copyBytes(in, out) + go copyBytes(out, in) } +// AcceptHandler begins accepting incoming connections from listener and proxying the connections to the load-balanced endpoints. +// It never returns. func (proxier Proxier) AcceptHandler(service string, listener net.Listener) { for { inConn, err := listener.Accept() @@ -83,12 +87,12 @@ func (proxier Proxier) AcceptHandler(service string, listener net.Listener) { inConn.Close() continue } - ProxyConnection(inConn.(*net.TCPConn), outConn.(*net.TCPConn)) + proxyConnection(inConn.(*net.TCPConn), outConn.(*net.TCPConn)) } } -// AddService starts listening for a new service on a given port. -func (proxier Proxier) AddService(service string, port int) error { +// addService starts listening for a new service on a given port. +func (proxier Proxier) addService(service string, port int) error { // Make sure we can start listening on the port before saying all's well. l, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) if err != nil { @@ -117,18 +121,21 @@ func (proxier Proxier) addServiceCommon(service string, l net.Listener) { go proxier.AcceptHandler(service, l) } +// OnUpdate recieves update notices for the updated services and start listening newly added services. +// It implements "github.com/GoogleCloudPlatform/kubernetes/pkg/proxy/config".ServiceConfigHandler.OnUpdate. func (proxier Proxier) OnUpdate(services []api.Service) { glog.Infof("Received update notice: %+v", services) for _, service := range services { port, exists := proxier.serviceMap[service.ID] - if !exists || port != service.Port { - glog.Infof("Adding a new service %s on port %d", service.ID, service.Port) - err := proxier.AddService(service.ID, service.Port) - if err == nil { - proxier.serviceMap[service.ID] = service.Port - } else { - glog.Infof("Failed to start listening for %s on %d", service.ID, service.Port) - } + if exists && port == service.Port { + continue } + glog.Infof("Adding a new service %s on port %d", service.ID, service.Port) + err := proxier.addService(service.ID, service.Port) + if err != nil { + glog.Infof("Failed to start listening for %s on %d", service.ID, service.Port) + continue + } + proxier.serviceMap[service.ID] = service.Port } } diff --git a/pkg/proxy/roundrobbin.go b/pkg/proxy/roundrobbin.go index a55d10d5ccc..a5acebba7fc 100644 --- a/pkg/proxy/roundrobbin.go +++ b/pkg/proxy/roundrobbin.go @@ -29,22 +29,25 @@ import ( "github.com/golang/glog" ) +// LoadBalancerRR is a round-robin load balancer. It implements LoadBalancer. type LoadBalancerRR struct { lock sync.RWMutex endpointsMap map[string][]string rrIndex map[string]int } +// NewLoadBalancerRR returns a newly created and correctly initialized instance of LoadBalancerRR. func NewLoadBalancerRR() *LoadBalancerRR { return &LoadBalancerRR{endpointsMap: make(map[string][]string), rrIndex: make(map[string]int)} } +// LoadBalance selects an endpoint of the service by round-robin algorithm. func (impl LoadBalancerRR) LoadBalance(service string, srcAddr net.Addr) (string, error) { impl.lock.RLock() endpoints, exists := impl.endpointsMap[service] index := impl.rrIndex[service] impl.lock.RUnlock() - if exists == false { + if !exists { return "", errors.New("no service entry for:" + service) } if len(endpoints) == 0 { @@ -55,7 +58,7 @@ func (impl LoadBalancerRR) LoadBalance(service string, srcAddr net.Addr) (string return endpoint, nil } -func (impl LoadBalancerRR) IsValid(spec string) bool { +func (impl LoadBalancerRR) isValid(spec string) bool { _, port, err := net.SplitHostPort(spec) if err != nil { return false @@ -67,16 +70,19 @@ func (impl LoadBalancerRR) IsValid(spec string) bool { return value > 0 } -func (impl LoadBalancerRR) FilterValidEndpoints(endpoints []string) []string { +func (impl LoadBalancerRR) filterValidEndpoints(endpoints []string) []string { var result []string for _, spec := range endpoints { - if impl.IsValid(spec) { + if impl.isValid(spec) { result = append(result, spec) } } return result } +// OnUpdate updates the registered endpoints with the new +// endpoint information, removes the registered endpoints +// no longer present in the provided endpoints. func (impl LoadBalancerRR) OnUpdate(endpoints []api.Endpoints) { tmp := make(map[string]bool) impl.lock.Lock() @@ -84,7 +90,7 @@ func (impl LoadBalancerRR) OnUpdate(endpoints []api.Endpoints) { // First update / add all new endpoints for services. for _, value := range endpoints { existingEndpoints, exists := impl.endpointsMap[value.Name] - validEndpoints := impl.FilterValidEndpoints(value.Endpoints) + validEndpoints := impl.filterValidEndpoints(value.Endpoints) if !exists || !reflect.DeepEqual(existingEndpoints, validEndpoints) { glog.Infof("LoadBalancerRR: Setting endpoints for %s to %+v", value.Name, value.Endpoints) impl.endpointsMap[value.Name] = validEndpoints diff --git a/pkg/proxy/roundrobbin_test.go b/pkg/proxy/roundrobbin_test.go index b4af90a9819..1112141de80 100644 --- a/pkg/proxy/roundrobbin_test.go +++ b/pkg/proxy/roundrobbin_test.go @@ -24,16 +24,16 @@ import ( func TestLoadBalanceValidateWorks(t *testing.T) { loadBalancer := NewLoadBalancerRR() - if loadBalancer.IsValid("") { + if loadBalancer.isValid("") { t.Errorf("Didn't fail for empty string") } - if loadBalancer.IsValid("foobar") { + if loadBalancer.isValid("foobar") { t.Errorf("Didn't fail with no port") } - if loadBalancer.IsValid("foobar:-1") { + if loadBalancer.isValid("foobar:-1") { t.Errorf("Didn't fail with a negative port") } - if !loadBalancer.IsValid("foobar:8080") { + if !loadBalancer.isValid("foobar:8080") { t.Errorf("Failed a valid config.") } } @@ -41,7 +41,7 @@ func TestLoadBalanceValidateWorks(t *testing.T) { func TestLoadBalanceFilterWorks(t *testing.T) { loadBalancer := NewLoadBalancerRR() endpoints := []string{"foobar:1", "foobar:2", "foobar:-1", "foobar:3", "foobar:-2"} - filtered := loadBalancer.FilterValidEndpoints(endpoints) + filtered := loadBalancer.filterValidEndpoints(endpoints) if len(filtered) != 3 { t.Errorf("Failed to filter to the correct size") @@ -59,7 +59,7 @@ func TestLoadBalanceFilterWorks(t *testing.T) { func TestLoadBalanceFailsWithNoEndpoints(t *testing.T) { loadBalancer := NewLoadBalancerRR() - endpoints := make([]api.Endpoints, 0) + var endpoints []api.Endpoints loadBalancer.OnUpdate(endpoints) endpoint, err := loadBalancer.LoadBalance("foo", nil) if err == nil { diff --git a/pkg/registry/etcd_registry_test.go b/pkg/registry/etcd_registry_test.go index caa5c08bce1..5d0718a3754 100644 --- a/pkg/registry/etcd_registry_test.go +++ b/pkg/registry/etcd_registry_test.go @@ -53,9 +53,7 @@ func TestEtcdGetPodNotFound(t *testing.T) { R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ - ErrorCode: 100, - }, + E: tools.EtcdErrorNotFound, } registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) _, err := registry.GetPod("foo") @@ -70,7 +68,7 @@ func TestEtcdCreatePod(t *testing.T) { R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ErrorCode: 100}, + E: tools.EtcdErrorNotFound, } fakeClient.Set("/registry/hosts/machine/kubelet", util.MakeJSONString([]api.ContainerManifest{}), 0) registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) @@ -133,13 +131,13 @@ func TestEtcdCreatePodWithContainersError(t *testing.T) { R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ErrorCode: 100}, + E: tools.EtcdErrorNotFound, } fakeClient.Data["/registry/hosts/machine/kubelet"] = tools.EtcdResponseWithError{ R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ErrorCode: 200}, + E: tools.EtcdErrorValueRequired, } registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) err := registry.CreatePod("machine", api.Pod{ @@ -154,7 +152,7 @@ func TestEtcdCreatePodWithContainersError(t *testing.T) { if err == nil { t.Error("Unexpected non-error") } - if err != nil && err.(*etcd.EtcdError).ErrorCode != 100 { + if !tools.IsEtcdNotFound(err) { t.Errorf("Unexpected error: %#v", err) } } @@ -165,13 +163,13 @@ func TestEtcdCreatePodWithContainersNotFound(t *testing.T) { R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ErrorCode: 100}, + E: tools.EtcdErrorNotFound, } fakeClient.Data["/registry/hosts/machine/kubelet"] = tools.EtcdResponseWithError{ R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ErrorCode: 100}, + E: tools.EtcdErrorNotFound, } registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) err := registry.CreatePod("machine", api.Pod{ @@ -213,7 +211,7 @@ func TestEtcdCreatePodWithExistingContainers(t *testing.T) { R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ErrorCode: 100}, + E: tools.EtcdErrorNotFound, } fakeClient.Set("/registry/hosts/machine/kubelet", util.MakeJSONString([]api.ContainerManifest{ { @@ -329,7 +327,7 @@ func TestEtcdListPodsNotFound(t *testing.T) { key := "/registry/hosts/machine/pods" fakeClient.Data[key] = tools.EtcdResponseWithError{ R: &etcd.Response{}, - E: &etcd.EtcdError{ErrorCode: 100}, + E: tools.EtcdErrorNotFound, } registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) pods, err := registry.ListPods(labels.Everything()) @@ -374,7 +372,7 @@ func TestEtcdListControllersNotFound(t *testing.T) { key := "/registry/controllers" fakeClient.Data[key] = tools.EtcdResponseWithError{ R: &etcd.Response{}, - E: &etcd.EtcdError{ErrorCode: 100}, + E: tools.EtcdErrorNotFound, } registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) controllers, err := registry.ListControllers() @@ -389,7 +387,7 @@ func TestEtcdListServicesNotFound(t *testing.T) { key := "/registry/services/specs" fakeClient.Data[key] = tools.EtcdResponseWithError{ R: &etcd.Response{}, - E: &etcd.EtcdError{ErrorCode: 100}, + E: tools.EtcdErrorNotFound, } registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) services, err := registry.ListServices() @@ -442,9 +440,7 @@ func TestEtcdGetControllerNotFound(t *testing.T) { R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ - ErrorCode: 100, - }, + E: tools.EtcdErrorNotFound, } registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) ctrl, err := registry.GetController("foo") @@ -538,7 +534,7 @@ func TestEtcdCreateService(t *testing.T) { R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ErrorCode: 100}, + E: tools.EtcdErrorNotFound, } registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) err := registry.CreateService(api.Service{ @@ -572,9 +568,7 @@ func TestEtcdGetServiceNotFound(t *testing.T) { R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ - ErrorCode: 100, - }, + E: tools.EtcdErrorNotFound, } registry := MakeTestEtcdRegistry(fakeClient, []string{"machine"}) _, err := registry.GetService("foo") diff --git a/pkg/registry/healthy_minion_registry.go b/pkg/registry/healthy_minion_registry.go new file mode 100644 index 00000000000..32273cb4856 --- /dev/null +++ b/pkg/registry/healthy_minion_registry.go @@ -0,0 +1,86 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registry + +import ( + "fmt" + "net/http" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/health" +) + +type HealthyMinionRegistry struct { + delegate MinionRegistry + client health.HTTPGetInterface + port int +} + +func NewHealthyMinionRegistry(delegate MinionRegistry, client *http.Client) MinionRegistry { + return &HealthyMinionRegistry{ + delegate: delegate, + client: client, + port: 10250, + } +} + +func (h *HealthyMinionRegistry) makeMinionURL(minion string) string { + return fmt.Sprintf("http://%s:%d/healthz", minion, h.port) +} + +func (h *HealthyMinionRegistry) List() (currentMinions []string, err error) { + var result []string + list, err := h.delegate.List() + if err != nil { + return result, err + } + for _, minion := range list { + status, err := health.Check(h.makeMinionURL(minion), h.client) + if err != nil { + return result, err + } + if status == health.Healthy { + result = append(result, minion) + } + } + return result, nil +} + +func (h *HealthyMinionRegistry) Insert(minion string) error { + return h.delegate.Insert(minion) +} + +func (h *HealthyMinionRegistry) Delete(minion string) error { + return h.delegate.Delete(minion) +} + +func (h *HealthyMinionRegistry) Contains(minion string) (bool, error) { + contains, err := h.delegate.Contains(minion) + if err != nil { + return false, err + } + if !contains { + return false, nil + } + status, err := health.Check(h.makeMinionURL(minion), h.client) + if err != nil { + return false, err + } + if status == health.Unhealthy { + return false, nil + } + return true, nil +} diff --git a/pkg/registry/healthy_minion_registry_test.go b/pkg/registry/healthy_minion_registry_test.go new file mode 100644 index 00000000000..7ba29a6b6bc --- /dev/null +++ b/pkg/registry/healthy_minion_registry_test.go @@ -0,0 +1,96 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registry + +import ( + "net/http" + "reflect" + "testing" +) + +type alwaysYes struct{} + +func (alwaysYes) Get(url string) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + }, nil +} + +func TestBasicDelegation(t *testing.T) { + mockMinionRegistry := MockMinionRegistry{ + minions: []string{"m1", "m2", "m3"}, + } + healthy := HealthyMinionRegistry{ + delegate: &mockMinionRegistry, + client: alwaysYes{}, + } + + list, err := healthy.List() + expectNoError(t, err) + if !reflect.DeepEqual(list, mockMinionRegistry.minions) { + t.Errorf("Expected %v, Got %v", mockMinionRegistry.minions, list) + } + err = healthy.Insert("foo") + expectNoError(t, err) + + ok, err := healthy.Contains("m1") + expectNoError(t, err) + if !ok { + t.Errorf("Unexpected absence of 'm1'") + } + + ok, err = healthy.Contains("m5") + expectNoError(t, err) + if ok { + t.Errorf("Unexpected presence of 'm5'") + } +} + +type notMinion struct { + minion string +} + +func (n *notMinion) Get(url string) (*http.Response, error) { + if url != "http://"+n.minion+":10250/healthz" { + return &http.Response{StatusCode: http.StatusOK}, nil + } else { + return &http.Response{StatusCode: http.StatusInternalServerError}, nil + } +} + +func TestFiltering(t *testing.T) { + mockMinionRegistry := MockMinionRegistry{ + minions: []string{"m1", "m2", "m3"}, + } + healthy := HealthyMinionRegistry{ + delegate: &mockMinionRegistry, + client: ¬Minion{minion: "m1"}, + port: 10250, + } + + expected := []string{"m2", "m3"} + list, err := healthy.List() + expectNoError(t, err) + if !reflect.DeepEqual(list, expected) { + t.Errorf("Expected %v, Got %v", expected, list) + } + ok, err := healthy.Contains("m1") + expectNoError(t, err) + if ok { + t.Errorf("Unexpected presence of 'm1'") + } +} diff --git a/pkg/registry/mock_registry.go b/pkg/registry/mock_registry.go index 2db7b4fe5c0..89b8d90b8f6 100644 --- a/pkg/registry/mock_registry.go +++ b/pkg/registry/mock_registry.go @@ -75,3 +75,53 @@ func (registry *MockPodRegistry) DeletePod(podId string) error { defer registry.Unlock() return registry.err } + +type MockMinionRegistry struct { + err error + minion string + minions []string + sync.Mutex +} + +func MakeMockMinionRegistry(minions []string) *MockMinionRegistry { + return &MockMinionRegistry{ + minions: minions, + } +} + +func (registry *MockMinionRegistry) List() ([]string, error) { + registry.Lock() + defer registry.Unlock() + return registry.minions, registry.err +} + +func (registry *MockMinionRegistry) Insert(minion string) error { + registry.Lock() + defer registry.Unlock() + registry.minion = minion + return registry.err +} + +func (registry *MockMinionRegistry) Contains(minion string) (bool, error) { + registry.Lock() + defer registry.Unlock() + for _, name := range registry.minions { + if name == minion { + return true, registry.err + } + } + return false, registry.err +} + +func (registry *MockMinionRegistry) Delete(minion string) error { + registry.Lock() + defer registry.Unlock() + var newList []string + for _, name := range registry.minions { + if name != minion { + newList = append(newList, name) + } + } + registry.minions = newList + return registry.err +} diff --git a/pkg/scheduler/randomfit.go b/pkg/scheduler/randomfit.go index 20193fde255..b198f9c1c2b 100644 --- a/pkg/scheduler/randomfit.go +++ b/pkg/scheduler/randomfit.go @@ -32,7 +32,7 @@ type RandomFitScheduler struct { randomLock sync.Mutex } -func MakeRandomFitScheduler(podLister PodLister, random *rand.Rand) Scheduler { +func NewRandomFitScheduler(podLister PodLister, random *rand.Rand) Scheduler { return &RandomFitScheduler{ podLister: podLister, random: random, diff --git a/pkg/scheduler/randomfit_test.go b/pkg/scheduler/randomfit_test.go index 1cdf505b9e1..642179249e8 100644 --- a/pkg/scheduler/randomfit_test.go +++ b/pkg/scheduler/randomfit_test.go @@ -28,7 +28,7 @@ func TestRandomFitSchedulerNothingScheduled(t *testing.T) { r := rand.New(rand.NewSource(0)) st := schedulerTester{ t: t, - scheduler: MakeRandomFitScheduler(&fakeRegistry, r), + scheduler: NewRandomFitScheduler(&fakeRegistry, r), minionLister: FakeMinionLister{"m1", "m2", "m3"}, } st.expectSchedule(api.Pod{}, "m3") @@ -41,7 +41,7 @@ func TestRandomFitSchedulerFirstScheduled(t *testing.T) { r := rand.New(rand.NewSource(0)) st := schedulerTester{ t: t, - scheduler: MakeRandomFitScheduler(fakeRegistry, r), + scheduler: NewRandomFitScheduler(fakeRegistry, r), minionLister: FakeMinionLister{"m1", "m2", "m3"}, } st.expectSchedule(makePod("", 8080), "m3") @@ -56,7 +56,7 @@ func TestRandomFitSchedulerFirstScheduledComplicated(t *testing.T) { r := rand.New(rand.NewSource(0)) st := schedulerTester{ t: t, - scheduler: MakeRandomFitScheduler(fakeRegistry, r), + scheduler: NewRandomFitScheduler(fakeRegistry, r), minionLister: FakeMinionLister{"m1", "m2", "m3"}, } st.expectSchedule(makePod("", 8080, 8081), "m3") @@ -71,7 +71,7 @@ func TestRandomFitSchedulerFirstScheduledImpossible(t *testing.T) { r := rand.New(rand.NewSource(0)) st := schedulerTester{ t: t, - scheduler: MakeRandomFitScheduler(fakeRegistry, r), + scheduler: NewRandomFitScheduler(fakeRegistry, r), minionLister: FakeMinionLister{"m1", "m2", "m3"}, } st.expectFailure(makePod("", 8080, 8081)) diff --git a/pkg/tools/etcd_tools.go b/pkg/tools/etcd_tools.go index 9f4b625e0d8..9a94416ff60 100644 --- a/pkg/tools/etcd_tools.go +++ b/pkg/tools/etcd_tools.go @@ -25,6 +25,16 @@ import ( "github.com/coreos/go-etcd/etcd" ) +const ( + EtcdErrorCodeNotFound = 100 + EtcdErrorCodeValueRequired = 200 +) + +var ( + EtcdErrorNotFound = &etcd.EtcdError{ErrorCode: EtcdErrorCodeNotFound} + EtcdErrorValueRequired = &etcd.EtcdError{ErrorCode: EtcdErrorCodeValueRequired} +) + // EtcdClient is an injectable interface for testing. type EtcdClient interface { AddChild(key, data string, ttl uint64) (*etcd.Response, error) diff --git a/pkg/tools/etcd_tools_test.go b/pkg/tools/etcd_tools_test.go index 3b382877487..2f0bef5bae7 100644 --- a/pkg/tools/etcd_tools_test.go +++ b/pkg/tools/etcd_tools_test.go @@ -36,7 +36,7 @@ func TestIsNotFoundErr(t *testing.T) { t.Errorf("Expected %#v to return %v, but it did not", err, isNotFound) } } - try(&etcd.EtcdError{ErrorCode: 100}, true) + try(EtcdErrorNotFound, true) try(&etcd.EtcdError{ErrorCode: 101}, false) try(nil, false) try(fmt.Errorf("some other kind of error"), false) diff --git a/pkg/tools/fake_etcd_client.go b/pkg/tools/fake_etcd_client.go index dc08b9c3c5a..6dd3c38ec99 100644 --- a/pkg/tools/fake_etcd_client.go +++ b/pkg/tools/fake_etcd_client.go @@ -74,7 +74,7 @@ func (f *FakeEtcdClient) Get(key string, sort, recursive bool) (*etcd.Response, result := f.Data[key] if result.R == nil { f.t.Errorf("Unexpected get for %s", key) - return &etcd.Response{}, &etcd.EtcdError{ErrorCode: 100} // Key not found + return &etcd.Response{}, EtcdErrorNotFound } f.t.Logf("returning %v: %v %#v", key, result.R, result.E) return result.R, result.E @@ -105,7 +105,7 @@ func (f *FakeEtcdClient) Delete(key string, recursive bool) (*etcd.Response, err R: &etcd.Response{ Node: nil, }, - E: &etcd.EtcdError{ErrorCode: 100}, + E: EtcdErrorNotFound, } f.DeletedKeys = append(f.DeletedKeys, key) diff --git a/third_party/deps.sh b/third_party/deps.sh index 5f63f14c5d9..36c1de723ff 100755 --- a/third_party/deps.sh +++ b/third_party/deps.sh @@ -5,6 +5,8 @@ TOP_PACKAGES=" code.google.com/p/goauth2/compute/serviceaccount code.google.com/p/goauth2/oauth code.google.com/p/google-api-go-client/compute/v1 + github.com/google/cadvisor/info + github.com/google/cadvisor/client " DEP_PACKAGES=" @@ -13,7 +15,6 @@ DEP_PACKAGES=" code.google.com/p/google-api-go-client/googleapi github.com/coreos/go-log/log github.com/coreos/go-systemd/journal - github.com/google/cadvisor/info " PACKAGES="$TOP_PACKAGES $DEP_PACKAGES" diff --git a/third_party/src/github.com/google/cadvisor/.gitignore b/third_party/src/github.com/google/cadvisor/.gitignore new file mode 100644 index 00000000000..1377554ebea --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/.gitignore @@ -0,0 +1 @@ +*.swp diff --git a/third_party/src/github.com/google/cadvisor/.travis.yml b/third_party/src/github.com/google/cadvisor/.travis.yml index 8542bd19106..ed54fedc5d6 100644 --- a/third_party/src/github.com/google/cadvisor/.travis.yml +++ b/third_party/src/github.com/google/cadvisor/.travis.yml @@ -1,13 +1,12 @@ language: go go: - - 1.2 + - 1.3 before_script: - go get github.com/stretchr/testify/mock - go get github.com/kr/pretty + - wget http://s3.amazonaws.com/influxdb/influxdb_latest_amd64.deb + - sudo dpkg -i influxdb_latest_amd64.deb + - sudo service influxdb start script: - - go test -v -race github.com/google/cadvisor/container - - go test -v github.com/google/cadvisor/info - - go test -v github.com/google/cadvisor/client - - go test -v github.com/google/cadvisor/sampling - - go test -v github.com/google/cadvisor/storage/memory + - go test -v -race github.com/google/cadvisor/... - go build github.com/google/cadvisor diff --git a/third_party/src/github.com/google/cadvisor/CHANGELOG.md b/third_party/src/github.com/google/cadvisor/CHANGELOG.md new file mode 100644 index 00000000000..32d8b2ec2e0 --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/CHANGELOG.md @@ -0,0 +1,20 @@ +# Changelog + +## 0.1.2 (2014-07-10) +- Added Storage Driver concept (flag: storage_driver), default is the in-memory driver +- Implemented InfluxDB storage driver +- Support in REST API for specifying number of stats to return +- Allow running without lmctfy (flag: allow_lmctfy) +- Bugfixes + +## 0.1.0 (2014-06-14) +- Support for container aliases +- Sampling historical usage and exporting that in the REST API +- Bugfixes for UI + +## 0.0.0 (2014-06-10) +- Initial version of cAdvisor +- Web UI with auto-updating stats +- v1.0 REST API with container and machine information +- Support for Docker containers +- Support for lmctfy containers diff --git a/third_party/src/github.com/google/cadvisor/CONTRIBUTORS b/third_party/src/github.com/google/cadvisor/CONTRIBUTORS index d0c97f1ad9f..cb65c25e88d 100644 --- a/third_party/src/github.com/google/cadvisor/CONTRIBUTORS +++ b/third_party/src/github.com/google/cadvisor/CONTRIBUTORS @@ -6,6 +6,10 @@ # Please keep the list sorted by first name. +Jason Swindle +Johan Euphrosine Kamil Yurtsever Nan Deng +Rohit Jnagal Victor Marmol +Zohaib Maya diff --git a/third_party/src/github.com/google/cadvisor/advice/interference/detector.go b/third_party/src/github.com/google/cadvisor/advice/interference/detector.go new file mode 100644 index 00000000000..8c29176311c --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/advice/interference/detector.go @@ -0,0 +1,32 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package inference + +import "github.com/google/cadvisor/info" + +// InterferenceDectector detects if there's a container which +// interferences with a set of containers. The detector tracks +// a set of containers and find the victims and antagonist. +type InterferenceDetector interface { + // Tracks the behavior of the container. + AddContainer(ref info.ContainerReference) + + // Returns a list of possible interferences. The upper layer may take action + // based on the interference. + Detect() ([]*info.Interference, error) + + // The name of the detector. + Name() string +} diff --git a/third_party/src/github.com/google/cadvisor/api/handler.go b/third_party/src/github.com/google/cadvisor/api/handler.go index 409dc917791..393705bde04 100644 --- a/third_party/src/github.com/google/cadvisor/api/handler.go +++ b/third_party/src/github.com/google/cadvisor/api/handler.go @@ -19,12 +19,13 @@ package api import ( "encoding/json" "fmt" + "io" "log" "net/http" - "net/url" "strings" "time" + "github.com/google/cadvisor/info" "github.com/google/cadvisor/manager" ) @@ -34,9 +35,11 @@ const ( MachineApi = "machine" ) -func HandleRequest(m manager.Manager, w http.ResponseWriter, u *url.URL) error { +func HandleRequest(m manager.Manager, w http.ResponseWriter, r *http.Request) error { start := time.Now() + u := r.URL + // Get API request type. requestType := u.Path[len(ApiResource):] i := strings.Index(requestType, "/") @@ -46,7 +49,8 @@ func HandleRequest(m manager.Manager, w http.ResponseWriter, u *url.URL) error { requestType = requestType[:i] } - if requestType == MachineApi { + switch { + case requestType == MachineApi: log.Printf("Api - Machine") // Get the MachineInfo @@ -60,14 +64,20 @@ func HandleRequest(m manager.Manager, w http.ResponseWriter, u *url.URL) error { fmt.Fprintf(w, "Failed to marshall MachineInfo with error: %s", err) } w.Write(out) - } else if requestType == ContainersApi { + case requestType == ContainersApi: // The container name is the path after the requestType containerName := requestArgs log.Printf("Api - Container(%s)", containerName) + var query info.ContainerInfoRequest + decoder := json.NewDecoder(r.Body) + err := decoder.Decode(&query) + if err != nil && err != io.EOF { + return fmt.Errorf("unable to decode the json value: ", err) + } // Get the container. - cont, err := m.GetContainerInfo(containerName) + cont, err := m.GetContainerInfo(containerName, &query) if err != nil { fmt.Fprintf(w, "Failed to get container \"%s\" with error: %s", containerName, err) return err @@ -79,7 +89,7 @@ func HandleRequest(m manager.Manager, w http.ResponseWriter, u *url.URL) error { fmt.Fprintf(w, "Failed to marshall container %q with error: %s", containerName, err) } w.Write(out) - } else { + default: return fmt.Errorf("unknown API request type %q", requestType) } diff --git a/third_party/src/github.com/google/cadvisor/cadvisor.go b/third_party/src/github.com/google/cadvisor/cadvisor.go index b6fa9f917b5..5df6e27d24d 100644 --- a/third_party/src/github.com/google/cadvisor/cadvisor.go +++ b/third_party/src/github.com/google/cadvisor/cadvisor.go @@ -27,32 +27,46 @@ import ( "github.com/google/cadvisor/manager" "github.com/google/cadvisor/pages" "github.com/google/cadvisor/pages/static" - "github.com/google/cadvisor/storage/memory" ) var argPort = flag.Int("port", 8080, "port to listen") -var argSampleSize = flag.Int("samples", 1024, "number of samples we want to keep") -var argHistoryDuration = flag.Int("history_duration", 60, "number of seconds of container history to keep") +var argAllowLmctfy = flag.Bool("allow_lmctfy", true, "whether to allow lmctfy as a container handler") + +var argDbDriver = flag.String("storage_driver", "memory", "storage driver to use. Options are: memory (default) and influxdb") func main() { flag.Parse() - storage := memory.New(*argSampleSize, *argHistoryDuration) - // TODO(monnand): Add stats writer for manager - containerManager, err := manager.New(storage) + storageDriver, err := NewStorageDriver(*argDbDriver) + if err != nil { + log.Fatalf("Failed to connect to database: %s", err) + } + + containerManager, err := manager.New(storageDriver) if err != nil { log.Fatalf("Failed to create a Container Manager: %s", err) } - if err := lmctfy.Register("/"); err != nil { - log.Printf("lmctfy registration failed: %v.", err) - log.Print("Running in docker only mode.") - if err := docker.Register(containerManager, "/"); err != nil { - log.Printf("Docker registration failed: %v.", err) - log.Fatalf("Unable to continue without docker or lmctfy.") + // Register lmctfy for the root if allowed and available. + registeredRoot := false + if *argAllowLmctfy { + if err := lmctfy.Register("/"); err != nil { + log.Printf("lmctfy registration failed: %v.", err) + log.Print("Running in docker only mode.") + } else { + registeredRoot = true } } + // Register Docker for root if we were unable to register lmctfy. + if !registeredRoot { + if err := docker.Register(containerManager, "/"); err != nil { + log.Printf("Docker registration failed: %v.", err) + log.Fatalf("Unable to continue without root handler.") + } + } + + // Register Docker for all Docker containers. if err := docker.Register(containerManager, "/docker"); err != nil { // Ignore this error because we should work with lmctfy only log.Printf("Docker registration failed: %v.", err) @@ -69,7 +83,7 @@ func main() { // Handler for the API. http.HandleFunc(api.ApiResource, func(w http.ResponseWriter, r *http.Request) { - err := api.HandleRequest(containerManager, w, r.URL) + err := api.HandleRequest(containerManager, w, r) if err != nil { fmt.Fprintf(w, "%s", err) } diff --git a/third_party/src/github.com/google/cadvisor/client/client.go b/third_party/src/github.com/google/cadvisor/client/client.go index d521b1e3fa8..033cbc2a31e 100644 --- a/third_party/src/github.com/google/cadvisor/client/client.go +++ b/third_party/src/github.com/google/cadvisor/client/client.go @@ -15,6 +15,7 @@ package cadvisor import ( + "bytes" "encoding/json" "fmt" "io/ioutil" @@ -49,7 +50,7 @@ func (self *Client) machineInfoUrl() string { func (self *Client) MachineInfo() (minfo *info.MachineInfo, err error) { u := self.machineInfoUrl() ret := new(info.MachineInfo) - err = self.httpGetJsonData(ret, u, "machine info") + err = self.httpGetJsonData(ret, nil, u, "machine info") if err != nil { return } @@ -64,8 +65,19 @@ func (self *Client) containerInfoUrl(name string) string { return strings.Join([]string{self.baseUrl, "containers", name}, "/") } -func (self *Client) httpGetJsonData(data interface{}, url, infoName string) error { - resp, err := http.Get(url) +func (self *Client) httpGetJsonData(data, postData interface{}, url, infoName string) error { + var resp *http.Response + var err error + + if postData != nil { + data, err := json.Marshal(postData) + if err != nil { + return fmt.Errorf("unable to marshal data: %v", err) + } + resp, err = http.Post(url, "application/json", bytes.NewBuffer(data)) + } else { + resp, err = http.Get(url) + } if err != nil { err = fmt.Errorf("unable to get %v: %v", infoName, err) return err @@ -84,10 +96,12 @@ func (self *Client) httpGetJsonData(data interface{}, url, infoName string) erro return nil } -func (self *Client) ContainerInfo(name string) (cinfo *info.ContainerInfo, err error) { +func (self *Client) ContainerInfo( + name string, + query *info.ContainerInfoRequest) (cinfo *info.ContainerInfo, err error) { u := self.containerInfoUrl(name) ret := new(info.ContainerInfo) - err = self.httpGetJsonData(ret, u, fmt.Sprintf("container info for %v", name)) + err = self.httpGetJsonData(ret, query, u, fmt.Sprintf("container info for %v", name)) if err != nil { return } diff --git a/third_party/src/github.com/google/cadvisor/client/client_test.go b/third_party/src/github.com/google/cadvisor/client/client_test.go index d02607302b4..a5b7f69113a 100644 --- a/third_party/src/github.com/google/cadvisor/client/client_test.go +++ b/third_party/src/github.com/google/cadvisor/client/client_test.go @@ -21,33 +21,42 @@ import ( "net/http/httptest" "reflect" "testing" + "time" "github.com/google/cadvisor/info" + itest "github.com/google/cadvisor/info/test" + "github.com/kr/pretty" ) func testGetJsonData( - strRep string, - emptyData interface{}, + expected interface{}, f func() (interface{}, error), ) error { - err := json.Unmarshal([]byte(strRep), emptyData) - if err != nil { - return fmt.Errorf("invalid json input: %v", err) - } reply, err := f() if err != nil { return fmt.Errorf("unable to retrieve data: %v", err) } - if !reflect.DeepEqual(reply, emptyData) { - return fmt.Errorf("retrieved wrong data: %+v != %+v", reply, emptyData) + if !reflect.DeepEqual(reply, expected) { + return pretty.Errorf("retrieved wrong data: %# v != %# v", reply, expected) } return nil } -func cadvisorTestClient(path, reply string) (*Client, *httptest.Server, error) { +func cadvisorTestClient(path string, expectedPostObj, expectedPostObjEmpty, replyObj interface{}, t *testing.T) (*Client, *httptest.Server, error) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == path { - fmt.Fprint(w, reply) + if expectedPostObj != nil { + decoder := json.NewDecoder(r.Body) + err := decoder.Decode(expectedPostObjEmpty) + if err != nil { + t.Errorf("Recieved invalid object: %v", err) + } + if !reflect.DeepEqual(expectedPostObj, expectedPostObjEmpty) { + t.Errorf("Recieved unexpected object: %+v", expectedPostObjEmpty) + } + } + encoder := json.NewEncoder(w) + encoder.Encode(replyObj) } else if r.URL.Path == "/api/v1.0/machine" { fmt.Fprint(w, `{"num_cores":8,"memory_capacity":31625871360}`) } else { @@ -64,693 +73,69 @@ func cadvisorTestClient(path, reply string) (*Client, *httptest.Server, error) { } func TestGetMachineinfo(t *testing.T) { - respStr := `{"num_cores":8,"memory_capacity":31625871360}` - client, server, err := cadvisorTestClient("/api/v1.0/machine", respStr) + minfo := &info.MachineInfo{ + NumCores: 8, + MemoryCapacity: 31625871360, + } + client, server, err := cadvisorTestClient("/api/v1.0/machine", nil, nil, minfo, t) if err != nil { t.Fatalf("unable to get a client %v", err) } defer server.Close() - err = testGetJsonData(respStr, &info.MachineInfo{}, func() (interface{}, error) { - return client.MachineInfo() - }) + returned, err := client.MachineInfo() if err != nil { t.Fatal(err) } + if !reflect.DeepEqual(returned, minfo) { + t.Fatalf("received unexpected machine info") + } } func TestGetContainerInfo(t *testing.T) { - respStr := ` -{ - "name": "%v", - "spec": { - "cpu": { - "limit": 18446744073709551000, - "max_limit": 0, - "mask": { - "data": [ - 18446744073709551000 - ] - } - }, - "memory": { - "limit": 18446744073709551000, - "swap_limit": 18446744073709551000 - } - }, - "stats": [ - { - "timestamp": "2014-06-13T01:03:26.434981825Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:27.538394608Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:28.640302072Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:29.74247308Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:30.844494537Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:31.946757066Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:33.050214062Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:34.15222186Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:35.25417391Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:36.355902169Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:37.457585928Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:38.559417379Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:39.662978029Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:40.764671232Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - }, - { - "timestamp": "2014-06-13T01:03:41.866456459Z", - "cpu": { - "usage": { - "total": 56896502, - "per_cpu": [ - 20479682, - 13579420, - 6025040, - 2255123, - 3635661, - 2489368, - 5158288, - 3273920 - ], - "user": 10000000, - "system": 30000000 - }, - "load": 0 - }, - "memory": { - "usage": 495616, - "container_data": { - "pgfault": 2279 - }, - "hierarchical_data": { - "pgfault": 2279 - } - } - } - ], - "stats_summary": { - "max_memory_usage": 495616, - "samples": [ - { - "timestamp": "2014-06-13T01:03:27.538394608Z", - "duration": 1103412783, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:28.640302072Z", - "duration": 1101907464, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:29.74247308Z", - "duration": 1102171008, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:30.844494537Z", - "duration": 1102021457, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:31.946757066Z", - "duration": 1102262529, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:33.050214062Z", - "duration": 1103456996, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:34.15222186Z", - "duration": 1102007798, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:35.25417391Z", - "duration": 1101952050, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:36.355902169Z", - "duration": 1101728259, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:37.457585928Z", - "duration": 1101683759, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:38.559417379Z", - "duration": 1101831451, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:39.662978029Z", - "duration": 1103560650, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:40.764671232Z", - "duration": 1101693203, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - }, - { - "timestamp": "2014-06-13T01:03:41.866456459Z", - "duration": 1101785227, - "cpu": { - "usage": 0 - }, - "memory": { - "usage": 495616 - } - } - ], - "memory_usage_percentiles": [ - { - "percentage": 50, - "value": 495616 - }, - { - "percentage": 80, - "value": 495616 - }, - { - "percentage": 90, - "value": 495616 - }, - { - "percentage": 95, - "value": 495616 - }, - { - "percentage": 99, - "value": 495616 - } - ], - "cpu_usage_percentiles": [ - { - "percentage": 50, - "value": 0 - }, - { - "percentage": 80, - "value": 0 - }, - { - "percentage": 90, - "value": 0 - }, - { - "percentage": 95, - "value": 0 - }, - { - "percentage": 99, - "value": 0 - } - ] - } -} -` + query := &info.ContainerInfoRequest{ + NumStats: 3, + NumSamples: 2, + CpuUsagePercentiles: []int{10, 50, 90}, + MemoryUsagePercentages: []int{10, 80, 90}, + } containerName := "/some/container" - respStr = fmt.Sprintf(respStr, containerName) - client, server, err := cadvisorTestClient(fmt.Sprintf("/api/v1.0/containers%v", containerName), respStr) + cinfo := itest.GenerateRandomContainerInfo(containerName, 4, query, 1*time.Second) + client, server, err := cadvisorTestClient(fmt.Sprintf("/api/v1.0/containers%v", containerName), query, &info.ContainerInfoRequest{}, cinfo, t) if err != nil { t.Fatalf("unable to get a client %v", err) } defer server.Close() - err = testGetJsonData(respStr, &info.ContainerInfo{}, func() (interface{}, error) { - return client.ContainerInfo(containerName) - }) + returned, err := client.ContainerInfo(containerName, query) if err != nil { t.Fatal(err) } + + // We cannot use DeepEqual() to compare them directly, + // because json en/decoded time may have precision issues. + if !reflect.DeepEqual(returned.ContainerReference, cinfo.ContainerReference) { + t.Errorf("received unexpected container ref") + } + if !reflect.DeepEqual(returned.Subcontainers, cinfo.Subcontainers) { + t.Errorf("received unexpected subcontainers") + } + if !reflect.DeepEqual(returned.Spec, cinfo.Spec) { + t.Errorf("received unexpected spec") + } + if !reflect.DeepEqual(returned.StatsPercentiles, cinfo.StatsPercentiles) { + t.Errorf("received unexpected spec") + } + + for i, expectedStats := range cinfo.Stats { + returnedStats := returned.Stats[i] + if !expectedStats.Eq(returnedStats) { + t.Errorf("received unexpected stats") + } + } + + for i, expectedSample := range cinfo.Samples { + returnedSample := returned.Samples[i] + if !expectedSample.Eq(returnedSample) { + t.Errorf("received unexpected sample") + } + } } diff --git a/third_party/src/github.com/google/cadvisor/container/docker/handler.go b/third_party/src/github.com/google/cadvisor/container/docker/handler.go index 5c8fc10bcc0..f6456a4cf2e 100644 --- a/third_party/src/github.com/google/cadvisor/container/docker/handler.go +++ b/third_party/src/github.com/google/cadvisor/container/docker/handler.go @@ -118,7 +118,7 @@ func (self *dockerContainerHandler) isDockerContainer() bool { } // TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API. -func readLibcontainerSpec(id string) (spec *libcontainer.Container, err error) { +func readLibcontainerSpec(id string) (spec *libcontainer.Config, err error) { dir := "/var/lib/docker/execdriver/native" configPath := path.Join(dir, id, "container.json") f, err := os.Open(configPath) @@ -127,7 +127,7 @@ func readLibcontainerSpec(id string) (spec *libcontainer.Container, err error) { } defer f.Close() d := json.NewDecoder(f) - ret := new(libcontainer.Container) + ret := new(libcontainer.Config) err = d.Decode(ret) if err != nil { return @@ -136,7 +136,7 @@ func readLibcontainerSpec(id string) (spec *libcontainer.Container, err error) { return } -func libcontainerConfigToContainerSpec(config *libcontainer.Container, mi *info.MachineInfo) *info.ContainerSpec { +func libcontainerConfigToContainerSpec(config *libcontainer.Config, mi *info.MachineInfo) *info.ContainerSpec { spec := new(info.ContainerSpec) spec.Memory = new(info.MemorySpec) spec.Memory.Limit = math.MaxUint64 @@ -209,6 +209,12 @@ func libcontainerToContainerStats(s *cgroups.Stats, mi *info.MachineInfo) *info. ret.Memory.ContainerData.Pgmajfault = v ret.Memory.HierarchicalData.Pgmajfault = v } + if v, ok := s.MemoryStats.Stats["total_inactive_anon"]; ok { + ret.Memory.WorkingSet = ret.Memory.Usage - v + if v, ok := s.MemoryStats.Stats["total_active_file"]; ok { + ret.Memory.WorkingSet -= v + } + } return ret } diff --git a/third_party/src/github.com/google/cadvisor/container/test/mock.go b/third_party/src/github.com/google/cadvisor/container/test/mock.go new file mode 100644 index 00000000000..fbbe8fe0ac0 --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/container/test/mock.go @@ -0,0 +1,88 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package test + +import ( + "github.com/google/cadvisor/container" + "github.com/google/cadvisor/info" + "github.com/stretchr/testify/mock" +) + +// This struct mocks a container handler. +type MockContainerHandler struct { + mock.Mock + Name string + Aliases []string +} + +// If self.Name is not empty, then ContainerReference() will return self.Name and self.Aliases. +// Otherwise, it will use the value provided by .On().Return(). +func (self *MockContainerHandler) ContainerReference() (info.ContainerReference, error) { + if len(self.Name) > 0 { + var aliases []string + if len(self.Aliases) > 0 { + aliases = make([]string, len(self.Aliases)) + copy(aliases, self.Aliases) + } + return info.ContainerReference{ + Name: self.Name, + Aliases: aliases, + }, nil + } + args := self.Called() + return args.Get(0).(info.ContainerReference), args.Error(1) +} + +func (self *MockContainerHandler) GetSpec() (*info.ContainerSpec, error) { + args := self.Called() + return args.Get(0).(*info.ContainerSpec), args.Error(1) +} + +func (self *MockContainerHandler) GetStats() (*info.ContainerStats, error) { + args := self.Called() + return args.Get(0).(*info.ContainerStats), args.Error(1) +} + +func (self *MockContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) { + args := self.Called(listType) + return args.Get(0).([]info.ContainerReference), args.Error(1) +} + +func (self *MockContainerHandler) ListThreads(listType container.ListType) ([]int, error) { + args := self.Called(listType) + return args.Get(0).([]int), args.Error(1) +} + +func (self *MockContainerHandler) ListProcesses(listType container.ListType) ([]int, error) { + args := self.Called(listType) + return args.Get(0).([]int), args.Error(1) +} + +type FactoryForMockContainerHandler struct { + Name string + PrepareContainerHandlerFunc func(name string, handler *MockContainerHandler) +} + +func (self *FactoryForMockContainerHandler) String() string { + return self.Name +} + +func (self *FactoryForMockContainerHandler) NewContainerHandler(name string) (container.ContainerHandler, error) { + handler := &MockContainerHandler{} + if self.PrepareContainerHandlerFunc != nil { + self.PrepareContainerHandlerFunc(name, handler) + } + return handler, nil +} diff --git a/third_party/src/github.com/google/cadvisor/deploy/lmctfy-docker/Dockerfile b/third_party/src/github.com/google/cadvisor/deploy/lmctfy-docker/Dockerfile index ce9f76de324..38f03fcc63e 100644 --- a/third_party/src/github.com/google/cadvisor/deploy/lmctfy-docker/Dockerfile +++ b/third_party/src/github.com/google/cadvisor/deploy/lmctfy-docker/Dockerfile @@ -7,7 +7,7 @@ RUN apt-get update && apt-get install -y -q --no-install-recommends pkg-config l # Get the lcmtfy and cAdvisor binaries. ADD http://storage.googleapis.com/cadvisor-bin/lmctfy/lmctfy /usr/bin/lmctfy ADD http://storage.googleapis.com/cadvisor-bin/lmctfy/libre2.so.0.0.0 /usr/lib/libre2.so.0 -ADD http://storage.googleapis.com/cadvisor-bin/cadvisor /usr/bin/cadvisor +ADD http://storage.googleapis.com/cadvisor-bin/cadvisor-0.1.2 /usr/bin/cadvisor RUN chmod +x /usr/bin/lmctfy && chmod +x /usr/bin/cadvisor EXPOSE 8080 diff --git a/third_party/src/github.com/google/cadvisor/info/advice.go b/third_party/src/github.com/google/cadvisor/info/advice.go new file mode 100644 index 00000000000..8084cf4741f --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/info/advice.go @@ -0,0 +1,34 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package info + +// This struct describes one type of relationship between containers: One +// container, antagonist, interferes the performance of other +// containers, victims. +type Interference struct { + // Absolute name of the antagonist container name. This field + // should not be empty. + Antagonist string `json:"antagonist"` + + // The absolute path of the victims. This field should not be empty. + Victims []string `json:"victims"` + + // The name of the detector used to detect this antagonism. This field + // should not be empty + Detector string `json:"detector"` + + // Human readable description of this interference + Description string `json:"description,omitempty"` +} diff --git a/third_party/src/github.com/google/cadvisor/info/container.go b/third_party/src/github.com/google/cadvisor/info/container.go index 94c7c6574c5..cd6a717502e 100644 --- a/third_party/src/github.com/google/cadvisor/info/container.go +++ b/third_party/src/github.com/google/cadvisor/info/container.go @@ -16,6 +16,7 @@ package info import ( "fmt" + "reflect" "sort" "time" ) @@ -57,6 +58,40 @@ type ContainerReference struct { Aliases []string `json:"aliases,omitempty"` } +// ContainerInfoQuery is used when users check a container info from the REST api. +// It specifies how much data users want to get about a container +type ContainerInfoRequest struct { + // Max number of stats to return. + NumStats int `json:"num_stats,omitempty"` + // Max number of samples to return. + NumSamples int `json:"num_samples,omitempty"` + + // Different percentiles of CPU usage within a period. The values must be within [0, 100] + CpuUsagePercentiles []int `json:"cpu_usage_percentiles,omitempty"` + // Different percentiles of memory usage within a period. The values must be within [0, 100] + MemoryUsagePercentages []int `json:"memory_usage_percentiles,omitempty"` +} + +func (self *ContainerInfoRequest) FillDefaults() *ContainerInfoRequest { + ret := self + if ret == nil { + ret = new(ContainerInfoRequest) + } + if ret.NumStats <= 0 { + ret.NumStats = 1024 + } + if ret.NumSamples <= 0 { + ret.NumSamples = 1024 + } + if len(ret.CpuUsagePercentiles) == 0 { + ret.CpuUsagePercentiles = []int{50, 80, 90, 99} + } + if len(ret.MemoryUsagePercentages) == 0 { + ret.MemoryUsagePercentages = []int{50, 80, 90, 99} + } + return ret +} + type ContainerInfo struct { ContainerReference @@ -119,7 +154,7 @@ type CpuStats struct { // Per CPU/core usage of the container. // Unit: nanoseconds. - PerCpu []uint64 `json:"per_cpu,omitempty"` + PerCpu []uint64 `json:"per_cpu_usage,omitempty"` // Time spent in user space. // Unit: nanoseconds @@ -165,6 +200,42 @@ type ContainerStats struct { Memory *MemoryStats `json:"memory,omitempty"` } +// Makes a deep copy of the ContainerStats and returns a pointer to the new +// copy. Copy() will allocate a new ContainerStats object if dst is nil. +func (self *ContainerStats) Copy(dst *ContainerStats) *ContainerStats { + if dst == nil { + dst = new(ContainerStats) + } + dst.Timestamp = self.Timestamp + if self.Cpu != nil { + if dst.Cpu == nil { + dst.Cpu = new(CpuStats) + } + // To make a deep copy of a slice, we need to copy every value + // in the slice. To make less memory allocation, we would like + // to reuse the slice in dst if possible. + percpu := dst.Cpu.Usage.PerCpu + if len(percpu) != len(self.Cpu.Usage.PerCpu) { + percpu = make([]uint64, len(self.Cpu.Usage.PerCpu)) + } + dst.Cpu.Usage = self.Cpu.Usage + dst.Cpu.Load = self.Cpu.Load + copy(percpu, self.Cpu.Usage.PerCpu) + dst.Cpu.Usage.PerCpu = percpu + } else { + dst.Cpu = nil + } + if self.Memory != nil { + if dst.Memory == nil { + dst.Memory = new(MemoryStats) + } + *dst.Memory = *self.Memory + } else { + dst.Memory = nil + } + return dst +} + type ContainerStatsSample struct { // Timetamp of the end of the sample period Timestamp time.Time `json:"timestamp"` @@ -173,6 +244,9 @@ type ContainerStatsSample struct { Cpu struct { // number of nanoseconds of CPU time used by the container Usage uint64 `json:"usage"` + + // Per-core usage of the container. (unit: nanoseconds) + PerCpuUsage []uint64 `json:"per_cpu_usage,omitempty"` } `json:"cpu"` Memory struct { // Units: Bytes. @@ -180,6 +254,67 @@ type ContainerStatsSample struct { } `json:"memory"` } +func timeEq(t1, t2 time.Time, tolerance time.Duration) bool { + // t1 should not be later than t2 + if t1.After(t2) { + t1, t2 = t2, t1 + } + diff := t2.Sub(t1) + if diff <= tolerance { + return true + } + return false +} + +func durationEq(a, b time.Duration, tolerance time.Duration) bool { + if a > b { + a, b = b, a + } + diff := a - b + if diff <= tolerance { + return true + } + return false +} + +const ( + // 10ms, i.e. 0.01s + timePrecision time.Duration = 10 * time.Millisecond +) + +// This function is useful because we do not require precise time +// representation. +func (a *ContainerStats) Eq(b *ContainerStats) bool { + if !timeEq(a.Timestamp, b.Timestamp, timePrecision) { + return false + } + if !reflect.DeepEqual(a.Cpu, b.Cpu) { + return false + } + if !reflect.DeepEqual(a.Memory, b.Memory) { + return false + } + return true +} + +// This function is useful because we do not require precise time +// representation. +func (a *ContainerStatsSample) Eq(b *ContainerStatsSample) bool { + if !timeEq(a.Timestamp, b.Timestamp, timePrecision) { + return false + } + if !durationEq(a.Duration, b.Duration, timePrecision) { + return false + } + if !reflect.DeepEqual(a.Cpu, b.Cpu) { + return false + } + if !reflect.DeepEqual(a.Memory, b.Memory) { + return false + } + return true +} + type Percentile struct { Percentage int `json:"percentage"` Value uint64 `json:"value"` @@ -211,9 +346,28 @@ func NewSample(prev, current *ContainerStats) (*ContainerStatsSample, error) { if current.Cpu.Usage.Total < prev.Cpu.Usage.Total { return nil, fmt.Errorf("current CPU usage is less than prev CPU usage (cumulative).") } + + var percpu []uint64 + + if len(current.Cpu.Usage.PerCpu) > 0 { + curNumCpus := len(current.Cpu.Usage.PerCpu) + percpu = make([]uint64, curNumCpus) + + for i, currUsage := range current.Cpu.Usage.PerCpu { + var prevUsage uint64 = 0 + if i < len(prev.Cpu.Usage.PerCpu) { + prevUsage = prev.Cpu.Usage.PerCpu[i] + } + if currUsage < prevUsage { + return nil, fmt.Errorf("current per-core CPU usage is less than prev per-core CPU usage (cumulative).") + } + percpu[i] = currUsage - prevUsage + } + } sample := new(ContainerStatsSample) // Calculate the diff to get the CPU usage within the time interval. sample.Cpu.Usage = current.Cpu.Usage.Total - prev.Cpu.Usage.Total + sample.Cpu.PerCpuUsage = percpu // Memory usage is current memory usage sample.Memory.Usage = current.Memory.Usage sample.Timestamp = current.Timestamp diff --git a/third_party/src/github.com/google/cadvisor/info/container_test.go b/third_party/src/github.com/google/cadvisor/info/container_test.go index 4b275becc62..4b3c84a2423 100644 --- a/third_party/src/github.com/google/cadvisor/info/container_test.go +++ b/third_party/src/github.com/google/cadvisor/info/container_test.go @@ -15,6 +15,7 @@ package info import ( + "reflect" "testing" "time" ) @@ -230,3 +231,68 @@ func TestAddSampleWrongCpuUsage(t *testing.T) { t.Errorf("generated an unexpected sample: %+v", sample) } } + +func TestAddSampleHotPluggingCpu(t *testing.T) { + cpuPrevUsage := uint64(10) + cpuCurrentUsage := uint64(15) + memCurrentUsage := uint64(200) + prevTime := time.Now() + + prev := createStats(cpuPrevUsage, memCurrentUsage, prevTime) + current := createStats(cpuCurrentUsage, memCurrentUsage, prevTime.Add(1*time.Second)) + current.Cpu.Usage.PerCpu = append(current.Cpu.Usage.PerCpu, 10) + + sample, err := NewSample(prev, current) + if err != nil { + t.Errorf("should be able to generate a sample. but received error: %v", err) + } + if len(sample.Cpu.PerCpuUsage) != 2 { + t.Fatalf("Should have 2 cores.") + } + if sample.Cpu.PerCpuUsage[0] != cpuCurrentUsage-cpuPrevUsage { + t.Errorf("First cpu usage is %v. should be %v", sample.Cpu.PerCpuUsage[0], cpuCurrentUsage-cpuPrevUsage) + } + if sample.Cpu.PerCpuUsage[1] != 10 { + t.Errorf("Second cpu usage is %v. should be 10", sample.Cpu.PerCpuUsage[1]) + } +} + +func TestAddSampleHotUnpluggingCpu(t *testing.T) { + cpuPrevUsage := uint64(10) + cpuCurrentUsage := uint64(15) + memCurrentUsage := uint64(200) + prevTime := time.Now() + + prev := createStats(cpuPrevUsage, memCurrentUsage, prevTime) + current := createStats(cpuCurrentUsage, memCurrentUsage, prevTime.Add(1*time.Second)) + prev.Cpu.Usage.PerCpu = append(prev.Cpu.Usage.PerCpu, 10) + + sample, err := NewSample(prev, current) + if err != nil { + t.Errorf("should be able to generate a sample. but received error: %v", err) + } + if len(sample.Cpu.PerCpuUsage) != 1 { + t.Fatalf("Should have 1 cores.") + } + if sample.Cpu.PerCpuUsage[0] != cpuCurrentUsage-cpuPrevUsage { + t.Errorf("First cpu usage is %v. should be %v", sample.Cpu.PerCpuUsage[0], cpuCurrentUsage-cpuPrevUsage) + } +} + +func TestContainerStatsCopy(t *testing.T) { + stats := createStats(100, 101, time.Now()) + shadowStats := stats.Copy(nil) + if !reflect.DeepEqual(stats, shadowStats) { + t.Errorf("Copy() returned different object") + } + stats.Cpu.Usage.PerCpu[0] = shadowStats.Cpu.Usage.PerCpu[0] + 1 + stats.Cpu.Load = shadowStats.Cpu.Load + 1 + stats.Memory.Usage = shadowStats.Memory.Usage + 1 + if reflect.DeepEqual(stats, shadowStats) { + t.Errorf("Copy() did not deeply copy the object") + } + stats = shadowStats.Copy(stats) + if !reflect.DeepEqual(stats, shadowStats) { + t.Errorf("Copy() returned different object") + } +} diff --git a/third_party/src/github.com/google/cadvisor/info/test/datagen.go b/third_party/src/github.com/google/cadvisor/info/test/datagen.go new file mode 100644 index 00000000000..a1c0a3565ca --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/info/test/datagen.go @@ -0,0 +1,127 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package test + +import ( + "fmt" + "math" + "math/rand" + "time" + + "github.com/google/cadvisor/info" +) + +func GenerateRandomStats(numStats, numCores int, duration time.Duration) []*info.ContainerStats { + ret := make([]*info.ContainerStats, numStats) + perCoreUsages := make([]uint64, numCores) + currentTime := time.Now() + for i := range perCoreUsages { + perCoreUsages[i] = uint64(rand.Int63n(1000)) + } + for i := 0; i < numStats; i++ { + stats := new(info.ContainerStats) + stats.Cpu = new(info.CpuStats) + stats.Memory = new(info.MemoryStats) + stats.Timestamp = currentTime + currentTime = currentTime.Add(duration) + + percore := make([]uint64, numCores) + for i := range perCoreUsages { + perCoreUsages[i] += uint64(rand.Int63n(1000)) + percore[i] = perCoreUsages[i] + stats.Cpu.Usage.Total += percore[i] + } + stats.Cpu.Usage.PerCpu = percore + stats.Cpu.Usage.User = stats.Cpu.Usage.Total + stats.Cpu.Usage.System = 0 + stats.Memory.Usage = uint64(rand.Int63n(4096)) + ret[i] = stats + } + return ret +} + +func GenerateRandomContainerSpec(numCores int) *info.ContainerSpec { + ret := &info.ContainerSpec{ + Cpu: &info.CpuSpec{}, + Memory: &info.MemorySpec{}, + } + ret.Cpu.Limit = uint64(1000 + rand.Int63n(2000)) + ret.Cpu.MaxLimit = uint64(1000 + rand.Int63n(2000)) + n := (numCores + 63) / 64 + ret.Cpu.Mask.Data = make([]uint64, n) + for i := 0; i < n; i++ { + ret.Cpu.Mask.Data[i] = math.MaxUint64 + } + + ret.Memory.Limit = uint64(4096 + rand.Int63n(4096)) + return ret +} + +func GenerateRandomContainerInfo(containerName string, numCores int, query *info.ContainerInfoRequest, duration time.Duration) *info.ContainerInfo { + stats := GenerateRandomStats(query.NumStats, numCores, duration) + samples, _ := NewSamplesFromStats(stats...) + if len(samples) > query.NumSamples { + samples = samples[:query.NumSamples] + } + cpuPercentiles := make([]info.Percentile, 0, len(query.CpuUsagePercentiles)) + + // TODO(monnand): This will generate percentiles where 50%tile data may + // be larger than 90%tile data. + for _, p := range query.CpuUsagePercentiles { + percentile := info.Percentile{p, uint64(rand.Int63n(1000))} + cpuPercentiles = append(cpuPercentiles, percentile) + } + memPercentiles := make([]info.Percentile, 0, len(query.MemoryUsagePercentages)) + for _, p := range query.MemoryUsagePercentages { + percentile := info.Percentile{p, uint64(rand.Int63n(1000))} + memPercentiles = append(memPercentiles, percentile) + } + + percentiles := &info.ContainerStatsPercentiles{ + MaxMemoryUsage: uint64(rand.Int63n(4096)), + MemoryUsagePercentiles: memPercentiles, + CpuUsagePercentiles: cpuPercentiles, + } + + spec := GenerateRandomContainerSpec(numCores) + + ret := &info.ContainerInfo{ + ContainerReference: info.ContainerReference{ + Name: containerName, + }, + Spec: spec, + StatsPercentiles: percentiles, + Samples: samples, + Stats: stats, + } + return ret +} + +func NewSamplesFromStats(stats ...*info.ContainerStats) ([]*info.ContainerStatsSample, error) { + if len(stats) < 2 { + return nil, nil + } + samples := make([]*info.ContainerStatsSample, 0, len(stats)-1) + for i, s := range stats[1:] { + prev := stats[i] + sample, err := info.NewSample(prev, s) + if err != nil { + return nil, fmt.Errorf("Unable to generate sample from %+v and %+v: %v", + prev, s, err) + } + samples = append(samples, sample) + } + return samples, nil +} diff --git a/third_party/src/github.com/google/cadvisor/info/version.go b/third_party/src/github.com/google/cadvisor/info/version.go index a00a0222fe6..96d233c9d24 100644 --- a/third_party/src/github.com/google/cadvisor/info/version.go +++ b/third_party/src/github.com/google/cadvisor/info/version.go @@ -15,4 +15,4 @@ package info // Version of cAdvisor. -const VERSION = "0.1.0" +const VERSION = "0.1.2" diff --git a/third_party/src/github.com/google/cadvisor/manager/container_test.go b/third_party/src/github.com/google/cadvisor/manager/container_test.go new file mode 100644 index 00000000000..c942eb81cab --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/manager/container_test.go @@ -0,0 +1,237 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Per-container manager. + +package manager + +import ( + "fmt" + "reflect" + "testing" + "time" + + "github.com/google/cadvisor/container" + ctest "github.com/google/cadvisor/container/test" + "github.com/google/cadvisor/info" + itest "github.com/google/cadvisor/info/test" + "github.com/google/cadvisor/storage" + stest "github.com/google/cadvisor/storage/test" +) + +func createContainerDataAndSetHandler( + driver storage.StorageDriver, + f func(*ctest.MockContainerHandler), + t *testing.T, +) *containerData { + factory := &ctest.FactoryForMockContainerHandler{ + Name: "factoryForMockContainer", + PrepareContainerHandlerFunc: func(name string, handler *ctest.MockContainerHandler) { + handler.Name = name + f(handler) + }, + } + container.RegisterContainerHandlerFactory("/", factory) + + if driver == nil { + driver = &stest.MockStorageDriver{} + } + + ret, err := NewContainerData("/container", driver) + if err != nil { + t.Fatal(err) + } + return ret +} + +func TestContainerUpdateSubcontainers(t *testing.T) { + var handler *ctest.MockContainerHandler + subcontainers := []info.ContainerReference{ + {Name: "/container/ee0103"}, + {Name: "/container/abcd"}, + {Name: "/container/something"}, + } + cd := createContainerDataAndSetHandler( + nil, + func(h *ctest.MockContainerHandler) { + h.On("ListContainers", container.LIST_SELF).Return( + subcontainers, + nil, + ) + handler = h + }, + t, + ) + + err := cd.updateSubcontainers() + if err != nil { + t.Fatal(err) + } + + if len(cd.info.Subcontainers) != len(subcontainers) { + t.Errorf("Received %v subcontainers, should be %v", len(cd.info.Subcontainers), len(subcontainers)) + } + + for _, sub := range cd.info.Subcontainers { + found := false + for _, sub2 := range subcontainers { + if sub.Name == sub2.Name { + found = true + } + } + if !found { + t.Errorf("Received unknown sub container %v", sub) + } + } + + handler.AssertExpectations(t) +} + +func TestContainerUpdateSubcontainersWithError(t *testing.T) { + var handler *ctest.MockContainerHandler + cd := createContainerDataAndSetHandler( + nil, + func(h *ctest.MockContainerHandler) { + h.On("ListContainers", container.LIST_SELF).Return( + []info.ContainerReference{}, + fmt.Errorf("some error"), + ) + handler = h + }, + t, + ) + + err := cd.updateSubcontainers() + if err == nil { + t.Fatal("updateSubcontainers should return error") + } + if len(cd.info.Subcontainers) != 0 { + t.Errorf("Received %v subcontainers, should be 0", len(cd.info.Subcontainers)) + } + + handler.AssertExpectations(t) +} + +func TestContainerUpdateStats(t *testing.T) { + var handler *ctest.MockContainerHandler + var ref info.ContainerReference + + driver := &stest.MockStorageDriver{} + + statsList := itest.GenerateRandomStats(1, 4, 1*time.Second) + stats := statsList[0] + + cd := createContainerDataAndSetHandler( + driver, + func(h *ctest.MockContainerHandler) { + h.On("GetStats").Return( + stats, + nil, + ) + handler = h + ref.Name = h.Name + }, + t, + ) + + driver.On("AddStats", ref, stats).Return(nil) + + err := cd.updateStats() + if err != nil { + t.Fatal(err) + } + + handler.AssertExpectations(t) +} + +func TestContainerUpdateSpec(t *testing.T) { + var handler *ctest.MockContainerHandler + spec := itest.GenerateRandomContainerSpec(4) + cd := createContainerDataAndSetHandler( + nil, + func(h *ctest.MockContainerHandler) { + h.On("GetSpec").Return( + spec, + nil, + ) + handler = h + }, + t, + ) + + err := cd.updateSpec() + if err != nil { + t.Fatal(err) + } + + handler.AssertExpectations(t) +} + +func TestContainerGetInfo(t *testing.T) { + var handler *ctest.MockContainerHandler + spec := itest.GenerateRandomContainerSpec(4) + subcontainers := []info.ContainerReference{ + {Name: "/container/ee0103"}, + {Name: "/container/abcd"}, + {Name: "/container/something"}, + } + aliases := []string{"a1", "a2"} + cd := createContainerDataAndSetHandler( + nil, + func(h *ctest.MockContainerHandler) { + h.On("GetSpec").Return( + spec, + nil, + ) + h.On("ListContainers", container.LIST_SELF).Return( + subcontainers, + nil, + ) + h.Aliases = aliases + handler = h + }, + t, + ) + + info, err := cd.GetInfo() + if err != nil { + t.Fatal(err) + } + + handler.AssertExpectations(t) + + if len(info.Subcontainers) != len(subcontainers) { + t.Errorf("Received %v subcontainers, should be %v", len(info.Subcontainers), len(subcontainers)) + } + + for _, sub := range info.Subcontainers { + found := false + for _, sub2 := range subcontainers { + if sub.Name == sub2.Name { + found = true + } + } + if !found { + t.Errorf("Received unknown sub container %v", sub) + } + } + + if !reflect.DeepEqual(spec, info.Spec) { + t.Errorf("received wrong container spec") + } + + if info.Name != handler.Name { + t.Errorf("received wrong container name: received %v; should be %v", info.Name, handler.Name) + } +} diff --git a/third_party/src/github.com/google/cadvisor/manager/manager.go b/third_party/src/github.com/google/cadvisor/manager/manager.go index 1b0503551c4..696e0badbdf 100644 --- a/third_party/src/github.com/google/cadvisor/manager/manager.go +++ b/third_party/src/github.com/google/cadvisor/manager/manager.go @@ -30,7 +30,7 @@ type Manager interface { Start() error // Get information about a container. - GetContainerInfo(containerName string) (*info.ContainerInfo, error) + GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) // Get information about the machine. GetMachineInfo() (*info.MachineInfo, error) @@ -106,8 +106,8 @@ func (m *manager) Start() error { } // Get a container by name. -func (m *manager) GetContainerInfo(containerName string) (*info.ContainerInfo, error) { - log.Printf("Get(%s)", containerName) +func (m *manager) GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) { + log.Printf("Get(%s); %+v", containerName, query) var cont *containerData var ok bool func() { @@ -130,21 +130,21 @@ func (m *manager) GetContainerInfo(containerName string) (*info.ContainerInfo, e var percentiles *info.ContainerStatsPercentiles var samples []*info.ContainerStatsSample var stats []*info.ContainerStats - // TODO(monnand): These numbers should not be hard coded + query = query.FillDefaults() percentiles, err = m.storageDriver.Percentiles( cinfo.Name, - []int{50, 80, 90, 99}, - []int{50, 80, 90, 99}, + query.CpuUsagePercentiles, + query.MemoryUsagePercentages, ) if err != nil { return nil, err } - samples, err = m.storageDriver.Samples(cinfo.Name, 1024) + samples, err = m.storageDriver.Samples(cinfo.Name, query.NumSamples) if err != nil { return nil, err } - stats, err = m.storageDriver.RecentStats(cinfo.Name, 1024) + stats, err = m.storageDriver.RecentStats(cinfo.Name, query.NumStats) if err != nil { return nil, err } diff --git a/third_party/src/github.com/google/cadvisor/manager/manager_test.go b/third_party/src/github.com/google/cadvisor/manager/manager_test.go new file mode 100644 index 00000000000..99862c5db07 --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/manager/manager_test.go @@ -0,0 +1,253 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Per-container manager. + +package manager + +import ( + "reflect" + "testing" + "time" + + "github.com/google/cadvisor/container" + ctest "github.com/google/cadvisor/container/test" + "github.com/google/cadvisor/info" + itest "github.com/google/cadvisor/info/test" + stest "github.com/google/cadvisor/storage/test" +) + +func createManagerAndAddContainers( + driver *stest.MockStorageDriver, + containers []string, + f func(*ctest.MockContainerHandler), + t *testing.T, +) *manager { + if driver == nil { + driver = &stest.MockStorageDriver{} + } + factory := &ctest.FactoryForMockContainerHandler{ + Name: "factoryForManager", + PrepareContainerHandlerFunc: func(name string, handler *ctest.MockContainerHandler) { + handler.Name = name + found := false + for _, c := range containers { + if c == name { + found = true + } + } + if !found { + t.Errorf("Asked to create a container with name %v, which is unknown.", name) + } + f(handler) + }, + } + container.RegisterContainerHandlerFactory("/", factory) + mif, err := New(driver) + if err != nil { + t.Fatal(err) + } + if ret, ok := mif.(*manager); ok { + for _, container := range containers { + ret.containers[container], err = NewContainerData(container, driver) + if err != nil { + t.Fatal(err) + } + } + return ret + } + t.Fatal("Wrong type") + return nil +} + +func TestGetContainerInfo(t *testing.T) { + containers := []string{ + "/c1", + "/c2", + } + + query := &info.ContainerInfoRequest{ + NumStats: 256, + NumSamples: 128, + CpuUsagePercentiles: []int{10, 50, 90}, + MemoryUsagePercentages: []int{10, 80, 90}, + } + + infosMap := make(map[string]*info.ContainerInfo, len(containers)) + handlerMap := make(map[string]*ctest.MockContainerHandler, len(containers)) + + for _, container := range containers { + infosMap[container] = itest.GenerateRandomContainerInfo(container, 4, query, 1*time.Second) + } + + driver := &stest.MockStorageDriver{} + m := createManagerAndAddContainers( + driver, + containers, + func(h *ctest.MockContainerHandler) { + cinfo := infosMap[h.Name] + stats := cinfo.Stats + samples := cinfo.Samples + percentiles := cinfo.StatsPercentiles + spec := cinfo.Spec + driver.On( + "Percentiles", + h.Name, + query.CpuUsagePercentiles, + query.MemoryUsagePercentages, + ).Return( + percentiles, + nil, + ) + + driver.On( + "Samples", + h.Name, + query.NumSamples, + ).Return( + samples, + nil, + ) + + driver.On( + "RecentStats", + h.Name, + query.NumStats, + ).Return( + stats, + nil, + ) + + h.On("ListContainers", container.LIST_SELF).Return( + []info.ContainerReference(nil), + nil, + ) + h.On("GetSpec").Return( + spec, + nil, + ) + handlerMap[h.Name] = h + }, + t, + ) + + returnedInfos := make(map[string]*info.ContainerInfo, len(containers)) + + for _, container := range containers { + cinfo, err := m.GetContainerInfo(container, query) + if err != nil { + t.Fatalf("Unable to get info for container %v: %v", container, err) + } + returnedInfos[container] = cinfo + } + + for container, handler := range handlerMap { + handler.AssertExpectations(t) + returned := returnedInfos[container] + expected := infosMap[container] + if !reflect.DeepEqual(returned, expected) { + t.Errorf("returned unexpected info for container %v; returned %+v; expected %+v", container, returned, expected) + } + } + +} + +func TestGetContainerInfoWithDefaultValue(t *testing.T) { + containers := []string{ + "/c1", + "/c2", + } + + var query *info.ContainerInfoRequest + query = query.FillDefaults() + + infosMap := make(map[string]*info.ContainerInfo, len(containers)) + handlerMap := make(map[string]*ctest.MockContainerHandler, len(containers)) + + for _, container := range containers { + infosMap[container] = itest.GenerateRandomContainerInfo(container, 4, query, 1*time.Second) + } + + driver := &stest.MockStorageDriver{} + m := createManagerAndAddContainers( + driver, + containers, + func(h *ctest.MockContainerHandler) { + cinfo := infosMap[h.Name] + stats := cinfo.Stats + samples := cinfo.Samples + percentiles := cinfo.StatsPercentiles + spec := cinfo.Spec + driver.On( + "Percentiles", + h.Name, + query.CpuUsagePercentiles, + query.MemoryUsagePercentages, + ).Return( + percentiles, + nil, + ) + + driver.On( + "Samples", + h.Name, + query.NumSamples, + ).Return( + samples, + nil, + ) + + driver.On( + "RecentStats", + h.Name, + query.NumStats, + ).Return( + stats, + nil, + ) + + h.On("ListContainers", container.LIST_SELF).Return( + []info.ContainerReference(nil), + nil, + ) + h.On("GetSpec").Return( + spec, + nil, + ) + handlerMap[h.Name] = h + }, + t, + ) + + returnedInfos := make(map[string]*info.ContainerInfo, len(containers)) + + for _, container := range containers { + // nil should give us default values + cinfo, err := m.GetContainerInfo(container, nil) + if err != nil { + t.Fatalf("Unable to get info for container %v: %v", container, err) + } + returnedInfos[container] = cinfo + } + + for container, handler := range handlerMap { + handler.AssertExpectations(t) + returned := returnedInfos[container] + expected := infosMap[container] + if !reflect.DeepEqual(returned, expected) { + t.Errorf("returned unexpected info for container %v; returned %+v; expected %+v", container, returned, expected) + } + } + +} diff --git a/third_party/src/github.com/google/cadvisor/pages/containers.go b/third_party/src/github.com/google/cadvisor/pages/containers.go index 58605a8bc8e..b3302b171e6 100644 --- a/third_party/src/github.com/google/cadvisor/pages/containers.go +++ b/third_party/src/github.com/google/cadvisor/pages/containers.go @@ -162,7 +162,11 @@ func ServerContainersPage(m manager.Manager, w http.ResponseWriter, u *url.URL) containerName := u.Path[len(ContainersPage)-1:] // Get the container. - cont, err := m.GetContainerInfo(containerName) + reqParams := info.ContainerInfoRequest{ + NumStats: 60, + NumSamples: 60, + } + cont, err := m.GetContainerInfo(containerName, &reqParams) if err != nil { return fmt.Errorf("Failed to get container \"%s\" with error: %s", containerName, err) } diff --git a/third_party/src/github.com/google/cadvisor/pages/static/containers_js.go b/third_party/src/github.com/google/cadvisor/pages/static/containers_js.go index b907b344d65..086c610b8f9 100644 --- a/third_party/src/github.com/google/cadvisor/pages/static/containers_js.go +++ b/third_party/src/github.com/google/cadvisor/pages/static/containers_js.go @@ -128,7 +128,7 @@ function drawCpuPerCoreUsage(elementId, machineInfo, stats) { elements.push(cur.timestamp); for (var j = 0; j < machineInfo.num_cores; j++) { // TODO(vmarmol): This assumes we sample every second, use the timestamps. - elements.push((cur.cpu.usage.per_cpu[j] - prev.cpu.usage.per_cpu[j]) / 1000000000); + elements.push((cur.cpu.usage.per_cpu_usage[j] - prev.cpu.usage.per_cpu_usage[j]) / 1000000000); } data.push(elements); } diff --git a/third_party/src/github.com/google/cadvisor/storage/influxdb/influxdb.go b/third_party/src/github.com/google/cadvisor/storage/influxdb/influxdb.go new file mode 100644 index 00000000000..99cb92c718f --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/storage/influxdb/influxdb.go @@ -0,0 +1,498 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package influxdb + +import ( + "fmt" + "strconv" + "strings" + "time" + + "github.com/google/cadvisor/info" + "github.com/google/cadvisor/storage" + "github.com/influxdb/influxdb-go" +) + +type influxdbStorage struct { + client *influxdb.Client + prevStats *info.ContainerStats + machineName string + tableName string + windowLen time.Duration +} + +const ( + colTimestamp string = "timestamp" + colMachineName string = "machine" + colContainerName string = "container_name" + colCpuCumulativeUsage string = "cpu_cumulative_usage" + // Cumulative Cpu Usage in system mode + colCpuCumulativeUsageSystem string = "cpu_cumulative_usage_system" + // Cumulative Cpu Usage in user mode + colCpuCumulativeUsageUser string = "cpu_cumulative_usage_user" + // Memory Usage + colMemoryUsage string = "memory_usage" + // Working set size + colMemoryWorkingSet string = "memory_working_set" + // container page fault + colMemoryContainerPgfault string = "memory_container_pgfault" + // container major page fault + colMemoryContainerPgmajfault string = "memory_container_pgmajfault" + // hierarchical page fault + colMemoryHierarchicalPgfault string = "memory_hierarchical_pgfault" + // hierarchical major page fault + colMemoryHierarchicalPgmajfault string = "memory_hierarchical_pgmajfault" + // Cumulative per core usage + colPerCoreCumulativeUsagePrefix string = "per_core_cumulative_usage_core_" + // Optional: sample duration. Unit: Nanosecond. + colSampleDuration string = "sample_duration" + // Optional: Instant cpu usage + colCpuInstantUsage string = "cpu_instant_usage" + // Optional: Instant per core usage + colPerCoreInstantUsagePrefix string = "per_core_instant_usage_core_" +) + +func (self *influxdbStorage) containerStatsToValues( + ref info.ContainerReference, + stats *info.ContainerStats, +) (columns []string, values []interface{}) { + + // Timestamp + columns = append(columns, colTimestamp) + values = append(values, stats.Timestamp.Format(time.RFC3339Nano)) + + // Machine name + columns = append(columns, colMachineName) + values = append(values, self.machineName) + + // Container name + columns = append(columns, colContainerName) + values = append(values, ref.Name) + + // Cumulative Cpu Usage + columns = append(columns, colCpuCumulativeUsage) + values = append(values, stats.Cpu.Usage.Total) + + // Cumulative Cpu Usage in system mode + columns = append(columns, colCpuCumulativeUsageSystem) + values = append(values, stats.Cpu.Usage.System) + + // Cumulative Cpu Usage in user mode + columns = append(columns, colCpuCumulativeUsageUser) + values = append(values, stats.Cpu.Usage.User) + + // Memory Usage + columns = append(columns, colMemoryUsage) + values = append(values, stats.Memory.Usage) + + // Working set size + columns = append(columns, colMemoryWorkingSet) + values = append(values, stats.Memory.WorkingSet) + + // container page fault + columns = append(columns, colMemoryContainerPgfault) + values = append(values, stats.Memory.ContainerData.Pgfault) + + // container major page fault + columns = append(columns, colMemoryContainerPgmajfault) + values = append(values, stats.Memory.ContainerData.Pgmajfault) + + // hierarchical page fault + columns = append(columns, colMemoryHierarchicalPgfault) + values = append(values, stats.Memory.HierarchicalData.Pgfault) + + // hierarchical major page fault + columns = append(columns, colMemoryHierarchicalPgmajfault) + values = append(values, stats.Memory.HierarchicalData.Pgmajfault) + + // per cpu cumulative usage + for i, u := range stats.Cpu.Usage.PerCpu { + columns = append(columns, fmt.Sprintf("%v%v", colPerCoreCumulativeUsagePrefix, i)) + values = append(values, u) + } + + sample, err := info.NewSample(self.prevStats, stats) + if err != nil || sample == nil { + return columns, values + } + + // Optional: sample duration. Unit: Nanosecond. + columns = append(columns, colSampleDuration) + values = append(values, sample.Duration.String()) + + // Optional: Instant cpu usage + columns = append(columns, colCpuInstantUsage) + values = append(values, sample.Cpu.Usage) + + // Optional: Instant per core usage + for i, u := range sample.Cpu.PerCpuUsage { + columns = append(columns, fmt.Sprintf("%v%v", colPerCoreInstantUsagePrefix, i)) + values = append(values, u) + } + + return columns, values +} + +func convertToUint64(v interface{}) (uint64, error) { + if v == nil { + return 0, nil + } + switch x := v.(type) { + case uint64: + return x, nil + case int: + if x < 0 { + return 0, fmt.Errorf("negative value: %v", x) + } + return uint64(x), nil + case int32: + if x < 0 { + return 0, fmt.Errorf("negative value: %v", x) + } + return uint64(x), nil + case int64: + if x < 0 { + return 0, fmt.Errorf("negative value: %v", x) + } + return uint64(x), nil + case float64: + if x < 0 { + return 0, fmt.Errorf("negative value: %v", x) + } + return uint64(x), nil + case uint32: + return uint64(x), nil + } + return 0, fmt.Errorf("Unknown type") +} + +func (self *influxdbStorage) valuesToContainerStats(columns []string, values []interface{}) (*info.ContainerStats, error) { + stats := &info.ContainerStats{ + Cpu: &info.CpuStats{}, + Memory: &info.MemoryStats{}, + } + perCoreUsage := make(map[int]uint64, 32) + var err error + for i, col := range columns { + v := values[i] + switch { + case col == colTimestamp: + if str, ok := v.(string); ok { + stats.Timestamp, err = time.Parse(time.RFC3339Nano, str) + } + case col == colMachineName: + if m, ok := v.(string); ok { + if m != self.machineName { + return nil, fmt.Errorf("different machine") + } + } else { + return nil, fmt.Errorf("machine name field is not a string: %v", v) + } + // Cumulative Cpu Usage + case col == colCpuCumulativeUsage: + stats.Cpu.Usage.Total, err = convertToUint64(v) + // Cumulative Cpu used by the system + case col == colCpuCumulativeUsageSystem: + stats.Cpu.Usage.System, err = convertToUint64(v) + // Cumulative Cpu Usage in user mode + case col == colCpuCumulativeUsageUser: + stats.Cpu.Usage.User, err = convertToUint64(v) + // Memory Usage + case col == colMemoryUsage: + stats.Memory.Usage, err = convertToUint64(v) + // Working set size + case col == colMemoryWorkingSet: + stats.Memory.WorkingSet, err = convertToUint64(v) + // container page fault + case col == colMemoryContainerPgfault: + stats.Memory.ContainerData.Pgfault, err = convertToUint64(v) + // container major page fault + case col == colMemoryContainerPgmajfault: + stats.Memory.ContainerData.Pgmajfault, err = convertToUint64(v) + // hierarchical page fault + case col == colMemoryHierarchicalPgfault: + stats.Memory.HierarchicalData.Pgfault, err = convertToUint64(v) + // hierarchical major page fault + case col == colMemoryHierarchicalPgmajfault: + stats.Memory.HierarchicalData.Pgmajfault, err = convertToUint64(v) + case strings.HasPrefix(col, colPerCoreCumulativeUsagePrefix): + idxStr := col[len(colPerCoreCumulativeUsagePrefix):] + idx, err := strconv.Atoi(idxStr) + if err != nil { + continue + } + perCoreUsage[idx], err = convertToUint64(v) + } + if err != nil { + return nil, fmt.Errorf("column %v has invalid value %v: %v", col, v, err) + } + } + stats.Cpu.Usage.PerCpu = make([]uint64, len(perCoreUsage)) + for idx, usage := range perCoreUsage { + stats.Cpu.Usage.PerCpu[idx] = usage + } + return stats, nil +} + +func (self *influxdbStorage) valuesToContainerSample(columns []string, values []interface{}) (*info.ContainerStatsSample, error) { + sample := &info.ContainerStatsSample{} + perCoreUsage := make(map[int]uint64, 32) + var err error + for i, col := range columns { + v := values[i] + switch { + case col == colTimestamp: + if str, ok := v.(string); ok { + sample.Timestamp, err = time.Parse(time.RFC3339Nano, str) + } + case col == colMachineName: + if m, ok := v.(string); ok { + if m != self.machineName { + return nil, fmt.Errorf("different machine") + } + } else { + return nil, fmt.Errorf("machine name field is not a string: %v", v) + } + // Memory Usage + case col == colMemoryUsage: + sample.Memory.Usage, err = convertToUint64(v) + // sample duration. Unit: Nanosecond. + case col == colSampleDuration: + if v == nil { + // this record does not have sample_duration, so it's the first stats. + return nil, nil + } + sample.Duration, err = time.ParseDuration(v.(string)) + // Instant cpu usage + case col == colCpuInstantUsage: + sample.Cpu.Usage, err = convertToUint64(v) + case strings.HasPrefix(col, colPerCoreInstantUsagePrefix): + idxStr := col[len(colPerCoreInstantUsagePrefix):] + idx, err := strconv.Atoi(idxStr) + if err != nil { + continue + } + perCoreUsage[idx], err = convertToUint64(v) + } + if err != nil { + return nil, fmt.Errorf("column %v has invalid value %v: %v", col, v, err) + } + } + sample.Cpu.PerCpuUsage = make([]uint64, len(perCoreUsage)) + for idx, usage := range perCoreUsage { + sample.Cpu.PerCpuUsage[idx] = usage + } + if sample.Duration.Nanoseconds() == 0 { + return nil, nil + } + return sample, nil +} + +func (self *influxdbStorage) AddStats(ref info.ContainerReference, stats *info.ContainerStats) error { + series := &influxdb.Series{ + Name: self.tableName, + // There's only one point for each stats + Points: make([][]interface{}, 1), + } + if stats == nil || stats.Cpu == nil || stats.Memory == nil { + return nil + } + series.Columns, series.Points[0] = self.containerStatsToValues(ref, stats) + + self.prevStats = stats.Copy(self.prevStats) + err := self.client.WriteSeries([]*influxdb.Series{series}) + if err != nil { + return err + } + return nil +} + +func (self *influxdbStorage) RecentStats(containerName string, numStats int) ([]*info.ContainerStats, error) { + // TODO(dengnan): select only columns that we need + // TODO(dengnan): escape names + query := fmt.Sprintf("select * from %v where %v='%v' and %v='%v'", self.tableName, colContainerName, containerName, colMachineName, self.machineName) + if numStats > 0 { + query = fmt.Sprintf("%v limit %v", query, numStats) + } + series, err := self.client.Query(query) + if err != nil { + return nil, err + } + statsList := make([]*info.ContainerStats, 0, len(series)) + // By default, influxDB returns data in time descending order. + // RecentStats() requires stats in time increasing order, + // so we need to go through from the last one to the first one. + for i := len(series) - 1; i >= 0; i-- { + s := series[i] + for j := len(s.Points) - 1; j >= 0; j-- { + values := s.Points[j] + stats, err := self.valuesToContainerStats(s.Columns, values) + if err != nil { + return nil, err + } + if stats == nil { + continue + } + statsList = append(statsList, stats) + } + } + return statsList, nil +} + +func (self *influxdbStorage) Samples(containerName string, numSamples int) ([]*info.ContainerStatsSample, error) { + // TODO(dengnan): select only columns that we need + // TODO(dengnan): escape names + query := fmt.Sprintf("select * from %v where %v='%v' and %v='%v'", self.tableName, colContainerName, containerName, colMachineName, self.machineName) + if numSamples > 0 { + query = fmt.Sprintf("%v limit %v", query, numSamples) + } + series, err := self.client.Query(query) + if err != nil { + return nil, err + } + sampleList := make([]*info.ContainerStatsSample, 0, len(series)) + for i := len(series) - 1; i >= 0; i-- { + s := series[i] + for j := len(s.Points) - 1; j >= 0; j-- { + values := s.Points[j] + sample, err := self.valuesToContainerSample(s.Columns, values) + if err != nil { + return nil, err + } + if sample == nil { + continue + } + sampleList = append(sampleList, sample) + } + } + return sampleList, nil +} + +func (self *influxdbStorage) Close() error { + self.client = nil + return nil +} + +func (self *influxdbStorage) Percentiles( + containerName string, + cpuUsagePercentiles []int, + memUsagePercentiles []int, +) (*info.ContainerStatsPercentiles, error) { + selectedCol := make([]string, 0, len(cpuUsagePercentiles)+len(memUsagePercentiles)+1) + + selectedCol = append(selectedCol, fmt.Sprintf("max(%v)", colMemoryUsage)) + for _, p := range cpuUsagePercentiles { + selectedCol = append(selectedCol, fmt.Sprintf("percentile(%v, %v)", colCpuInstantUsage, p)) + } + for _, p := range memUsagePercentiles { + selectedCol = append(selectedCol, fmt.Sprintf("percentile(%v, %v)", colMemoryUsage, p)) + } + + query := fmt.Sprintf("select %v from %v where %v='%v' and %v='%v' and time > now() - %v", + strings.Join(selectedCol, ","), + self.tableName, + colContainerName, + containerName, + colMachineName, + self.machineName, + fmt.Sprintf("%vs", self.windowLen.Seconds()), + ) + series, err := self.client.Query(query) + if err != nil { + return nil, err + } + if len(series) != 1 { + return nil, nil + } + if len(series[0].Points) == 0 { + return nil, nil + } + + point := series[0].Points[0] + + ret := new(info.ContainerStatsPercentiles) + ret.MaxMemoryUsage, err = convertToUint64(point[1]) + if err != nil { + return nil, fmt.Errorf("invalid max memory usage: %v", err) + } + retrievedCpuPercentiles := point[2 : 2+len(cpuUsagePercentiles)] + for i, p := range cpuUsagePercentiles { + v, err := convertToUint64(retrievedCpuPercentiles[i]) + if err != nil { + return nil, fmt.Errorf("invalid cpu usage: %v", err) + } + ret.CpuUsagePercentiles = append( + ret.CpuUsagePercentiles, + info.Percentile{ + Percentage: p, + Value: v, + }, + ) + } + retrievedMemoryPercentiles := point[2+len(cpuUsagePercentiles):] + for i, p := range memUsagePercentiles { + v, err := convertToUint64(retrievedMemoryPercentiles[i]) + if err != nil { + return nil, fmt.Errorf("invalid memory usage: %v", err) + } + ret.MemoryUsagePercentiles = append( + ret.MemoryUsagePercentiles, + info.Percentile{ + Percentage: p, + Value: v, + }, + ) + } + return ret, nil +} + +// machineName: A unique identifier to identify the host that current cAdvisor +// instance is running on. +// influxdbHost: The host which runs influxdb. +// percentilesDuration: Time window which will be considered when calls Percentiles() +func New(machineName, + tablename, + database, + username, + password, + influxdbHost string, + isSecure bool, + percentilesDuration time.Duration, +) (storage.StorageDriver, error) { + config := &influxdb.ClientConfig{ + Host: influxdbHost, + Username: username, + Password: password, + Database: database, + IsSecure: isSecure, + } + client, err := influxdb.NewClient(config) + if err != nil { + return nil, err + } + // TODO(monnand): With go 1.3, we cannot compress data now. + client.DisableCompression() + if percentilesDuration.Seconds() < 1.0 { + percentilesDuration = 5 * time.Minute + } + + ret := &influxdbStorage{ + client: client, + windowLen: percentilesDuration, + machineName: machineName, + tableName: tablename, + } + return ret, nil +} diff --git a/third_party/src/github.com/google/cadvisor/storage/influxdb/influxdb_test.go b/third_party/src/github.com/google/cadvisor/storage/influxdb/influxdb_test.go new file mode 100644 index 00000000000..c5770d35d1b --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/storage/influxdb/influxdb_test.go @@ -0,0 +1,136 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package influxdb + +import ( + "fmt" + "testing" + "time" + + "github.com/google/cadvisor/storage" + "github.com/google/cadvisor/storage/test" + "github.com/influxdb/influxdb-go" +) + +func runStorageTest(f func(storage.StorageDriver, *testing.T), t *testing.T) { + machineName := "machineA" + tablename := "t" + database := "cadvisor" + username := "root" + password := "root" + hostname := "localhost:8086" + percentilesDuration := 10 * time.Minute + rootConfig := &influxdb.ClientConfig{ + Host: hostname, + Username: username, + Password: password, + IsSecure: false, + } + rootClient, err := influxdb.NewClient(rootConfig) + if err != nil { + t.Fatal(err) + } + // create the data base first. + rootClient.CreateDatabase(database) + config := &influxdb.ClientConfig{ + Host: hostname, + Username: username, + Password: password, + Database: database, + IsSecure: false, + } + client, err := influxdb.NewClient(config) + if err != nil { + t.Fatal(err) + } + client.DisableCompression() + deleteAll := fmt.Sprintf("drop series %v", tablename) + _, err = client.Query(deleteAll) + if err != nil { + t.Fatal(err) + } + // delete all data by the end of the call + defer client.Query(deleteAll) + + driver, err := New(machineName, + tablename, + database, + username, + password, + hostname, + false, + percentilesDuration) + if err != nil { + t.Fatal(err) + } + // generate another container's data on same machine. + test.StorageDriverFillRandomStatsFunc("containerOnSameMachine", 100, driver, t) + + // generate another container's data on another machine. + driverForAnotherMachine, err := New("machineB", + tablename, + database, + username, + password, + hostname, + false, + percentilesDuration) + if err != nil { + t.Fatal(err) + } + defer driverForAnotherMachine.Close() + test.StorageDriverFillRandomStatsFunc("containerOnAnotherMachine", 100, driverForAnotherMachine, t) + f(driver, t) +} + +func TestSampleCpuUsage(t *testing.T) { + runStorageTest(test.StorageDriverTestSampleCpuUsage, t) +} + +func TestRetrievePartialRecentStats(t *testing.T) { + runStorageTest(test.StorageDriverTestRetrievePartialRecentStats, t) +} + +func TestSamplesWithoutSample(t *testing.T) { + runStorageTest(test.StorageDriverTestSamplesWithoutSample, t) +} + +func TestRetrieveAllRecentStats(t *testing.T) { + runStorageTest(test.StorageDriverTestRetrieveAllRecentStats, t) +} + +func TestNoRecentStats(t *testing.T) { + runStorageTest(test.StorageDriverTestNoRecentStats, t) +} + +func TestNoSamples(t *testing.T) { + runStorageTest(test.StorageDriverTestNoSamples, t) +} + +func TestPercentiles(t *testing.T) { + runStorageTest(test.StorageDriverTestPercentiles, t) +} + +func TestMaxMemoryUsage(t *testing.T) { + runStorageTest(test.StorageDriverTestMaxMemoryUsage, t) +} + +func TestPercentilesWithoutSample(t *testing.T) { + runStorageTest(test.StorageDriverTestPercentilesWithoutSample, t) +} + +func TestPercentilesWithoutStats(t *testing.T) { + runStorageTest(test.StorageDriverTestPercentilesWithoutStats, t) +} diff --git a/third_party/src/github.com/google/cadvisor/storage/memory/memory.go b/third_party/src/github.com/google/cadvisor/storage/memory/memory.go index dd353cc4634..d30a3df62c4 100644 --- a/third_party/src/github.com/google/cadvisor/storage/memory/memory.go +++ b/third_party/src/github.com/google/cadvisor/storage/memory/memory.go @@ -41,20 +41,7 @@ func (self *containerStorage) updatePrevStats(stats *info.ContainerStats) { self.prevStats = nil return } - if self.prevStats == nil { - self.prevStats = &info.ContainerStats{ - Cpu: &info.CpuStats{}, - Memory: &info.MemoryStats{}, - } - } - // make a deep copy. - self.prevStats.Timestamp = stats.Timestamp - *self.prevStats.Cpu = *stats.Cpu - self.prevStats.Cpu.Usage.PerCpu = make([]uint64, len(stats.Cpu.Usage.PerCpu)) - for i, perCpu := range stats.Cpu.Usage.PerCpu { - self.prevStats.Cpu.Usage.PerCpu[i] = perCpu - } - *self.prevStats.Memory = *stats.Memory + self.prevStats = stats.Copy(self.prevStats) } func (self *containerStorage) AddStats(stats *info.ContainerStats) error { @@ -75,9 +62,9 @@ func (self *containerStorage) AddStats(stats *info.ContainerStats) error { } } if self.recentStats.Len() >= self.maxNumStats { - self.recentStats.Remove(self.recentStats.Front()) + self.recentStats.Remove(self.recentStats.Back()) } - self.recentStats.PushBack(stats) + self.recentStats.PushFront(stats) self.updatePrevStats(stats) return nil } @@ -88,18 +75,25 @@ func (self *containerStorage) RecentStats(numStats int) ([]*info.ContainerStats, if self.recentStats.Len() < numStats || numStats < 0 { numStats = self.recentStats.Len() } - ret := make([]*info.ContainerStats, 0, numStats) + + // Stats in the recentStats list are stored in reverse chronological + // order, i.e. most recent stats is in the front. + // numStats will always <= recentStats.Len() so that there will be + // always at least numStats available stats to retrieve. We traverse + // the recentStats list from its head and fill the ret slice in + // reverse order so that the returned slice will be in chronological + // order. The order of the returned slice is not specified by the + // StorageDriver interface, so it is not necessary for other storage + // drivers to return the slice in the same order. + ret := make([]*info.ContainerStats, numStats) e := self.recentStats.Front() - for i := 0; i < numStats; i++ { + for i := numStats - 1; i >= 0; i-- { data, ok := e.Value.(*info.ContainerStats) if !ok { return nil, fmt.Errorf("The %vth element is not a ContainerStats", i) } - ret = append(ret, data) + ret[i] = data e = e.Next() - if e == nil { - break - } } return ret, nil } @@ -162,23 +156,34 @@ type InMemoryStorage struct { func (self *InMemoryStorage) AddStats(ref info.ContainerReference, stats *info.ContainerStats) error { var cstore *containerStorage var ok bool - self.lock.Lock() - if cstore, ok = self.containerStorageMap[ref.Name]; !ok { - cstore = newContainerStore(ref, self.maxNumSamples, self.maxNumStats) - self.containerStorageMap[ref.Name] = cstore - } - self.lock.Unlock() + + func() { + self.lock.Lock() + defer self.lock.Unlock() + if cstore, ok = self.containerStorageMap[ref.Name]; !ok { + cstore = newContainerStore(ref, self.maxNumSamples, self.maxNumStats) + self.containerStorageMap[ref.Name] = cstore + } + }() return cstore.AddStats(stats) } func (self *InMemoryStorage) Samples(name string, numSamples int) ([]*info.ContainerStatsSample, error) { var cstore *containerStorage var ok bool - self.lock.RLock() - if cstore, ok = self.containerStorageMap[name]; !ok { - return nil, fmt.Errorf("unable to find data for container %v", name) + + err := func() error { + self.lock.RLock() + defer self.lock.RUnlock() + if cstore, ok = self.containerStorageMap[name]; !ok { + return fmt.Errorf("unable to find data for container %v", name) + } + return nil + }() + + if err != nil { + return nil, err } - self.lock.RUnlock() return cstore.Samples(numSamples) } @@ -186,11 +191,17 @@ func (self *InMemoryStorage) Samples(name string, numSamples int) ([]*info.Conta func (self *InMemoryStorage) RecentStats(name string, numStats int) ([]*info.ContainerStats, error) { var cstore *containerStorage var ok bool - self.lock.RLock() - if cstore, ok = self.containerStorageMap[name]; !ok { - return nil, fmt.Errorf("unable to find data for container %v", name) + err := func() error { + self.lock.RLock() + defer self.lock.RUnlock() + if cstore, ok = self.containerStorageMap[name]; !ok { + return fmt.Errorf("unable to find data for container %v", name) + } + return nil + }() + if err != nil { + return nil, err } - self.lock.RUnlock() return cstore.RecentStats(numStats) } @@ -198,11 +209,17 @@ func (self *InMemoryStorage) RecentStats(name string, numStats int) ([]*info.Con func (self *InMemoryStorage) Percentiles(name string, cpuPercentiles, memPercentiles []int) (*info.ContainerStatsPercentiles, error) { var cstore *containerStorage var ok bool - self.lock.RLock() - if cstore, ok = self.containerStorageMap[name]; !ok { - return nil, fmt.Errorf("unable to find data for container %v", name) + err := func() error { + self.lock.RLock() + defer self.lock.RUnlock() + if cstore, ok = self.containerStorageMap[name]; !ok { + return fmt.Errorf("unable to find data for container %v", name) + } + return nil + }() + if err != nil { + return nil, err } - self.lock.RUnlock() return cstore.Percentiles(cpuPercentiles, memPercentiles) } diff --git a/third_party/src/github.com/google/cadvisor/storage/memory/memory_test.go b/third_party/src/github.com/google/cadvisor/storage/memory/memory_test.go index 98f9a6a8810..394e48f6a5f 100644 --- a/third_party/src/github.com/google/cadvisor/storage/memory/memory_test.go +++ b/third_party/src/github.com/google/cadvisor/storage/memory/memory_test.go @@ -44,6 +44,32 @@ func TestSamplesWithoutSample(t *testing.T) { runStorageTest(test.StorageDriverTestSamplesWithoutSample, t) } -func TestPercentilessWithoutSample(t *testing.T) { +func TestPercentilesWithoutSample(t *testing.T) { runStorageTest(test.StorageDriverTestPercentilesWithoutSample, t) } + +func TestPercentiles(t *testing.T) { + N := 100 + driver := New(N, N) + test.StorageDriverTestPercentiles(driver, t) +} + +func TestRetrievePartialRecentStats(t *testing.T) { + runStorageTest(test.StorageDriverTestRetrievePartialRecentStats, t) +} + +func TestRetrieveAllRecentStats(t *testing.T) { + runStorageTest(test.StorageDriverTestRetrieveAllRecentStats, t) +} + +func TestNoRecentStats(t *testing.T) { + runStorageTest(test.StorageDriverTestNoRecentStats, t) +} + +func TestNoSamples(t *testing.T) { + runStorageTest(test.StorageDriverTestNoSamples, t) +} + +func TestPercentilesWithoutStats(t *testing.T) { + runStorageTest(test.StorageDriverTestPercentilesWithoutStats, t) +} diff --git a/third_party/src/github.com/google/cadvisor/storage/storage.go b/third_party/src/github.com/google/cadvisor/storage/storage.go index 51f0b9c00f5..84ca0b3b610 100644 --- a/third_party/src/github.com/google/cadvisor/storage/storage.go +++ b/third_party/src/github.com/google/cadvisor/storage/storage.go @@ -21,7 +21,9 @@ type StorageDriver interface { // Read most recent stats. numStats indicates max number of stats // returned. The returned stats must be consecutive observed stats. If - // numStats < 0, then return all stats stored in the storage. + // numStats < 0, then return all stats stored in the storage. The + // returned stats should be sorted in time increasing order, i.e. Most + // recent stats should be the last. RecentStats(containerName string, numStats int) ([]*info.ContainerStats, error) // Read the specified percentiles of CPU and memory usage of the container. @@ -31,7 +33,7 @@ type StorageDriver interface { // Returns samples of the container stats. If numSamples < 0, then // the number of returned samples is implementation defined. Otherwise, the driver // should return at most numSamples samples. - Samples(containername string, numSamples int) ([]*info.ContainerStatsSample, error) + Samples(containerName string, numSamples int) ([]*info.ContainerStatsSample, error) // Close will clear the state of the storage driver. The elements // stored in the underlying storage may or may not be deleted depending diff --git a/third_party/src/github.com/google/cadvisor/storage/test/mock.go b/third_party/src/github.com/google/cadvisor/storage/test/mock.go new file mode 100644 index 00000000000..24fe2500948 --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/storage/test/mock.go @@ -0,0 +1,57 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package test + +import ( + "github.com/google/cadvisor/info" + "github.com/stretchr/testify/mock" +) + +type MockStorageDriver struct { + mock.Mock + MockCloseMethod bool +} + +func (self *MockStorageDriver) AddStats(ref info.ContainerReference, stats *info.ContainerStats) error { + args := self.Called(ref, stats) + return args.Error(0) +} + +func (self *MockStorageDriver) RecentStats(containerName string, numStats int) ([]*info.ContainerStats, error) { + args := self.Called(containerName, numStats) + return args.Get(0).([]*info.ContainerStats), args.Error(1) +} + +func (self *MockStorageDriver) Percentiles( + containerName string, + cpuUsagePercentiles []int, + memUsagePercentiles []int, +) (*info.ContainerStatsPercentiles, error) { + args := self.Called(containerName, cpuUsagePercentiles, memUsagePercentiles) + return args.Get(0).(*info.ContainerStatsPercentiles), args.Error(1) +} + +func (self *MockStorageDriver) Samples(containerName string, numSamples int) ([]*info.ContainerStatsSample, error) { + args := self.Called(containerName, numSamples) + return args.Get(0).([]*info.ContainerStatsSample), args.Error(1) +} + +func (self *MockStorageDriver) Close() error { + if self.MockCloseMethod { + args := self.Called() + return args.Error(0) + } + return nil +} diff --git a/third_party/src/github.com/google/cadvisor/storage/test/storagetests.go b/third_party/src/github.com/google/cadvisor/storage/test/storagetests.go index e0f27f76aa7..8bad2c4d7a9 100644 --- a/third_party/src/github.com/google/cadvisor/storage/test/storagetests.go +++ b/third_party/src/github.com/google/cadvisor/storage/test/storagetests.go @@ -16,6 +16,7 @@ package test import ( "math/rand" + "reflect" "testing" "time" @@ -43,6 +44,7 @@ func buildTrace(cpu, mem []uint64, duration time.Duration) []*info.ContainerStat stats.Cpu.Usage.Total = cpuTotalUsage stats.Cpu.Usage.User = stats.Cpu.Usage.Total stats.Cpu.Usage.System = 0 + stats.Cpu.Usage.PerCpu = []uint64{cpuTotalUsage} stats.Memory.Usage = mem[i] @@ -51,10 +53,133 @@ func buildTrace(cpu, mem []uint64, duration time.Duration) []*info.ContainerStat return ret } -// The underlying driver must be able to hold more than 10 samples. +func timeEq(t1, t2 time.Time, tolerance time.Duration) bool { + // t1 should not be later than t2 + if t1.After(t2) { + t1, t2 = t2, t1 + } + diff := t2.Sub(t1) + if diff <= tolerance { + return true + } + return false +} + +func durationEq(a, b time.Duration, tolerance time.Duration) bool { + if a > b { + a, b = b, a + } + diff := a - b + if diff <= tolerance { + return true + } + return false +} + +const ( + // 10ms, i.e. 0.01s + timePrecision time.Duration = 10 * time.Millisecond +) + +// This function is useful because we do not require precise time +// representation. +func statsEq(a, b *info.ContainerStats) bool { + if !timeEq(a.Timestamp, b.Timestamp, timePrecision) { + return false + } + if !reflect.DeepEqual(a.Cpu, b.Cpu) { + return false + } + if !reflect.DeepEqual(a.Memory, b.Memory) { + return false + } + return true +} + +// This function is useful because we do not require precise time +// representation. +func sampleEq(a, b *info.ContainerStatsSample) bool { + if !timeEq(a.Timestamp, b.Timestamp, timePrecision) { + return false + } + if !durationEq(a.Duration, b.Duration, timePrecision) { + return false + } + if !reflect.DeepEqual(a.Cpu, b.Cpu) { + return false + } + if !reflect.DeepEqual(a.Memory, b.Memory) { + return false + } + return true +} + +func samplesInTrace(samples []*info.ContainerStatsSample, cpuTrace, memTrace []uint64, samplePeriod time.Duration, t *testing.T) { + for _, sample := range samples { + if sample.Duration != samplePeriod { + t.Errorf("sample duration is %v, not %v", sample.Duration, samplePeriod) + } + cpuUsage := sample.Cpu.Usage + memUsage := sample.Memory.Usage + found := false + for _, u := range cpuTrace { + if u == cpuUsage { + found = true + break + } + } + if !found { + t.Errorf("unable to find cpu usage %v", cpuUsage) + } + found = false + for _, u := range memTrace { + if u == memUsage { + found = true + break + } + } + if !found { + t.Errorf("unable to find mem usage %v", memUsage) + } + } +} + +// This function will generate random stats and write +// them into the storage. The function will not close the driver +func StorageDriverFillRandomStatsFunc( + containerName string, + N int, + driver storage.StorageDriver, + t *testing.T, +) { + cpuTrace := make([]uint64, 0, N) + memTrace := make([]uint64, 0, N) + + // We need N+1 observations to get N samples + for i := 0; i < N+1; i++ { + cpuTrace = append(cpuTrace, uint64(rand.Intn(1000))) + memTrace = append(memTrace, uint64(rand.Intn(1000))) + } + + samplePeriod := 1 * time.Second + + ref := info.ContainerReference{ + Name: containerName, + } + + trace := buildTrace(cpuTrace, memTrace, samplePeriod) + + for _, stats := range trace { + err := driver.AddStats(ref, stats) + if err != nil { + t.Fatalf("unable to add stats: %v", err) + } + } +} + func StorageDriverTestSampleCpuUsage(driver storage.StorageDriver, t *testing.T) { defer driver.Close() - N := 10 + N := 100 cpuTrace := make([]uint64, 0, N) memTrace := make([]uint64, 0, N) @@ -73,28 +198,37 @@ func StorageDriverTestSampleCpuUsage(driver storage.StorageDriver, t *testing.T) trace := buildTrace(cpuTrace, memTrace, samplePeriod) for _, stats := range trace { - driver.AddStats(ref, stats) + err := driver.AddStats(ref, stats) + if err != nil { + t.Fatalf("unable to add stats: %v", err) + } + // set the trace to something else. The stats stored in the + // storage should not be affected. + stats.Cpu.Usage.Total = 0 + stats.Cpu.Usage.System = 0 + stats.Cpu.Usage.User = 0 } samples, err := driver.Samples(ref.Name, N) if err != nil { t.Errorf("unable to sample stats: %v", err) } - for _, sample := range samples { - if sample.Duration != samplePeriod { - t.Errorf("sample duration is %v, not %v", sample.Duration, samplePeriod) - } - cpuUsage := sample.Cpu.Usage - found := false - for _, u := range cpuTrace { - if u == cpuUsage { - found = true - } - } - if !found { - t.Errorf("unable to find cpu usage %v", cpuUsage) - } + if len(samples) == 0 { + t.Fatal("should at least store one sample") } + samplesInTrace(samples, cpuTrace, memTrace, samplePeriod, t) + + samples, err = driver.Samples(ref.Name, -1) + if err != nil { + t.Errorf("unable to sample stats: %v", err) + } + samplesInTrace(samples, cpuTrace, memTrace, samplePeriod, t) + + samples, err = driver.Samples(ref.Name, N-5) + if err != nil { + t.Errorf("unable to sample stats: %v", err) + } + samplesInTrace(samples, cpuTrace, memTrace, samplePeriod, t) } func StorageDriverTestMaxMemoryUsage(driver storage.StorageDriver, t *testing.T) { @@ -114,7 +248,16 @@ func StorageDriverTestMaxMemoryUsage(driver storage.StorageDriver, t *testing.T) trace := buildTrace(cpuTrace, memTrace, 1*time.Second) for _, stats := range trace { - driver.AddStats(ref, stats) + err := driver.AddStats(ref, stats) + if err != nil { + t.Fatalf("unable to add stats: %v", err) + } + // set the trace to something else. The stats stored in the + // storage should not be affected. + stats.Cpu.Usage.Total = 0 + stats.Cpu.Usage.System = 0 + stats.Cpu.Usage.User = 0 + stats.Memory.Usage = 0 } percentiles, err := driver.Percentiles(ref.Name, []int{50}, []int{50}) @@ -168,3 +311,174 @@ func StorageDriverTestPercentilesWithoutSample(driver storage.StorageDriver, t * t.Errorf("There should be no percentiles") } } + +func StorageDriverTestPercentiles(driver storage.StorageDriver, t *testing.T) { + defer driver.Close() + N := 100 + cpuTrace := make([]uint64, N) + memTrace := make([]uint64, N) + for i := 1; i < N+1; i++ { + cpuTrace[i-1] = uint64(i) + memTrace[i-1] = uint64(i) + } + + trace := buildTrace(cpuTrace, memTrace, 1*time.Second) + + ref := info.ContainerReference{ + Name: "container", + } + for _, stats := range trace { + driver.AddStats(ref, stats) + } + percentages := []int{ + 80, + 90, + 50, + } + percentiles, err := driver.Percentiles(ref.Name, percentages, percentages) + if err != nil { + t.Fatal(err) + } + for _, x := range percentiles.CpuUsagePercentiles { + for _, y := range percentiles.CpuUsagePercentiles { + // lower percentage, smaller value + if x.Percentage < y.Percentage && x.Value > y.Value { + t.Errorf("%v percent is %v; while %v percent is %v", + x.Percentage, x.Value, y.Percentage, y.Value) + } + } + } + for _, x := range percentiles.MemoryUsagePercentiles { + for _, y := range percentiles.MemoryUsagePercentiles { + if x.Percentage < y.Percentage && x.Value > y.Value { + t.Errorf("%v percent is %v; while %v percent is %v", + x.Percentage, x.Value, y.Percentage, y.Value) + } + } + } +} + +func StorageDriverTestRetrievePartialRecentStats(driver storage.StorageDriver, t *testing.T) { + defer driver.Close() + N := 100 + memTrace := make([]uint64, N) + cpuTrace := make([]uint64, N) + for i := 0; i < N; i++ { + memTrace[i] = uint64(i + 1) + cpuTrace[i] = uint64(1) + } + + ref := info.ContainerReference{ + Name: "container", + } + + trace := buildTrace(cpuTrace, memTrace, 1*time.Second) + + for _, stats := range trace { + driver.AddStats(ref, stats) + } + + recentStats, err := driver.RecentStats(ref.Name, 10) + if err != nil { + t.Fatal(err) + } + if len(recentStats) == 0 { + t.Fatal("should at least store one stats") + } + + if len(recentStats) > 10 { + t.Fatalf("returned %v stats, not 10.", len(recentStats)) + } + + actualRecentStats := trace[len(trace)-len(recentStats):] + + // The returned stats should be sorted in time increasing order + for i, s := range actualRecentStats { + r := recentStats[i] + if !statsEq(s, r) { + t.Errorf("unexpected stats %+v with memory usage %v", r, r.Memory.Usage) + } + } +} + +func StorageDriverTestRetrieveAllRecentStats(driver storage.StorageDriver, t *testing.T) { + defer driver.Close() + N := 100 + memTrace := make([]uint64, N) + cpuTrace := make([]uint64, N) + for i := 0; i < N; i++ { + memTrace[i] = uint64(i + 1) + cpuTrace[i] = uint64(1) + } + + ref := info.ContainerReference{ + Name: "container", + } + + trace := buildTrace(cpuTrace, memTrace, 1*time.Second) + + for _, stats := range trace { + driver.AddStats(ref, stats) + } + + recentStats, err := driver.RecentStats(ref.Name, -1) + if err != nil { + t.Fatal(err) + } + if len(recentStats) == 0 { + t.Fatal("should at least store one stats") + } + if len(recentStats) > N { + t.Fatalf("returned %v stats, not 100.", len(recentStats)) + } + + actualRecentStats := trace[len(trace)-len(recentStats):] + + // The returned stats should be sorted in time increasing order + for i, s := range actualRecentStats { + r := recentStats[i] + if !statsEq(s, r) { + t.Errorf("unexpected stats %+v with memory usage %v", r, r.Memory.Usage) + } + } +} + +func StorageDriverTestNoRecentStats(driver storage.StorageDriver, t *testing.T) { + defer driver.Close() + nonExistContainer := "somerandomecontainer" + stats, _ := driver.RecentStats(nonExistContainer, -1) + if len(stats) > 0 { + t.Errorf("RecentStats() returns %v stats on non exist container", len(stats)) + } +} + +func StorageDriverTestNoSamples(driver storage.StorageDriver, t *testing.T) { + defer driver.Close() + nonExistContainer := "somerandomecontainer" + samples, _ := driver.Samples(nonExistContainer, -1) + if len(samples) > 0 { + t.Errorf("Samples() returns %v samples on non exist container", len(samples)) + } +} + +func StorageDriverTestPercentilesWithoutStats(driver storage.StorageDriver, t *testing.T) { + defer driver.Close() + nonExistContainer := "somerandomecontainer" + percentiles, _ := driver.Percentiles(nonExistContainer, []int{50, 80}, []int{50, 80}) + if percentiles == nil { + return + } + if percentiles.MaxMemoryUsage != 0 { + t.Errorf("Percentiles() reports max memory usage > 0 when there's no stats.") + } + for _, p := range percentiles.CpuUsagePercentiles { + if p.Value != 0 { + t.Errorf("Percentiles() reports cpu usage is %v when there's no stats.", p.Value) + } + } + for _, p := range percentiles.MemoryUsagePercentiles { + if p.Value != 0 { + t.Errorf("Percentiles() reports memory usage is %v when there's no stats.", p.Value) + } + } +} diff --git a/third_party/src/github.com/google/cadvisor/storagedriver.go b/third_party/src/github.com/google/cadvisor/storagedriver.go new file mode 100644 index 00000000000..3de3f85dd83 --- /dev/null +++ b/third_party/src/github.com/google/cadvisor/storagedriver.go @@ -0,0 +1,71 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "flag" + "fmt" + "os" + "time" + + "github.com/google/cadvisor/storage" + "github.com/google/cadvisor/storage/influxdb" + "github.com/google/cadvisor/storage/memory" +) + +var argSampleSize = flag.Int("samples", 1024, "number of samples we want to keep") +var argHistoryDuration = flag.Int("history_duration", 60, "number of seconds of container history to keep") +var argDbUsername = flag.String("storage_driver_user", "root", "database username") +var argDbPassword = flag.String("storage_driver_password", "root", "database password") +var argDbHost = flag.String("storage_driver_host", "localhost:8086", "database host:port") +var argDbName = flag.String("storage_driver_name", "cadvisor", "database name") +var argDbIsSecure = flag.Bool("storage_driver_secure", false, "use secure connection with database") + +func NewStorageDriver(driverName string) (storage.StorageDriver, error) { + var storageDriver storage.StorageDriver + var err error + switch driverName { + case "": + // empty string by default is the in memory store + fallthrough + case "memory": + storageDriver = memory.New(*argSampleSize, *argHistoryDuration) + return storageDriver, nil + case "influxdb": + var hostname string + hostname, err = os.Hostname() + if err != nil { + return nil, err + } + + storageDriver, err = influxdb.New( + hostname, + "cadvisorTable", + *argDbName, + *argDbUsername, + *argDbPassword, + *argDbHost, + *argDbIsSecure, + // TODO(monnand): One hour? Or user-defined? + 1*time.Hour, + ) + default: + err = fmt.Errorf("Unknown database driver: %v", *argDbDriver) + } + if err != nil { + return nil, err + } + return storageDriver, nil +}