/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package stackdriver

import (
	"fmt"
	"math"
	"time"

	"k8s.io/kubernetes/test/e2e/framework"
	instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
	"k8s.io/kubernetes/test/e2e/instrumentation/logging/utils"

	"github.com/onsi/ginkgo"
)

const (
	// maxAllowedLostFraction is the fraction of lost logs considered acceptable.
	maxAllowedLostFraction = 0.01
	// maxAllowedRestartsPerHour is the number of fluentd container restarts
	// considered acceptable. Once per hour is fine for now, as long as it
	// doesn't lose too many logs.
	maxAllowedRestartsPerHour = 1.0
	// lastPodIngestionSlack is the amount of time to wait for the last pod's
	// logs to be ingested by the logging agent.
	lastPodIngestionSlack = 5 * time.Minute
)

var _ = instrumentation.SIGDescribe("Cluster level logging implemented by Stackdriver [Feature:StackdriverLogging] [Soak]", func() {
	f := framework.NewDefaultFramework("sd-logging-load")

	ginkgo.It("should ingest logs from applications running for a prolonged amount of time", func() {
		withLogProviderForScope(f, podsScope, func(p *sdLogProvider) {
			nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet).Items
			maxPodCount := 10
			jobDuration := 30 * time.Minute
			linesPerPodPerSecond := 100
			// TODO(crassirostris): Increase to 21 hrs
			testDuration := 3 * time.Hour
			ingestionInterval := 1 * time.Minute
			ingestionTimeout := testDuration + 30*time.Minute
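			// With the values above, this allows ceil(3h / 1h * 1.0) = 3
			// logging agent restarts over the course of the test.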
			allowedRestarts := int(math.Ceil(float64(testDuration) /
				float64(time.Hour) * maxAllowedRestartsPerHour))
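
			// Pods are started in waves to keep the load constant: a new wave
			// begins every jobDuration/maxPodCount = 3m, and each pod runs for
			// jobDuration = 30m, so at most maxPodCount pods overlap per node.
			// In total there are 10*(3h/30m - 1) + 1 = 51 waves, and each pod
			// emits 100 * 1800 = 180,000 log lines.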
			podRunDelay := time.Duration(int64(jobDuration) / int64(maxPodCount))
			podRunCount := maxPodCount*(int(testDuration/jobDuration)-1) + 1
			linesPerPod := linesPerPodPerSecond * int(jobDuration.Seconds())

			// pods is a flat array of all pods to be run and to expect in Stackdriver.
			pods := []utils.FiniteLoggingPod{}
			// podsByRun is a two-dimensional array of pods, where the first
			// dimension is the run index and the second is the node index.
			// Since we want to create an equal load on all nodes, each run
			// has one pod per node.
			podsByRun := [][]utils.FiniteLoggingPod{}
			for runIdx := 0; runIdx < podRunCount; runIdx++ {
				podsInRun := []utils.FiniteLoggingPod{}
				for nodeIdx, node := range nodes {
					podName := fmt.Sprintf("job-logs-generator-%d-%d-%d-%d", maxPodCount, linesPerPod, runIdx, nodeIdx)
					pod := utils.NewLoadLoggingPod(podName, node.Name, linesPerPod, jobDuration)
					pods = append(pods, pod)
					podsInRun = append(podsInRun, pod)
				}
				podsByRun = append(podsByRun, podsInRun)
			}

			ginkgo.By("Running short-lived pods")
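			// The waves are started asynchronously; the main goroutine below
			// immediately begins waiting for the logs to be ingested.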
			go func() {
				t := time.NewTicker(podRunDelay)
				defer t.Stop()
				for runIdx := 0; runIdx < podRunCount; runIdx++ {
					// Starting one pod on each node.
					for _, pod := range podsByRun[runIdx] {
						if err := pod.Start(f); err != nil {
							framework.Logf("Failed to start pod: %v", err)
						}
					}
					<-t.C
				}
			}()
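
			// Assuming the semantics its name and arguments suggest, the
			// full-ingestion checker succeeds once no more than
			// maxAllowedLostFraction (1%) of each pod's lines are missing
			// from Stackdriver.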
			checker := utils.NewFullIngestionPodLogChecker(p, maxAllowedLostFraction, pods...)
			err := utils.WaitForLogs(checker, ingestionInterval, ingestionTimeout)
			framework.ExpectNoError(err)

			utils.EnsureLoggingAgentRestartsCount(f, p.LoggingAgentName(), allowedRestarts)
		})
	})
})