Merge pull request #8246 from rrati/red-flag-pod-termination
Correct logic for failing after % of containers fail. #7790
commit ab0844840a
@@ -69,6 +69,11 @@ type TestContextType struct {
 
 var testContext TestContextType
 
+type ContainerFailures struct {
+	status   *api.ContainerStateTerminated
+	restarts int
+}
+
 func Logf(format string, a ...interface{}) {
 	fmt.Fprintf(GinkgoWriter, "INFO: "+format+"\n", a...)
 }
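The new ContainerFailures type pairs a container's termination record with its restart count; the reworked FailedContainers (in the last hunk below) returns one such record per container ID. A minimal, self-contained sketch of the shape this produces, assuming a local terminated struct as a stand-in for api.ContainerStateTerminated (the real type also carries reason, message, and timestamps):

package main

import "fmt"

// terminated is a hypothetical stand-in for api.ContainerStateTerminated.
type terminated struct{ ExitCode int }

// containerFailures mirrors the ContainerFailures struct added above.
type containerFailures struct {
	status   *terminated
	restarts int
}

func main() {
	// One record per container ID, the shape FailedContainers returns.
	states := map[string]containerFailures{
		"docker://abc123": {status: &terminated{ExitCode: 137}, restarts: 2},
		"docker://def456": {restarts: 1}, // restarted, no termination recorded
	}
	for id, f := range states {
		fmt.Printf("%s: restarts=%d, terminated=%v\n", id, f.restarts, f.status != nil)
	}
}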
@@ -555,6 +560,7 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
 	pending := 0
 	unknown := 0
 	inactive := 0
+	failedContainers := 0
 	time.Sleep(10 * time.Second)
 
 	// TODO: Use a reflector both to put less strain on the cluster and
@@ -566,8 +572,8 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
 		for _, p := range currentPods.Items {
 			if p.Status.Phase == api.PodRunning {
 				current++
-				if err := VerifyContainersAreNotFailed(p, maxContainerFailures); err != nil {
-					return err
+				for _, v := range FailedContainers(p) {
+					failedContainers = failedContainers + v.restarts
 				}
 			} else if p.Status.Phase == api.PodPending {
 				if p.Spec.Host == "" {
@@ -618,6 +624,10 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
 		}
 		last = current
 		oldPods = currentPods
+
+		if failedContainers > maxContainerFailures {
+			return fmt.Errorf("%d containers failed which is more than allowed %d", failedContainers, maxContainerFailures)
+		}
 	}
 	if current != replicas {
 		return fmt.Errorf("Only %d pods started out of %d", current, replicas)
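Taken together, the three RunRC hunks above change the failure model: instead of aborting on the first container that ever terminated or restarted, the loop sums restart counts across all running pods and only errors once the total exceeds maxContainerFailures. A self-contained sketch of that accumulate-then-threshold flow, with a hypothetical pod type standing in for api.Pod and its status:

package main

import "fmt"

// pod is a hypothetical stand-in for api.Pod; restarts plays the role of
// the per-container restart counts that FailedContainers(p) reports.
type pod struct {
	running  bool
	restarts map[string]int
}

// checkFailures mirrors the new RunRC flow: sum restarts over all running
// pods, then fail only once the total exceeds the allowed threshold.
func checkFailures(pods []pod, maxContainerFailures int) error {
	failedContainers := 0
	for _, p := range pods {
		if !p.running {
			continue
		}
		for _, v := range p.restarts {
			failedContainers = failedContainers + v
		}
	}
	if failedContainers > maxContainerFailures {
		return fmt.Errorf("%d containers failed which is more than allowed %d",
			failedContainers, maxContainerFailures)
	}
	return nil
}

func main() {
	pods := []pod{
		{running: true, restarts: map[string]int{"c1": 1}},
		{running: true, restarts: map[string]int{"c2": 2}},
	}
	// Three total restarts against a threshold of 2: the run fails, but a
	// single flaky container alone would no longer abort it.
	fmt.Println(checkFailures(pods, 2))
}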
@@ -686,35 +696,36 @@ func listPods(c *client.Client, namespace string, label labels.Selector, field f
 	return pods, err
 }
 
-//VerifyContainersAreNotFailed confirms that containers didn't enter an invalid state.
-//For example, too many restarts, or non nill Termination, and so on.
-func VerifyContainersAreNotFailed(pod api.Pod, restartMax int) error {
-	var errStrings []string
+// FailedContainers inspects all containers in a pod and returns failure
+// information for containers that have failed or been restarted.
+// A map is returned where the key is the containerID and the value is a
+// struct containing the restart and failure information
+func FailedContainers(pod api.Pod) map[string]ContainerFailures {
+	var state ContainerFailures
+	states := make(map[string]ContainerFailures)
 
 	statuses := pod.Status.ContainerStatuses
 	if len(statuses) == 0 {
 		return nil
 	} else {
 		for _, status := range statuses {
-			var errormsg string = ""
 			if status.State.Termination != nil {
-				errormsg = "status.State.Termination was nil"
+				states[status.ContainerID] = ContainerFailures{status: status.State.Termination}
 			} else if status.LastTerminationState.Termination != nil {
-				errormsg = "status.LastTerminationState.Termination was nil"
-			} else if status.RestartCount > restartMax {
-				errormsg = fmt.Sprintf("restarted %d times", restartMax)
+				states[status.ContainerID] = ContainerFailures{status: status.LastTerminationState.Termination}
 			}
-
-			if len(errormsg) != 0 {
-				errStrings = append(errStrings, fmt.Sprintf("Error: Pod %s (host: %s) : Container w/ name %s status was bad (%v).", pod.Name, pod.Spec.Host, status.Name, errormsg))
+			if status.RestartCount > 0 {
+				var ok bool
+				if state, ok = states[status.ContainerID]; !ok {
+					state = ContainerFailures{}
+				}
+				state.restarts = status.RestartCount
+				states[status.ContainerID] = state
 			}
 		}
 	}
 
-	if len(errStrings) > 0 {
-		return fmt.Errorf(strings.Join(errStrings, "\n"))
-	}
-	return nil
+	return states
 }
 
 // Prints the histogram of the events and returns the number of bad events.
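For readers without the surrounding file, the following is a self-contained sketch of the new function, using local stand-in types since the real api package types are not reproduced here. One liberty taken: the commit's explicit var ok lookup is written as a zero-value map read, which is behaviorally identical in Go:

package main

import "fmt"

// Hypothetical stand-ins for the api types; only the fields the new
// function reads are modeled, so the sketch compiles on its own.
type containerStateTerminated struct{ ExitCode int }

type containerState struct {
	Termination *containerStateTerminated
}

type containerStatus struct {
	ContainerID          string
	RestartCount         int
	State                containerState
	LastTerminationState containerState
}

type containerFailures struct {
	status   *containerStateTerminated
	restarts int
}

// failedContainers follows the committed logic: record a current or previous
// termination per container ID, then fold in any nonzero restart count.
func failedContainers(statuses []containerStatus) map[string]containerFailures {
	if len(statuses) == 0 {
		return nil
	}
	states := make(map[string]containerFailures)
	for _, status := range statuses {
		if status.State.Termination != nil {
			states[status.ContainerID] = containerFailures{status: status.State.Termination}
		} else if status.LastTerminationState.Termination != nil {
			states[status.ContainerID] = containerFailures{status: status.LastTerminationState.Termination}
		}
		if status.RestartCount > 0 {
			state := states[status.ContainerID] // zero value if not yet present
			state.restarts = status.RestartCount
			states[status.ContainerID] = state
		}
	}
	return states
}

func main() {
	out := failedContainers([]containerStatus{
		{ContainerID: "docker://abc", RestartCount: 3,
			LastTerminationState: containerState{&containerStateTerminated{ExitCode: 1}}},
		{ContainerID: "docker://def"}, // healthy container: not reported
	})
	for id, f := range out {
		fmt.Printf("%s: restarts=%d, terminated=%v\n", id, f.restarts, f.status != nil)
	}
}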