Revert "Revert "Gracefully delete pods from the Kubelet""

This reverts commit 98115facfd.
This commit is contained in:
Clayton Coleman
2015-06-02 20:36:58 -04:00
parent 919c7e94e2
commit b842a7dd15
60 changed files with 841 additions and 246 deletions

View File

@@ -1011,6 +1011,12 @@ func deepCopy_api_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Clone
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@@ -59,6 +59,8 @@ func BeforeCreate(strategy RESTCreateStrategy, ctx api.Context, obj runtime.Obje
} else {
objectMeta.Namespace = api.NamespaceNone
}
objectMeta.DeletionTimestamp = nil
objectMeta.DeletionGracePeriodSeconds = nil
strategy.PrepareForCreate(obj)
api.FillObjectMetaSystemFields(ctx, objectMeta)
api.GenerateName(strategy, objectMeta)

View File

@@ -40,12 +40,37 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx api.Context, obj runtime.Obje
if strategy == nil {
return false, false, nil
}
_, _, kerr := objectMetaAndKind(strategy, obj)
objectMeta, _, kerr := objectMetaAndKind(strategy, obj)
if kerr != nil {
return false, false, kerr
}
// if the object is already being deleted
if objectMeta.DeletionTimestamp != nil {
// if we are already being deleted, we may only shorten the deletion grace period
// this means the object was gracefully deleted previously but deletionGracePeriodSeconds was not set,
// so we force deletion immediately
if objectMeta.DeletionGracePeriodSeconds == nil {
return false, false, nil
}
// only a shorter grace period may be provided by a user
if options.GracePeriodSeconds != nil {
period := int64(*options.GracePeriodSeconds)
if period > *objectMeta.DeletionGracePeriodSeconds {
return false, true, nil
}
objectMeta.DeletionGracePeriodSeconds = &period
options.GracePeriodSeconds = &period
return true, false, nil
}
// graceful deletion is pending, do nothing
options.GracePeriodSeconds = objectMeta.DeletionGracePeriodSeconds
return false, true, nil
}
if !strategy.CheckGracefulDelete(obj, options) {
return false, false, nil
}
objectMeta.DeletionGracePeriodSeconds = options.GracePeriodSeconds
return true, false, nil
}

View File

@@ -298,6 +298,22 @@ func (t *Tester) testUpdateFailsOnVersion(older runtime.Object) {
// =============================================================================
// Deletion tests.
func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) {
existing := createFn()
objectMeta := t.getObjectMetaOrFail(existing)
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(10))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) {
t.Errorf("unexpected error, object should not exist: %v", err)
}
if wasGracefulFn() {
t.Errorf("resource should not support graceful delete")
}
}
func (t *Tester) testDeleteInvokesValidation(invalid ...runtime.Object) {
for i, obj := range invalid {
objectMeta := t.getObjectMetaOrFail(obj)
@@ -322,25 +338,16 @@ func (t *Tester) testDeleteNonExist(createFn func() runtime.Object) {
})
}
func (t *Tester) testDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) {
existing := createFn()
objectMeta := t.getObjectMetaOrFail(existing)
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(10))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) {
t.Errorf("unexpected error, object should not exist: %v", err)
}
if wasGracefulFn() {
t.Errorf("resource should not support graceful delete")
}
}
// =============================================================================
// Graceful Deletion tests.
func (t *Tester) TestDeleteGraceful(createFn func() runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
t.TestDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn)
t.TestDeleteGracefulWithValue(createFn(), expectedGrace, wasGracefulFn)
t.TestDeleteGracefulUsesZeroOnNil(createFn(), 0)
t.TestDeleteGracefulExtend(createFn(), expectedGrace, wasGracefulFn)
}
func (t *Tester) testDeleteGracefulHasDefault(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta := t.getObjectMetaOrFail(existing)
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
@@ -348,12 +355,99 @@ func (t *Tester) testDeleteGracefulHasDefault(existing runtime.Object, expectedG
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); err != nil {
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
return
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
return
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}
func (t *Tester) TestDeleteGracefulWithValue(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta, err := api.ObjectMetaFor(existing)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
}
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace+2 {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}
func (t *Tester) TestDeleteGracefulExtend(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta, err := api.ObjectMetaFor(existing)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
}
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
}
// second delete duration is ignored
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}
func (t *Tester) testDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) {
@@ -364,7 +458,7 @@ func (t *Tester) testDeleteGracefulUsesZeroOnNil(existing runtime.Object, expect
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) {
t.Errorf("unexpected error, object should exist: %v", err)
t.Errorf("unexpected error, object should not exist: %v", err)
}
}

View File

@@ -151,6 +151,7 @@ func TestRoundTripTypes(t *testing.T) {
}
func TestEncode_Ptr(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
Labels: map[string]string{"name": "foo"},
@@ -158,6 +159,8 @@ func TestEncode_Ptr(t *testing.T) {
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
obj := runtime.Object(pod)

View File

@@ -89,6 +89,15 @@ func FuzzerFor(t *testing.T, version string, src rand.Source) *fuzz.Fuzzer {
j.LabelSelector, _ = labels.Parse("a=b")
j.FieldSelector, _ = fields.ParseSelector("a=b")
},
func(j *api.PodSpec, c fuzz.Continue) {
c.FuzzNoCustom(j)
// has a default value
ttl := int64(30)
if c.RandBool() {
ttl = int64(c.Uint32())
}
j.TerminationGracePeriodSeconds = &ttl
},
func(j *api.PodPhase, c fuzz.Continue) {
statuses := []api.PodPhase{api.PodPending, api.PodRunning, api.PodFailed, api.PodUnknown}
*j = statuses[c.Rand.Intn(len(statuses))]

View File

@@ -143,6 +143,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty"`
// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty"`
// Labels are key value pairs that may be used to scope and select individual resources.
// Label keys are of the form:
// label-key ::= prefixed-name | name

View File

@@ -1176,6 +1176,12 @@ func convert_api_ObjectMeta_To_v1_ObjectMeta(in *api.ObjectMeta, out *ObjectMeta
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
@@ -3591,6 +3597,12 @@ func convert_v1_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.ObjectMeta
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@@ -1010,6 +1010,12 @@ func deepCopy_v1_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Cloner
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@@ -113,6 +113,10 @@ func addDefaultingFuncs() {
if obj.HostNetwork {
defaultHostNetworkPorts(&obj.Containers)
}
if obj.TerminationGracePeriodSeconds == nil {
period := int64(DefaultTerminationGracePeriodSeconds)
obj.TerminationGracePeriodSeconds = &period
}
},
func(obj *Probe) {
if obj.TimeoutSeconds == 0 {

View File

@@ -141,6 +141,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested; see http://releases.k8s.io/HEAD/docs/devel/api-conventions.md#metadata"`
// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"`
// Labels are key value pairs that may be used to scope and select individual resources.
// TODO: replace map[string]string with labels.LabelSet type
Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services; see http://releases.k8s.io/HEAD/docs/user-guide/labels.md"`
@@ -858,6 +862,8 @@ const (
// DNSDefault indicates that the pod should use the default (as
// determined by kubelet) DNS settings.
DNSDefault DNSPolicy = "Default"
DefaultTerminationGracePeriodSeconds = 30
)
// PodSpec is a description of a pod
@@ -872,7 +878,7 @@ type PodSpec struct {
// The grace period is the duration in seconds after the processes running in the pod are sent
// a termination signal and the time when the processes are forcibly halted with a kill signal.
// Set this value longer than the expected cleanup time for your process.
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"`
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"`
ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer"`
// Optional: Set DNS policy. Defaults to "ClusterFirst"
DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"`

View File

@@ -265,6 +265,16 @@ func ValidateObjectMetaUpdate(new, old *api.ObjectMeta) errs.ValidationErrorList
} else {
new.CreationTimestamp = old.CreationTimestamp
}
// an object can never remove a deletion timestamp or clear/change grace period seconds
if !old.DeletionTimestamp.IsZero() {
new.DeletionTimestamp = old.DeletionTimestamp
}
if old.DeletionGracePeriodSeconds != nil && new.DeletionGracePeriodSeconds == nil {
new.DeletionGracePeriodSeconds = old.DeletionGracePeriodSeconds
}
if new.DeletionGracePeriodSeconds != nil && *new.DeletionGracePeriodSeconds != *old.DeletionGracePeriodSeconds {
allErrs = append(allErrs, errs.NewFieldInvalid("deletionGracePeriodSeconds", new.DeletionGracePeriodSeconds, "field is immutable; may only be changed via deletion"))
}
// Reject updates that don't specify a resource version
if new.ResourceVersion == "" {

View File

@@ -322,7 +322,8 @@ func FilterActivePods(pods []api.Pod) []*api.Pod {
var result []*api.Pod
for i := range pods {
if api.PodSucceeded != pods[i].Status.Phase &&
api.PodFailed != pods[i].Status.Phase {
api.PodFailed != pods[i].Status.Phase &&
pods[i].DeletionTimestamp == nil {
result = append(result, &pods[i])
}
}

View File

@@ -310,7 +310,11 @@ func (e *EndpointController) syncService(key string) {
continue
}
if len(pod.Status.PodIP) == 0 {
glog.V(4).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name)
glog.V(5).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name)
continue
}
if pod.DeletionTimestamp != nil {
glog.V(5).Infof("Pod is being deleted %s/%s", pod.Namespace, pod.Name)
continue
}

View File

@@ -213,6 +213,12 @@ func (rm *ReplicationManager) getPodController(pod *api.Pod) *api.ReplicationCon
// When a pod is created, enqueue the controller that manages it and update it's expectations.
func (rm *ReplicationManager) addPod(obj interface{}) {
pod := obj.(*api.Pod)
if pod.DeletionTimestamp != nil {
// on a restart of the controller manager, it's possible a new pod shows up in a state that
// is already pending deletion. Prevent the pod from being a creation observation.
rm.deletePod(pod)
return
}
if rc := rm.getPodController(pod); rc != nil {
rcKey, err := controller.KeyFunc(rc)
if err != nil {
@@ -234,6 +240,15 @@ func (rm *ReplicationManager) updatePod(old, cur interface{}) {
}
// TODO: Write a unittest for this case
curPod := cur.(*api.Pod)
if curPod.DeletionTimestamp != nil {
// when a pod is deleted gracefully it's deletion timestamp is first modified to reflect a grace period,
// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
// for modification of the deletion timestamp and expect an rc to create more replicas asap, not wait
// until the kubelet actually deletes the pod. This is different from the Phase of a pod changing, because
// an rc never initiates a phase change, and so is never asleep waiting for the same.
rm.deletePod(curPod)
return
}
if rc := rm.getPodController(curPod); rc != nil {
rm.enqueueController(rc)
}

View File

@@ -38,6 +38,7 @@ import (
)
func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList) {
grace := int64(30)
pods := &api.PodList{
ListMeta: api.ListMeta{
ResourceVersion: "15",
@@ -46,15 +47,17 @@ func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList)
{
ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@@ -536,6 +539,7 @@ func TestGetMultipleTypeObjectsWithDirectReference(t *testing.T) {
}
}
func watchTestData() ([]api.Pod, []watch.Event) {
grace := int64(30)
pods := []api.Pod{
{
ObjectMeta: api.ObjectMeta{
@@ -544,8 +548,9 @@ func watchTestData() ([]api.Pod, []watch.Event) {
ResourceVersion: "10",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
}
@@ -559,8 +564,9 @@ func watchTestData() ([]api.Pod, []watch.Event) {
ResourceVersion: "11",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@@ -573,8 +579,9 @@ func watchTestData() ([]api.Pod, []watch.Event) {
ResourceVersion: "12",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@@ -34,6 +34,7 @@ import (
)
func TestMerge(t *testing.T) {
grace := int64(30)
tests := []struct {
obj runtime.Object
fragment string
@@ -54,8 +55,9 @@ func TestMerge(t *testing.T) {
Name: "foo",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@@ -122,8 +124,9 @@ func TestMerge(t *testing.T) {
VolumeSource: api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{}},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@@ -417,7 +417,12 @@ func describePod(pod *api.Pod, rcs []api.ReplicationController, events *api.Even
fmt.Fprintf(out, "Image(s):\t%s\n", makeImageList(&pod.Spec))
fmt.Fprintf(out, "Node:\t%s\n", pod.Spec.NodeName+"/"+pod.Status.HostIP)
fmt.Fprintf(out, "Labels:\t%s\n", formatLabels(pod.Labels))
fmt.Fprintf(out, "Status:\t%s\n", string(pod.Status.Phase))
if pod.DeletionTimestamp != nil {
fmt.Fprintf(out, "Status:\tTerminating (expires %s)\n", pod.DeletionTimestamp.Time.Format(time.RFC1123Z))
fmt.Fprintf(out, "Termination Grace Period:\t%ss\n", pod.DeletionGracePeriodSeconds)
} else {
fmt.Fprintf(out, "Status:\t%s\n", string(pod.Status.Phase))
}
fmt.Fprintf(out, "Reason:\t%s\n", pod.Status.Reason)
fmt.Fprintf(out, "Message:\t%s\n", pod.Status.Message)
fmt.Fprintf(out, "IP:\t%s\n", pod.Status.PodIP)

View File

@@ -83,6 +83,7 @@ func fakeClientWith(testName string, t *testing.T, data map[string]string) Clien
}
func testData() (*api.PodList, *api.ServiceList) {
grace := int64(30)
pods := &api.PodList{
ListMeta: api.ListMeta{
ResourceVersion: "15",
@@ -91,15 +92,17 @@ func testData() (*api.PodList, *api.ServiceList) {
{
ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@@ -128,6 +128,7 @@ func TestHelperCreate(t *testing.T) {
return true
}
grace := int64(30)
tests := []struct {
Resp *http.Response
RespFunc client.HTTPClientFunc
@@ -172,8 +173,9 @@ func TestHelperCreate(t *testing.T) {
ExpectObject: &api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
Resp: &http.Response{StatusCode: http.StatusOK, Body: objBody(&api.Status{Status: api.StatusSuccess})},
@@ -381,6 +383,7 @@ func TestHelperReplace(t *testing.T) {
return true
}
grace := int64(30)
tests := []struct {
Resp *http.Response
RespFunc client.HTTPClientFunc
@@ -418,8 +421,9 @@ func TestHelperReplace(t *testing.T) {
ExpectObject: &api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "10"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
Overwrite: true,

View File

@@ -410,6 +410,9 @@ func printPod(pod *api.Pod, w io.Writer, withNamespace bool, wide bool, columnLa
readyContainers++
}
}
if pod.DeletionTimestamp != nil {
reason = "Terminating"
}
if withNamespace {
if _, err := fmt.Fprintf(w, "%s\t", namespace); err != nil {

View File

@@ -880,6 +880,7 @@ func TestUpdateExistingReplicationController(t *testing.T) {
func TestUpdateWithRetries(t *testing.T) {
codec := testapi.Codec()
grace := int64(30)
rc := &api.ReplicationController{
ObjectMeta: api.ObjectMeta{Name: "rc",
Labels: map[string]string{
@@ -897,8 +898,9 @@ func TestUpdateWithRetries(t *testing.T) {
},
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@@ -31,6 +31,7 @@ import (
func noDefault(*api.Pod) error { return nil }
func TestDecodeSinglePod(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
TypeMeta: api.TypeMeta{
APIVersion: "",
@@ -41,8 +42,9 @@ func TestDecodeSinglePod(t *testing.T) {
Namespace: "mynamespace",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",
@@ -91,6 +93,7 @@ func TestDecodeSinglePod(t *testing.T) {
}
func TestDecodePodList(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
TypeMeta: api.TypeMeta{
APIVersion: "",
@@ -101,8 +104,9 @@ func TestDecodePodList(t *testing.T) {
Namespace: "mynamespace",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",

View File

@@ -217,9 +217,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de
for _, ref := range filtered {
name := kubecontainer.GetPodFullName(ref)
if existing, found := pods[name]; found {
if !reflect.DeepEqual(existing.Spec, ref.Spec) {
if checkAndUpdatePod(existing, ref) {
// this is an update
existing.Spec = ref.Spec
updates.Pods = append(updates.Pods, existing)
continue
}
@@ -261,9 +260,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de
name := kubecontainer.GetPodFullName(ref)
if existing, found := oldPods[name]; found {
pods[name] = existing
if !reflect.DeepEqual(existing.Spec, ref.Spec) {
if checkAndUpdatePod(existing, ref) {
// this is an update
existing.Spec = ref.Spec
updates.Pods = append(updates.Pods, existing)
continue
}
@@ -335,6 +333,23 @@ func filterInvalidPods(pods []*api.Pod, source string, recorder record.EventReco
return
}
// checkAndUpdatePod updates existing if ref makes a meaningful change and returns true, or
// returns false if there was no update.
func checkAndUpdatePod(existing, ref *api.Pod) bool {
// TODO: it would be better to update the whole object and only preserve certain things
// like the source annotation or the UID (to ensure safety)
if reflect.DeepEqual(existing.Spec, ref.Spec) &&
reflect.DeepEqual(existing.DeletionTimestamp, ref.DeletionTimestamp) &&
reflect.DeepEqual(existing.DeletionGracePeriodSeconds, ref.DeletionGracePeriodSeconds) {
return false
}
// this is an update
existing.Spec = ref.Spec
existing.DeletionTimestamp = ref.DeletionTimestamp
existing.DeletionGracePeriodSeconds = ref.DeletionGracePeriodSeconds
return true
}
// Sync sends a copy of the current state through the update channel.
func (s *podStorage) Sync() {
s.updateLock.Lock()

View File

@@ -69,6 +69,7 @@ func writeTestFile(t *testing.T, dir, name string, contents string) *os.File {
func TestReadPodsFromFile(t *testing.T) {
hostname := "random-test-hostname"
grace := int64(30)
var testCases = []struct {
desc string
pod runtime.Object
@@ -98,9 +99,10 @@ func TestReadPodsFromFile(t *testing.T) {
SelfLink: getSelfLink("test-"+hostname, "mynamespace"),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",

View File

@@ -123,6 +123,7 @@ func TestExtractInvalidPods(t *testing.T) {
func TestExtractPodsFromHTTP(t *testing.T) {
hostname := "different-value"
grace := int64(30)
var testCases = []struct {
desc string
pods runtime.Object
@@ -156,9 +157,11 @@ func TestExtractPodsFromHTTP(t *testing.T) {
SelfLink: getSelfLink("foo-"+hostname, "mynamespace"),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "1",
Image: "foo",
@@ -209,9 +212,11 @@ func TestExtractPodsFromHTTP(t *testing.T) {
SelfLink: getSelfLink("foo-"+hostname, kubelet.NamespaceDefault),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "1",
Image: "foo",
@@ -229,9 +234,11 @@ func TestExtractPodsFromHTTP(t *testing.T) {
SelfLink: getSelfLink("bar-"+hostname, kubelet.NamespaceDefault),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "2",
Image: "bar",

View File

@@ -163,13 +163,13 @@ func (f *FakeRuntime) SyncPod(pod *api.Pod, _ Pod, _ api.PodStatus, _ []api.Secr
return f.Err
}
func (f *FakeRuntime) KillPod(pod Pod) error {
func (f *FakeRuntime) KillPod(pod *api.Pod, runningPod Pod) error {
f.Lock()
defer f.Unlock()
f.CalledFunctions = append(f.CalledFunctions, "KillPod")
f.KilledPods = append(f.KilledPods, string(pod.ID))
for _, c := range pod.Containers {
f.KilledPods = append(f.KilledPods, string(runningPod.ID))
for _, c := range runningPod.Containers {
f.KilledContainers = append(f.KilledContainers, c.Name)
}
return f.Err

View File

@@ -54,8 +54,8 @@ type Runtime interface {
GetPods(all bool) ([]*Pod, error)
// Syncs the running pod into the desired pod.
SyncPod(pod *api.Pod, runningPod Pod, podStatus api.PodStatus, pullSecrets []api.Secret) error
// KillPod kills all the containers of a pod.
KillPod(pod Pod) error
// KillPod kills all the containers of a pod. Pod may be nil, running pod must not be.
KillPod(pod *api.Pod, runningPod Pod) error
// GetPodStatus retrieves the status of the pod, including the information of
// all containers in the pod. Clients of this interface assume the containers
// statuses in a pod always have a deterministic ordering (eg: sorted by name).

View File

@@ -56,12 +56,21 @@ import (
const (
maxReasonCacheEntries = 200
kubernetesPodLabel = "io.kubernetes.pod.data"
kubernetesContainerLabel = "io.kubernetes.container.name"
// ndots specifies the minimum number of dots that a domain name must contain for the resolver to consider it as FQDN (fully-qualified)
// we want to able to consider SRV lookup names like _dns._udp.kube-dns.default.svc to be considered relative.
// hence, setting ndots to be 5.
ndotsDNSOption = "options ndots:5\n"
// In order to avoid unnecessary SIGKILLs, give every container a minimum grace
// period after SIGTERM. Docker will guarantee the termination, but SIGTERM is
// potentially dangerous.
// TODO: evaluate whether there are scenarios in which SIGKILL is preferable to
// SIGTERM for certain process types, which may justify setting this to 0.
minimumGracePeriodInSeconds = 2
kubernetesNameLabel = "io.kubernetes.pod.name"
kubernetesPodLabel = "io.kubernetes.pod.data"
kubernetesTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod"
kubernetesContainerLabel = "io.kubernetes.container.name"
)
// DockerManager implements the Runtime interface.
@@ -588,12 +597,19 @@ func (dm *DockerManager) runContainer(
if len(containerHostname) > hostnameMaxLen {
containerHostname = containerHostname[:hostnameMaxLen]
}
// Pod information is recorded on the container as labels to preserve it in the event the pod is deleted
// while the Kubelet is down and there is no information available to recover the pod. This includes
// termination information like the termination grace period and the pre stop hooks.
// TODO: keep these labels up to date if the pod changes
namespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
labels := map[string]string{
"io.kubernetes.pod.name": namespacedName.String(),
kubernetesNameLabel: namespacedName.String(),
}
if pod.Spec.TerminationGracePeriodSeconds != nil {
labels[kubernetesTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10)
}
if container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
glog.V(1).Infof("Setting preStop hook")
// TODO: This is kind of hacky, we should really just encode the bits we need.
data, err := latest.Codec.Encode(pod)
if err != nil {
@@ -1104,40 +1120,56 @@ func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port uint16, stream
}
// Kills all containers in the specified pod
func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error {
func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
// Send the kills in parallel since they may take a long time. Len + 1 since there
// can be Len errors + the networkPlugin teardown error.
errs := make(chan error, len(pod.Containers)+1)
errs := make(chan error, len(runningPod.Containers)+1)
wg := sync.WaitGroup{}
var networkID types.UID
for _, container := range pod.Containers {
var (
networkContainer *kubecontainer.Container
networkSpec *api.Container
)
for _, container := range runningPod.Containers {
wg.Add(1)
go func(container *kubecontainer.Container) {
defer util.HandleCrash()
defer wg.Done()
var containerSpec *api.Container
if pod != nil {
for i, c := range pod.Spec.Containers {
if c.Name == container.Name {
containerSpec = &pod.Spec.Containers[i]
break
}
}
}
// TODO: Handle this without signaling the pod infra container to
// adapt to the generic container runtime.
if container.Name == PodInfraContainerName {
// Store the container runtime for later deletion.
// We do this so that PreStop handlers can run in the network namespace.
networkID = container.ID
networkContainer = container
networkSpec = containerSpec
return
}
if err := dm.killContainer(container.ID); err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID)
err := dm.killContainer(container.ID, containerSpec, pod)
if err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
errs <- err
}
}(container)
}
wg.Wait()
if len(networkID) > 0 {
if err := dm.networkPlugin.TearDownPod(pod.Namespace, pod.Name, kubeletTypes.DockerID(networkID)); err != nil {
if networkContainer != nil {
if err := dm.networkPlugin.TearDownPod(runningPod.Namespace, runningPod.Name, kubeletTypes.DockerID(networkContainer.ID)); err != nil {
glog.Errorf("Failed tearing down the infra container: %v", err)
errs <- err
}
if err := dm.killContainer(networkID); err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID)
if err := dm.killContainer(networkContainer.ID, networkSpec, pod); err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
errs <- err
}
}
@@ -1152,75 +1184,128 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error {
return nil
}
// KillContainerInPod kills a container in the pod.
func (dm *DockerManager) KillContainerInPod(container api.Container, pod *api.Pod) error {
// Locate the container.
pods, err := dm.GetPods(false)
if err != nil {
return err
}
targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
targetContainer := targetPod.FindContainerByName(container.Name)
if targetContainer == nil {
return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
}
return dm.killContainer(targetContainer.ID)
}
// TODO(vmarmol): Unexport this as it is no longer used externally.
// KillContainer kills a container identified by containerID.
// Internally, it invokes docker's StopContainer API with a timeout of 10s.
// TODO: Deprecate this function in favor of KillContainerInPod.
func (dm *DockerManager) KillContainer(containerID types.UID) error {
return dm.killContainer(containerID)
}
func (dm *DockerManager) killContainer(containerID types.UID) error {
ID := string(containerID)
glog.V(2).Infof("Killing container with id %q", ID)
inspect, err := dm.client.InspectContainer(ID)
if err != nil {
return err
}
var found bool
var preStop string
if inspect != nil && inspect.Config != nil && inspect.Config.Labels != nil {
preStop, found = inspect.Config.Labels[kubernetesPodLabel]
}
if found {
var pod api.Pod
err := latest.Codec.DecodeInto([]byte(preStop), &pod)
// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod,
// and will attempt to lookup the other information if missing.
func (dm *DockerManager) KillContainerInPod(containerID types.UID, container *api.Container, pod *api.Pod) error {
switch {
case len(containerID) == 0:
// Locate the container.
pods, err := dm.GetPods(false)
if err != nil {
glog.Errorf("Failed to decode prestop: %s, %s", preStop, ID)
} else {
name := inspect.Config.Labels[kubernetesContainerLabel]
var container *api.Container
return err
}
targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
targetContainer := targetPod.FindContainerByName(container.Name)
if targetContainer == nil {
return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
}
containerID = targetContainer.ID
case container == nil || pod == nil:
// Read information about the container from labels
inspect, err := dm.client.InspectContainer(string(containerID))
if err != nil {
return err
}
storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect)
if cerr != nil {
glog.Errorf("unable to access pod data from container: %v", err)
}
if container == nil {
container = storedContainer
}
if pod == nil {
pod = storedPod
}
}
return dm.killContainer(containerID, container, pod)
}
// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke
// KillContainerInPod if information must be retrieved first.
func (dm *DockerManager) killContainer(containerID types.UID, container *api.Container, pod *api.Pod) error {
ID := string(containerID)
name := ID
if container != nil {
name = fmt.Sprintf("%s %s", name, container.Name)
}
if pod != nil {
name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name)
}
gracePeriod := int64(minimumGracePeriodInSeconds)
if pod != nil && pod.DeletionGracePeriodSeconds != nil {
gracePeriod = *pod.DeletionGracePeriodSeconds
}
glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod)
if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
glog.V(4).Infof("Running preStop hook for container %q", name)
start := util.Now()
// TODO: timebox PreStop execution to at most gracePeriod
if err := dm.runner.Run(ID, pod, container, container.Lifecycle.PreStop); err != nil {
glog.Errorf("preStop hook for container %q failed: %v", name, err)
}
gracePeriod -= int64(util.Now().Sub(start.Time).Seconds())
}
dm.readinessManager.RemoveReadiness(ID)
// always give containers a minimal shutdown window to avoid unnecessary SIGKILLs
if gracePeriod < minimumGracePeriodInSeconds {
gracePeriod = minimumGracePeriodInSeconds
}
err := dm.client.StopContainer(ID, uint(gracePeriod))
ref, ok := dm.containerRefManager.GetRef(ID)
if !ok {
glog.Warningf("No ref for pod '%q'", name)
} else {
// TODO: pass reason down here, and state, or move this call up the stack.
dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12))
}
return err
}
var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container")
// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels
func containerAndPodFromLabels(inspect *docker.Container) (pod *api.Pod, container *api.Container, err error) {
if inspect == nil && inspect.Config == nil && inspect.Config.Labels == nil {
return nil, nil, errNoPodOnContainer
}
labels := inspect.Config.Labels
// the pod data may not be set
if body, found := labels[kubernetesPodLabel]; found {
pod = &api.Pod{}
if err = latest.Codec.DecodeInto([]byte(body), pod); err == nil {
name := labels[kubernetesContainerLabel]
for ix := range pod.Spec.Containers {
if pod.Spec.Containers[ix].Name == name {
container = &pod.Spec.Containers[ix]
break
}
}
if container != nil {
glog.V(1).Infof("Running preStop hook")
if err := dm.runner.Run(ID, &pod, container, container.Lifecycle.PreStop); err != nil {
glog.Errorf("failed to run preStop hook: %v", err)
}
} else {
glog.Errorf("unable to find container %v, %s", pod, name)
if container == nil {
err = fmt.Errorf("unable to find container %s in pod %v", name, pod)
}
} else {
pod = nil
}
}
// attempt to find the default grace period if we didn't commit a pod, but set the generic metadata
// field (the one used by kill)
if pod == nil {
if period, ok := labels[kubernetesTerminationGracePeriodLabel]; ok {
if seconds, err := strconv.ParseInt(period, 10, 64); err == nil {
pod = &api.Pod{}
pod.DeletionGracePeriodSeconds = &seconds
}
}
}
dm.readinessManager.RemoveReadiness(ID)
err = dm.client.StopContainer(ID, 10)
ref, ok := dm.containerRefManager.GetRef(ID)
if !ok {
glog.Warningf("No ref for pod '%v'", ID)
} else {
// TODO: pass reason down here, and state, or move this call up the stack.
dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12))
}
return err
return
}
// Run a single container from a pod. Returns the docker container ID
@@ -1253,7 +1338,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart)
if handlerErr != nil {
dm.killContainer(types.UID(id))
dm.killContainer(types.UID(id), container, pod)
return kubeletTypes.DockerID(""), fmt.Errorf("failed to call event handler: %v", handlerErr)
}
}
@@ -1413,6 +1498,11 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
containersToKeep := make(map[kubeletTypes.DockerID]int)
createPodInfraContainer := false
if pod.DeletionTimestamp != nil {
glog.V(4).Infof("Pod is terminating %q", podFullName)
return PodContainerChangesSpec{}, nil
}
var err error
var podInfraContainerID kubeletTypes.DockerID
var changed bool
@@ -1547,7 +1637,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
}
// Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container)
err = dm.KillPod(runningPod)
err = dm.KillPod(pod, runningPod)
if err != nil {
return err
}
@@ -1557,7 +1647,15 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
_, keep := containerChanges.ContainersToKeep[kubeletTypes.DockerID(container.ID)]
if !keep {
glog.V(3).Infof("Killing unwanted container %+v", container)
err = dm.KillContainer(container.ID)
// attempt to find the appropriate container policy
var podContainer *api.Container
for i, c := range pod.Spec.Containers {
if c.Name == container.Name {
podContainer = &pod.Spec.Containers[i]
break
}
}
err = dm.KillContainerInPod(container.ID, podContainer, pod)
if err != nil {
glog.Errorf("Error killing container: %v", err)
}

View File

@@ -405,7 +405,7 @@ func TestKillContainerInPod(t *testing.T) {
manager.readinessManager.SetReadiness(c.ID, true)
}
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil {
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
@@ -478,14 +478,14 @@ func TestKillContainerInPodWithPreStop(t *testing.T) {
manager.readinessManager.SetReadiness(c.ID, true)
}
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil {
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
if err := fakeDocker.AssertStopped([]string{containerToKill.ID}); err != nil {
t.Errorf("container was not stopped correctly: %v", err)
}
verifyCalls(t, fakeDocker, []string{"list", "inspect_container", "create_exec", "start_exec", "stop"})
verifyCalls(t, fakeDocker, []string{"list", "create_exec", "start_exec", "stop"})
if !reflect.DeepEqual(expectedCmd, fakeDocker.execCmd) {
t.Errorf("expected: %v, got %v", expectedCmd, fakeDocker.execCmd)
}
@@ -522,7 +522,7 @@ func TestKillContainerInPodWithError(t *testing.T) {
manager.readinessManager.SetReadiness(c.ID, true)
}
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err == nil {
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err == nil {
t.Errorf("expected error, found nil")
}
@@ -1021,7 +1021,7 @@ func TestSyncPodDeletesWithNoPodInfraContainer(t *testing.T) {
verifyCalls(t, fakeDocker, []string{
// Kill the container since pod infra container is not running.
"inspect_container", "stop",
"stop",
// Create pod infra container.
"create", "start", "inspect_container",
// Create container.
@@ -1096,7 +1096,7 @@ func TestSyncPodDeletesDuplicate(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Kill the duplicated container.
"inspect_container", "stop",
"stop",
})
// Expect one of the duplicates to be killed.
if len(fakeDocker.Stopped) != 1 || (fakeDocker.Stopped[0] != "1234" && fakeDocker.Stopped[0] != "4567") {
@@ -1150,7 +1150,7 @@ func TestSyncPodBadHash(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Kill and restart the bad hash container.
"inspect_container", "stop", "create", "start", "inspect_container",
"stop", "create", "start",
})
if err := fakeDocker.AssertStopped([]string{"1234"}); err != nil {
@@ -1208,7 +1208,7 @@ func TestSyncPodsUnhealthy(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Kill the unhealthy container.
"inspect_container", "stop",
"stop",
// Restart the unhealthy container.
"create", "start", "inspect_container",
})
@@ -1443,7 +1443,7 @@ func TestSyncPodWithRestartPolicy(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Stop the last pod infra container.
"inspect_container", "stop",
"stop",
},
[]string{},
[]string{"9876"},
@@ -1910,7 +1910,7 @@ func TestSyncPodEventHandlerFails(t *testing.T) {
// Create the container.
"create", "start",
// Kill the container since event handler fails.
"inspect_container", "stop",
"stop",
})
// TODO(yifan): Check the stopped container's name.

View File

@@ -1120,8 +1120,8 @@ func parseResolvConf(reader io.Reader) (nameservers []string, searches []string,
}
// Kill all running containers in a pod (includes the pod infra container).
func (kl *Kubelet) killPod(pod kubecontainer.Pod) error {
return kl.containerRuntime.KillPod(pod)
func (kl *Kubelet) killPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
return kl.containerRuntime.KillPod(pod, runningPod)
}
type empty struct{}
@@ -1179,7 +1179,7 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecont
// Kill pods we can't run.
err := canRunPod(pod)
if err != nil {
kl.killPod(runningPod)
kl.killPod(pod, runningPod)
return err
}
@@ -1550,7 +1550,7 @@ func (kl *Kubelet) killUnwantedPods(desiredPods map[types.UID]empty,
}()
glog.V(1).Infof("Killing unwanted pod %q", pod.Name)
// Stop the containers.
err = kl.killPod(*pod)
err = kl.killPod(nil, *pod)
if err != nil {
glog.Errorf("Failed killing the pod %q: %v", pod.Name, err)
return

View File

@@ -676,11 +676,11 @@ func (r *runtime) GetPods(all bool) ([]*kubecontainer.Pod, error) {
}
// KillPod invokes 'systemctl kill' to kill the unit that runs the pod.
func (r *runtime) KillPod(pod kubecontainer.Pod) error {
glog.V(4).Infof("Rkt is killing pod: name %q.", pod.Name)
func (r *runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
glog.V(4).Infof("Rkt is killing pod: name %q.", runningPod.Name)
// TODO(yifan): More graceful stop. Replace with StopUnit and wait for a timeout.
r.systemd.KillUnit(makePodServiceFileName(pod.ID), int32(syscall.SIGKILL))
r.systemd.KillUnit(makePodServiceFileName(runningPod.ID), int32(syscall.SIGKILL))
return r.systemd.Reload()
}
@@ -887,7 +887,7 @@ func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus
if restartPod {
// TODO(yifan): Handle network plugin.
if err := r.KillPod(runningPod); err != nil {
if err := r.KillPod(pod, runningPod); err != nil {
return err
}
if err := r.RunPod(pod); err != nil {

View File

@@ -24,6 +24,7 @@ import (
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/client"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types"
@@ -161,13 +162,24 @@ func (s *statusManager) syncBatch() error {
}
// TODO: make me easier to express from client code
statusPod, err = s.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name)
if errors.IsNotFound(err) {
glog.V(3).Infof("Pod %q was deleted on the server", pod.Name)
return nil
}
if err == nil {
statusPod.Status = status
_, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod)
// TODO: handle conflict as a retry, make that easier too.
statusPod, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod)
if err == nil {
glog.V(3).Infof("Status for pod %q updated successfully", kubeletUtil.FormatPodName(pod))
return nil
if statusPod.DeletionTimestamp == nil || !allTerminated(statusPod.Status.ContainerStatuses) {
return nil
}
if err := s.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil {
glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name)
return nil
}
}
}
@@ -181,3 +193,14 @@ func (s *statusManager) syncBatch() error {
go s.DeletePodStatus(podFullName)
return fmt.Errorf("error updating status for pod %q: %v", kubeletUtil.FormatPodName(pod), err)
}
// allTerminated returns true if every status is terminated, or the status list
// is empty.
func allTerminated(statuses []api.ContainerStatus) bool {
for _, status := range statuses {
if status.State.Terminated == nil {
return false
}
}
return true
}

View File

@@ -695,7 +695,7 @@ func TestDelete(t *testing.T) {
// If the controller is still around after trying to delete either the delete
// failed, or we're deleting it gracefully.
if fakeClient.Data[key].R.Node != nil {
return true
return fakeClient.Data[key].R.Node.TTL != 0
}
return false
}

View File

@@ -37,6 +37,7 @@ var testTTL uint64 = 60
func NewTestEventStorage(t *testing.T) (*tools.FakeEtcdClient, *REST) {
f := tools.NewFakeEtcdClient(t)
f.HideExpires = true
f.TestIndex = true
s := etcdstorage.NewEtcdStorage(f, testapi.Codec(), etcdtest.PathPrefix())

View File

@@ -300,7 +300,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
key, _ := storage.KeyFunc(ctx, "foo")
key = etcdtest.AddPrefix(key)
pvStart := validNewPersistentVolume("foo")
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 0)
pvIn := &api.PersistentVolume{
ObjectMeta: api.ObjectMeta{

View File

@@ -298,7 +298,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
key, _ := storage.KeyFunc(ctx, "foo")
key = etcdtest.AddPrefix(key)
pvcStart := validNewPersistentVolumeClaim("foo", api.NamespaceDefault)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 0)
pvc := &api.PersistentVolumeClaim{
ObjectMeta: api.ObjectMeta{

View File

@@ -56,6 +56,7 @@ func newStorage(t *testing.T) (*REST, *BindingREST, *StatusREST, *tools.FakeEtcd
}
func validNewPod() *api.Pod {
grace := int64(30)
return &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
@@ -64,6 +65,8 @@ func validNewPod() *api.Pod {
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{
{
Name: "foo",
@@ -134,9 +137,9 @@ func TestDelete(t *testing.T) {
if fakeEtcdClient.Data[key].R.Node == nil {
return false
}
return fakeEtcdClient.Data[key].R.Node.TTL == 30
return fakeEtcdClient.Data[key].R.Node.TTL != 0
}
test.TestDelete(createFn, gracefulSetFn)
test.TestDeleteGraceful(createFn, 30, gracefulSetFn)
}
func expectPod(t *testing.T, out runtime.Object) (*api.Pod, bool) {
@@ -1027,6 +1030,7 @@ func TestEtcdUpdateScheduled(t *testing.T) {
},
}), 1)
grace := int64(30)
podIn := api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
@@ -1048,6 +1052,8 @@ func TestEtcdUpdateScheduled(t *testing.T) {
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
_, _, err := registry.Update(ctx, &podIn)
@@ -1088,7 +1094,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
},
},
}
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 0)
podIn := api.Pod{
ObjectMeta: api.ObjectMeta{
@@ -1117,6 +1123,8 @@ func TestEtcdUpdateStatus(t *testing.T) {
expected := podStart
expected.ResourceVersion = "2"
grace := int64(30)
expected.Spec.TerminationGracePeriodSeconds = &grace
expected.Spec.RestartPolicy = api.RestartPolicyAlways
expected.Spec.DNSPolicy = api.DNSClusterFirst
expected.Spec.Containers[0].ImagePullPolicy = api.PullIfNotPresent

View File

@@ -81,13 +81,31 @@ func (podStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fiel
return append(errorList, validation.ValidatePodUpdate(obj.(*api.Pod), old.(*api.Pod))...)
}
// AllowUnconditionalUpdate allows pods to be overwritten
func (podStrategy) AllowUnconditionalUpdate() bool {
return true
}
// CheckGracefulDelete allows a pod to be gracefully deleted.
// CheckGracefulDelete allows a pod to be gracefully deleted. It updates the DeleteOptions to
// reflect the desired grace value.
func (podStrategy) CheckGracefulDelete(obj runtime.Object, options *api.DeleteOptions) bool {
return false
if options == nil {
return false
}
period := int64(0)
// user has specified a value
if options.GracePeriodSeconds != nil {
period = *options.GracePeriodSeconds
} else {
// use the default value if set, or deletes the pod immediately (0)
pod := obj.(*api.Pod)
if pod.Spec.TerminationGracePeriodSeconds != nil {
period = *pod.Spec.TerminationGracePeriodSeconds
}
}
// ensure the options and the pod are in sync
options.GracePeriodSeconds = &period
return true
}
type podStatusStrategy struct {
@@ -100,6 +118,7 @@ func (podStatusStrategy) PrepareForUpdate(obj, old runtime.Object) {
newPod := obj.(*api.Pod)
oldPod := old.(*api.Pod)
newPod.Spec = oldPod.Spec
newPod.DeletionTimestamp = nil
}
func (podStatusStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fielderrors.ValidationErrorList {

View File

@@ -389,7 +389,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
key, _ := registry.KeyFunc(ctx, "foo")
key = etcdtest.AddPrefix(key)
resourcequotaStart := validNewResourceQuota()
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 0)
resourcequotaIn := &api.ResourceQuota{
ObjectMeta: api.ObjectMeta{

View File

@@ -394,14 +394,21 @@ func (h *etcdHelper) GuaranteedUpdate(key string, ptrToType runtime.Object, igno
ttl := uint64(0)
if node != nil {
index = node.ModifiedIndex
if node.TTL > 0 {
if node.TTL != 0 {
ttl = uint64(node.TTL)
}
if node.Expiration != nil && ttl == 0 {
ttl = 1
}
} else if res != nil {
index = res.EtcdIndex
}
if newTTL != nil {
if ttl != 0 && *newTTL == 0 {
// TODO: remove this after we have verified this is no longer an issue
glog.V(4).Infof("GuaranteedUpdate is clearing TTL for %q, may not be intentional", key)
}
ttl = *newTTL
}

View File

@@ -123,28 +123,32 @@ func TestList(t *testing.T) {
},
},
}
grace := int64(30)
expect := api.PodList{
ListMeta: api.ListMeta{ResourceVersion: "10"},
Items: []api.Pod{
{
ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@@ -206,6 +210,7 @@ func TestListAcrossDirectories(t *testing.T) {
},
},
}
grace := int64(30)
expect := api.PodList{
ListMeta: api.ListMeta{ResourceVersion: "10"},
Items: []api.Pod{
@@ -213,22 +218,25 @@ func TestListAcrossDirectories(t *testing.T) {
{
ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@@ -278,28 +286,32 @@ func TestListExcludesDirectories(t *testing.T) {
},
},
}
grace := int64(30)
expect := api.PodList{
ListMeta: api.ListMeta{ResourceVersion: "10"},
Items: []api.Pod{
{
ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@@ -319,11 +331,13 @@ func TestGet(t *testing.T) {
fakeClient := tools.NewFakeEtcdClient(t)
helper := newEtcdHelper(fakeClient, testapi.Codec(), etcdtest.PathPrefix())
key := etcdtest.AddPrefix("/some/key")
grace := int64(30)
expect := api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
fakeClient.Set(key, runtime.EncodeOrDie(testapi.Codec(), &expect), 0)

View File

@@ -38,6 +38,7 @@ const (
EtcdSet = "set"
EtcdCAS = "compareAndSwap"
EtcdDelete = "delete"
EtcdExpire = "expire"
)
// TransformFunc attempts to convert an object to another object for use with a watcher.
@@ -353,7 +354,7 @@ func (w *etcdWatcher) sendResult(res *etcd.Response) {
w.sendAdd(res)
case EtcdSet, EtcdCAS:
w.sendModify(res)
case EtcdDelete:
case EtcdDelete, EtcdExpire:
w.sendDelete(res)
default:
glog.Errorf("unknown action: %v", res.Action)

View File

@@ -20,6 +20,7 @@ import (
"errors"
"sort"
"sync"
"time"
"github.com/coreos/go-etcd/etcd"
)
@@ -52,6 +53,8 @@ type FakeEtcdClient struct {
TestIndex bool
ChangeIndex uint64
LastSetTTL uint64
// Will avoid setting the expires header on objects to make comparison easier
HideExpires bool
Machines []string
// Will become valid after Watch is called; tester may write to it. Tester may
@@ -175,6 +178,11 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons
prevResult := f.Data[key]
createdIndex := prevResult.R.Node.CreatedIndex
f.t.Logf("updating %v, index %v -> %v (ttl: %d)", key, createdIndex, i, ttl)
var expires *time.Time
if !f.HideExpires && ttl > 0 {
now := time.Now()
expires = &now
}
result := EtcdResponseWithError{
R: &etcd.Response{
Node: &etcd.Node{
@@ -182,6 +190,7 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons
CreatedIndex: createdIndex,
ModifiedIndex: i,
TTL: int64(ttl),
Expiration: expires,
},
},
}