kubernetes/pkg/kubelet/cm/dra/state/checkpoint.go
Moshe Levi e7256e08d3 kubelet dra: add checkpointing mechanism in the DRA Manager
The checkpointing mechanism will repopulate DRA Manager in-memory cache on kubelet restart.
This will ensure that the information needed by the PodResources API is available across
a kubelet restart.

The ClaimInfoState struct represents the DRA Manager in-memory cache state in the checkpoint.
It is embedded in ClaimInfo, which also includes the annotation field. The separation between
the in-memory cache and the cache state in the checkpoint exists so that we won't be tied to the
in-memory cache struct, which may change in the future. In the ClaimInfoState we save the minimal
required fields to restore the in-memory cache.

Signed-off-by: Moshe Levi <moshele@nvidia.com>
2023-03-10 12:22:15 +02:00

69 lines
2.0 KiB
Go

/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"encoding/json"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)
// Compile-time assertion that *DRAManagerCheckpoint implements
// checkpointmanager.Checkpoint.
var _ checkpointmanager.Checkpoint = (*DRAManagerCheckpoint)(nil)

// checkpointVersion is the version string written into newly created checkpoints.
const checkpointVersion = "v1"
// DRAManagerCheckpoint struct is used to store pod dynamic resources assignments in a checkpoint.
// It is serialized to and from JSON by MarshalCheckpoint and UnmarshalCheckpoint.
type DRAManagerCheckpoint struct {
// Version is the checkpoint format version; NewDRAManagerCheckpoint sets it to checkpointVersion.
Version string `json:"version"`
// Entries holds the checkpointed claim info states; the key is omitted from the JSON when the list is empty.
Entries ClaimInfoStateList `json:"entries,omitempty"`
// Checksum is computed by MarshalCheckpoint over the checkpoint with this field zeroed,
// and is checked by VerifyChecksum.
Checksum checksum.Checksum `json:"checksum"`
}
// ClaimInfoStateList is the list of ClaimInfoState entries stored in a checkpoint.
type ClaimInfoStateList []ClaimInfoState
// NewDRAManagerCheckpoint builds a fresh checkpoint stamped with the current
// checkpointVersion and holding an empty (non-nil) entry list.
func NewDRAManagerCheckpoint() *DRAManagerCheckpoint {
	cp := new(DRAManagerCheckpoint)
	cp.Version = checkpointVersion
	cp.Entries = ClaimInfoStateList{}
	return cp
}
// MarshalCheckpoint refreshes the checkpoint's Checksum field and returns the
// checkpoint encoded as JSON. The receiver's Checksum is overwritten as a
// side effect.
func (dc *DRAManagerCheckpoint) MarshalCheckpoint() ([]byte, error) {
	// Clear any previous checksum first so that the new checksum is computed
	// over the checkpoint contents only, not over a stale checksum value.
	dc.Checksum = 0
	sum := checksum.New(dc)
	dc.Checksum = sum

	snapshot := *dc
	return json.Marshal(snapshot)
}
// UnmarshalCheckpoint decodes the JSON-encoded blob into the receiver and
// returns whatever error the JSON decoder reports (nil on success).
func (dc *DRAManagerCheckpoint) UnmarshalCheckpoint(blob []byte) error {
	err := json.Unmarshal(blob, dc)
	return err
}
// VerifyChecksum checks the checksum stored in the checkpoint against the
// checkpoint's current contents and returns the result of that verification.
// The stored checksum is left unchanged when the call returns.
func (dc *DRAManagerCheckpoint) VerifyChecksum() error {
	stored := dc.Checksum

	// Verification must see the checkpoint with a zeroed Checksum field,
	// matching the state MarshalCheckpoint computes the checksum over.
	dc.Checksum = 0
	verifyErr := stored.Verify(dc)

	// Put the original checksum back before reporting the outcome.
	dc.Checksum = stored
	return verifyErr
}