integration: simplify CNI-fp and add README.md

* Use delegated plugin call to simplify cni-bridge-cni
* Add README.md for cni-bridge-cni

Signed-off-by: Wei Fu <fuweid89@gmail.com>
This commit is contained in:
Wei Fu
2022-07-21 23:37:39 +08:00
parent cbebeb9440
commit e6a2c07902
9 changed files with 659 additions and 218 deletions

View File

@@ -0,0 +1,159 @@
## cni-bridge-f(ail)p(oint)
### Overview
The `cni-bridge-fp` is a CNI plugin which delegates interface creation
to the [CNI bridge plugin][1] and allows users to inject failpoints before delegation.
Since the CNI plugin is invoked as a binary by CRI and is short-lived,
the failpoints need to be configured in a JSON file, which can be persisted.
Here is an example of a failpoint description.
```json
{
"cmdAdd": "1*error(you-shall-not-pass!)->1*panic(again)",
"cmdDel": "1*error(try-again)",
"cmdCheck": "10*off"
}
```
* `cmdAdd` (string, optional): The failpoint for `ADD` command.
* `cmdDel` (string, optional): The failpoint for `DEL` command.
* `cmdCheck` (string, optional): The failpoint for `CHECK` command.
Since each `cmdXXX` can contain multiple failpoints, each CNI binary call updates
the current state to make sure the failpoints are executed in the expected order.
The failpoint injection is enabled by a pod annotation. Currently, the key
of customized CNI capabilities in containerd can only be `io.kubernetes.cri.pod-annotations`,
and containerd passes the pod's annotations to CNI under that object. The
user can use the `failpoint.cni.containerd.io/confpath` annotation to enable
failpoints for the pod.
```yaml
apiVersion: v1
kind: Pod
metadata:
name: nginx
annotations:
failpoint.cni.containerd.io/confpath: "/tmp/pod-failpoints.json"
spec:
containers:
- name: nginx
image: nginx:1.14.2
ports:
- containerPort: 80
```
### Example
Let's use the following json as failpoint description.
```bash
$ cat <<EOF | tee /tmp/cni-failpoint.json
{
"cmdAdd": "1*error(try-again)",
"cmdDel": "2*error(oops)",
"cmdCheck": "1*off->1*panic(sorry)"
}
EOF
```
And use `ip netns` to create a persistent network namespace named `failpoint`.
```bash
$ sudo ip netns add failpoint
```
Then set up the following bash script for the demo.
```bash
$ cat <<EOFDEMO | tee /tmp/cni-failpoint-demo-helper.sh
#!/usr/bin/env bash
export CNI_CONTAINERID=failpoint-testing
export CNI_NETNS=/run/netns/failpoint
export CNI_IFNAME=fpeni0
export CNI_PATH=/opt/cni/bin/
cat <<EOF | /opt/cni/bin/cni-bridge-fp
{
"cniVersion": "0.3.0",
"name": "containerd-net-fp",
"type": "cni-bridge-fp",
"bridge": "fp-cni0",
"isGateway": true,
"ipMasq": true,
"promiscMode": true,
"ipam": {
"type": "host-local",
"ranges": [
[{
"subnet": "10.88.0.0/16"
}],
[{
"subnet": "2001:4860:4860::/64"
}]
],
"routes": [
{ "dst": "0.0.0.0/0" },
{ "dst": "::/0" }
]
},
"runtimeConfig": {
"io.kubernetes.cri.pod-annotations": {
"failpoint.cni.containerd.io/confpath": "/tmp/cni-failpoint.json"
}
}
}
EOF
EOFDEMO
```
Let's try to set up CNI; we should get the error `try-again`.
```bash
$ sudo CNI_COMMAND=ADD bash /tmp/cni-failpoint-demo-helper.sh
{
"code": 999,
"msg": "try-again"
}
# No failpoint remains for the ADD command.
$ cat /tmp/cni-failpoint.json | jq .
{
"cmdAdd": "0*error(try-again)",
"cmdDel": "2*error(oops)",
"cmdCheck": "1*off->1*panic(sorry)"
}
```
We should set up CNI successfully after a retry. When we tear down the interface,
there should be two failpoints.
```bash
$ sudo CNI_COMMAND=ADD bash /tmp/cni-failpoint-demo-helper.sh
...
$ sudo CNI_COMMAND=DEL bash /tmp/cni-failpoint-demo-helper.sh
{
"code": 999,
"msg": "oops"
}
$ sudo CNI_COMMAND=DEL bash /tmp/cni-failpoint-demo-helper.sh
{
"code": 999,
"msg": "oops"
}
$ cat /tmp/cni-failpoint.json | jq .
{
"cmdAdd": "0*error(try-again)",
"cmdDel": "0*error(oops)",
"cmdCheck": "1*off->1*panic(sorry)"
}
```
[1]: <https://www.cni.dev/plugins/current/main/bridge/>

View File

@@ -1,209 +0,0 @@
//go:build linux
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"github.com/containerd/containerd/pkg/failpoint"
"github.com/containerd/continuity"
"github.com/sirupsen/logrus"
)
// inheritedPodAnnotations holds the pod annotations this plugin reads from
// the `io.kubernetes.cri.pod-annotations` entry of the runtime config.
type inheritedPodAnnotations struct {
// CNIFailpointControlStateDir is the directory that holds the failpoint
// control settings. The setting of a pod is stored in that directory as a
// JSON file named `${K8S_POD_NAMESPACE}-${K8S_POD_NAME}.json`. The content
// of that JSON file is described by FailpointConf.
CNIFailpointControlStateDir string `json:"cniFailpointControlStateDir,omitempty"`
}
// FailpointConf describes the failpoints of the cmdAdd/cmdDel/cmdCheck
// commands. It is the schema of the failpoint setting JSON file.
type FailpointConf struct {
// Add is the failpoint used for the ADD command.
Add string `json:"cmdAdd"`
// Del is the failpoint used for the DEL command.
Del string `json:"cmdDel"`
// Check is the failpoint used for the CHECK command.
Check string `json:"cmdCheck"`
}
// netConf is the subset of the network configuration (read from stdin) that
// this plugin inspects; all other fields are ignored when decoding.
type netConf struct {
// RuntimeConfig carries the runtime-provided values of the configuration.
RuntimeConfig struct {
// PodAnnotations are the annotations found under the
// `io.kubernetes.cri.pod-annotations` key.
PodAnnotations inheritedPodAnnotations `json:"io.kubernetes.cri.pod-annotations"`
} `json:"runtimeConfig,omitempty"`
}
// main evaluates the failpoint configured for the current CNI command and,
// when the failpoint does not fail, forwards the untouched network
// configuration to the `bridge` binary located in CNI_PATH. Every failure is
// reported through logrus and terminates the process.
func main() {
	input, err := ioutil.ReadAll(os.Stdin)
	if err != nil {
		logrus.Fatalf("failed to read stdin: %v", err)
	}

	var nc netConf
	if err = json.Unmarshal(input, &nc); err != nil {
		logrus.Fatalf("failed to parse network configuration: %v", err)
	}

	command, found := os.LookupEnv("CNI_COMMAND")
	if !found {
		logrus.Fatal("required env CNI_COMMAND")
	}

	pluginDir, found := os.LookupEnv("CNI_PATH")
	if !found {
		logrus.Fatal("required env CNI_PATH")
	}

	// The failpoint runs before the delegation so that an injected error
	// prevents the bridge plugin from being invoked at all.
	stateDir := nc.RuntimeConfig.PodAnnotations.CNIFailpointControlStateDir
	evaluate, err := buildFailpointEval(stateDir, command)
	if err != nil {
		logrus.Fatalf("failed to build failpoint evaluate function: %v", err)
	}
	if err = evaluate(); err != nil {
		logrus.Fatalf("failpoint: %v", err)
	}

	// Replay the original stdin to the bridge plugin and share our
	// stdout/stderr with it so its output reaches the caller directly.
	bridge := exec.Command(filepath.Join(pluginDir, "bridge"))
	bridge.Stdin = bytes.NewReader(input)
	bridge.Stdout, bridge.Stderr = os.Stdout, os.Stderr

	if err = bridge.Start(); err != nil {
		logrus.Fatalf("failed to start bridge cni plugin: %v", err)
	}
	if err = bridge.Wait(); err != nil {
		logrus.Fatalf("failed to wait for bridge cni plugin: %v", err)
	}
}
// buildFailpointEval reads the pod's failpoint setting, takes the delegated
// evaluate function of the failpoint configured for cniCmd, writes the
// updated setting back and returns that evaluate function.
//
// The setting file is located from stateDir and the pod namespace/name found
// in the CNI_ARGS environment variable. A no-op function is returned when
// CNI_ARGS is unset, the pod metadata is incomplete, the setting file does not
// exist, or no failpoint is configured for cniCmd ("ADD", "DEL" or "CHECK").
//
// On any error the returned function is nil.
func buildFailpointEval(stateDir string, cniCmd string) (failpoint.EvalFn, error) {
	cniArgs, ok := os.LookupEnv("CNI_ARGS")
	if !ok {
		return nopEvalFn, nil
	}

	target := buildPodFailpointFilepath(stateDir, cniArgs)
	if target == "" {
		return nopEvalFn, nil
	}

	f, err := os.OpenFile(target, os.O_RDWR, 0666)
	if err != nil {
		if os.IsNotExist(err) {
			return nopEvalFn, nil
		}
		return nil, fmt.Errorf("failed to open file %s: %w", target, err)
	}
	defer f.Close()

	// Serialize the read-modify-write cycle between concurrent invocations.
	// NOTE(review): the setting is rewritten with AtomicWriteFile below, which
	// presumably replaces the file at target; a process already blocked on
	// this lock would then read the stale, replaced file — confirm whether
	// concurrent invocations for the same pod must be supported.
	if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil {
		return nil, fmt.Errorf("failed to lock failpoint setting %s: %w", target, err)
	}
	defer syscall.Flock(int(f.Fd()), syscall.LOCK_UN)

	data, err := ioutil.ReadAll(f)
	if err != nil {
		return nil, fmt.Errorf("failed to read failpoint setting %s: %w", target, err)
	}

	var conf FailpointConf
	if err := json.Unmarshal(data, &conf); err != nil {
		return nil, fmt.Errorf("failed to unmarshal failpoint conf %s: %w", string(data), err)
	}

	// Select the field of the command being executed; a pointer is used so
	// the updated failpoint can be stored back into conf.
	var fpStr *string
	switch cniCmd {
	case "ADD":
		fpStr = &conf.Add
	case "DEL":
		fpStr = &conf.Del
	case "CHECK":
		fpStr = &conf.Check
	}

	if fpStr == nil || *fpStr == "" {
		return nopEvalFn, nil
	}

	fp, err := failpoint.NewFailpoint(cniCmd, *fpStr)
	if err != nil {
		return nil, fmt.Errorf("failed to parse failpoint %s: %w", *fpStr, err)
	}

	// Take the evaluate function first and then persist the failpoint's
	// marshalled state, so the next invocation continues from there.
	evalFn := fp.DelegatedEval()
	*fpStr = fp.Marshal()

	data, err = json.Marshal(conf)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal failpoint conf: %w", err)
	}

	// Do not hand out the evaluate function when the state could not be
	// persisted: the function result is meaningless alongside an error.
	if err := continuity.AtomicWriteFile(target, data, 0666); err != nil {
		return nil, fmt.Errorf("failed to update failpoint setting %s: %w", target, err)
	}
	return evalFn, nil
}
// buildPodFailpointFilepath derives the failpoint setting filepath of a pod
// from the pod metadata carried in cniArgs. The result is
// `<stateDir>/<K8S_POD_NAMESPACE>-<K8S_POD_NAME>.json`, or an empty string
// when either key is absent from cniArgs.
func buildPodFailpointFilepath(stateDir, cniArgs string) string {
	kv := cniArgsIntoKeyValue(cniArgs)

	namespace, found := kv["K8S_POD_NAMESPACE"]
	if !found {
		return ""
	}
	name, found := kv["K8S_POD_NAME"]
	if !found {
		return ""
	}

	filename := strings.Join([]string{namespace, name}, "-") + ".json"
	return filepath.Join(stateDir, filename)
}
// cniArgsIntoKeyValue parses CNI ARGS formatted as
// `key1=value1;key2=value2...` into a key/value map. Entries without a `=`
// are dropped; only the first `=` of an entry separates key from value, and
// a later duplicate key overrides an earlier one.
func cniArgsIntoKeyValue(envStr string) map[string]string {
	entries := strings.Split(envStr, ";")
	pairs := make(map[string]string, len(entries))
	for _, entry := range entries {
		sep := strings.IndexByte(entry, '=')
		if sep < 0 {
			continue
		}
		pairs[entry[:sep]] = entry[sep+1:]
	}
	return pairs
}
// nopEvalFn is the evaluate function used when no failpoint applies; it
// always succeeds.
func nopEvalFn() error { return nil }

View File

@@ -0,0 +1,202 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
"github.com/containerd/containerd/pkg/failpoint"
"github.com/containerd/continuity"
"github.com/containernetworking/cni/pkg/invoke"
"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/version"
)
// delegatedPlugin is the name of the CNI plugin that the ADD, CHECK and DEL
// commands are delegated to.
const delegatedPlugin = "bridge"
// netConf is the subset of the network configuration (received as stdin
// data) that this plugin inspects; all other fields are ignored when decoding.
type netConf struct {
// RuntimeConfig carries the runtime-provided values of the configuration.
RuntimeConfig struct {
// PodAnnotations are the annotations found under the
// `io.kubernetes.cri.pod-annotations` key.
PodAnnotations inheritedPodAnnotations `json:"io.kubernetes.cri.pod-annotations"`
} `json:"runtimeConfig,omitempty"`
}
// inheritedPodAnnotations holds the pod annotations this plugin reads from
// the network configuration.
type inheritedPodAnnotations struct {
// FailpointConfPath represents filepath of failpoint settings. It must be
// an absolute path; failpoint handling is skipped when it is empty.
FailpointConfPath string `json:"failpoint.cni.containerd.io/confpath,omitempty"`
}
// failpointConf is used to describe cmdAdd/cmdDel/cmdCheck command's failpoint.
// It is the schema of the JSON file referenced by the failpoint confpath
// annotation; empty fields are omitted when the file is written back.
type failpointConf struct {
// Add is the failpoint used for the ADD command.
Add string `json:"cmdAdd,omitempty"`
// Del is the failpoint used for the DEL command.
Del string `json:"cmdDel,omitempty"`
// Check is the failpoint used for the CHECK command.
Check string `json:"cmdCheck,omitempty"`
}
// main registers the ADD, CHECK and DEL handlers with the CNI plugin
// skeleton, advertising support for all CNI versions.
func main() {
	const about = "bridge with failpoint support"

	skel.PluginMain(cmdAdd, cmdCheck, cmdDel, version.All, about)
}
// cmdAdd handles the ADD command: it evaluates the ADD failpoint first and,
// if that does not fail, delegates to the bridge plugin and prints the
// delegated result.
func cmdAdd(args *skel.CmdArgs) error {
	fpErr := handleFailpoint(args, "ADD")
	if fpErr != nil {
		return fpErr
	}

	res, addErr := invoke.DelegateAdd(context.TODO(), delegatedPlugin, args.StdinData, nil)
	if addErr != nil {
		return addErr
	}
	return res.Print()
}
// cmdCheck handles the CHECK command: it evaluates the CHECK failpoint first
// and, if that does not fail, delegates to the bridge plugin.
func cmdCheck(args *skel.CmdArgs) error {
	fpErr := handleFailpoint(args, "CHECK")
	if fpErr != nil {
		return fpErr
	}

	ctx := context.TODO()
	return invoke.DelegateCheck(ctx, delegatedPlugin, args.StdinData, nil)
}
// cmdDel handles the DEL command: it evaluates the DEL failpoint first and,
// if that does not fail, delegates to the bridge plugin.
func cmdDel(args *skel.CmdArgs) error {
	fpErr := handleFailpoint(args, "DEL")
	if fpErr != nil {
		return fpErr
	}

	ctx := context.TODO()
	return invoke.DelegateDel(ctx, delegatedPlugin, args.StdinData, nil)
}
// handleFailpoint evaluates the failpoint configured for cmdKind ("ADD",
// "DEL" or "CHECK"). The failpoint settings file is taken from the
// `failpoint.cni.containerd.io/confpath` pod annotation inside the network
// configuration; when the annotation is absent or empty nothing is evaluated
// and nil is returned.
func handleFailpoint(args *skel.CmdArgs, cmdKind string) error {
	var nc netConf
	if err := json.Unmarshal(args.StdinData, &nc); err != nil {
		return fmt.Errorf("failed to parse network configuration: %w", err)
	}

	fpPath := nc.RuntimeConfig.PodAnnotations.FailpointConfPath
	if fpPath == "" {
		// Failpoint injection is not enabled for this pod.
		return nil
	}

	ctl, err := newFailpointControl(fpPath)
	if err != nil {
		return err
	}

	inject, err := ctl.delegatedEvalFn(cmdKind)
	if err != nil {
		return err
	}
	return inject()
}
// failpointControl manages the failpoint settings persisted in a JSON file.
type failpointControl struct {
	// confPath is the absolute path of the failpoint settings file.
	confPath string
}

// newFailpointControl returns a failpointControl bound to confPath. It fails
// when confPath is not an absolute path.
func newFailpointControl(confPath string) (*failpointControl, error) {
	if filepath.IsAbs(confPath) {
		return &failpointControl{confPath: confPath}, nil
	}
	return nil, fmt.Errorf("failpoint confPath(%s) is required to be absolute", confPath)
}
// delegatedEvalFn returns the evaluate function of the failpoint configured
// for cmdKind ("ADD", "DEL" or "CHECK") and stores the failpoint's marshalled
// state back into the settings file within the same transaction. A no-op
// function is returned when cmdKind is unknown or has no failpoint configured.
func (c *failpointControl) delegatedEvalFn(cmdKind string) (failpoint.EvalFn, error) {
	evalFn := failpoint.EvalFn(nopEvalFn)

	consume := func(conf *failpointConf) error {
		// Pick the settings field of the command; a pointer is used so the
		// updated failpoint can be written back into conf.
		var term *string
		switch cmdKind {
		case "ADD":
			term = &conf.Add
		case "DEL":
			term = &conf.Del
		case "CHECK":
			term = &conf.Check
		default:
			return nil
		}
		if *term == "" {
			return nil
		}

		fp, err := failpoint.NewFailpoint(cmdKind, *term)
		if err != nil {
			return fmt.Errorf("failed to parse failpoint %s: %w", *term, err)
		}

		evalFn = fp.DelegatedEval()
		*term = fp.Marshal()
		return nil
	}

	if err := c.updateTx(consume); err != nil {
		return nil, err
	}
	return evalFn, nil
}
// updateTx applies updateFn to the failpoint settings stored at c.confPath
// and persists the result, holding an exclusive flock on the settings file
// during the whole read-modify-write cycle.
//
// The settings are persisted with AtomicWriteFile, which replaces the file at
// c.confPath. A process that was blocked in flock can therefore end up
// holding the lock of a file that has already been replaced; reading it would
// yield stale settings and overwrite the newer state. Such an attempt is
// detected and retried against the file currently at c.confPath, so updateFn
// only ever runs on the latest settings.
func (c *failpointControl) updateTx(updateFn func(conf *failpointConf) error) error {
	for {
		committed, err := c.tryUpdateTx(updateFn)
		if err != nil || committed {
			return err
		}
	}
}

// tryUpdateTx runs a single attempt of the update transaction. It returns
// (false, nil) without calling updateFn when the locked file is no longer the
// one at c.confPath, in which case the caller should retry.
func (c *failpointControl) tryUpdateTx(updateFn func(conf *failpointConf) error) (bool, error) {
	f, err := os.OpenFile(c.confPath, os.O_RDWR, 0666)
	if err != nil {
		return false, fmt.Errorf("failed to open confPath %s: %w", c.confPath, err)
	}
	defer f.Close()

	if err := flock(f.Fd()); err != nil {
		return false, fmt.Errorf("failed to lock failpoint setting %s: %w", c.confPath, err)
	}
	defer unflock(f.Fd())

	// Make sure the file we locked is still the one reachable at confPath;
	// another process may have replaced it while we waited for the lock.
	locked, err := f.Stat()
	if err != nil {
		return false, fmt.Errorf("failed to stat failpoint setting %s: %w", c.confPath, err)
	}
	current, err := os.Stat(c.confPath)
	if err != nil {
		return false, fmt.Errorf("failed to stat failpoint setting %s: %w", c.confPath, err)
	}
	if !os.SameFile(locked, current) {
		return false, nil
	}

	data, err := ioutil.ReadAll(f)
	if err != nil {
		return false, fmt.Errorf("failed to read failpoint setting %s: %w", c.confPath, err)
	}

	var conf failpointConf
	if err := json.Unmarshal(data, &conf); err != nil {
		return false, fmt.Errorf("failed to unmarshal failpoint conf %s: %w", string(data), err)
	}

	if err := updateFn(&conf); err != nil {
		return false, err
	}

	data, err = json.Marshal(conf)
	if err != nil {
		return false, fmt.Errorf("failed to marshal failpoint conf: %w", err)
	}

	if err := continuity.AtomicWriteFile(c.confPath, data, 0666); err != nil {
		return false, err
	}
	return true, nil
}
// nopEvalFn is the evaluate function used when no failpoint applies; it
// always succeeds.
func nopEvalFn() error { return nil }
// flock takes an exclusive advisory lock on the open file referred to by fd,
// blocking until the lock is granted.
//
// The blocking call can be interrupted by a signal before the lock is
// acquired, in which case it fails with EINTR; that is not a real failure, so
// the call is simply retried. Any other error is returned to the caller.
func flock(fd uintptr) error {
	for {
		err := syscall.Flock(int(fd), syscall.LOCK_EX)
		if err != syscall.EINTR {
			return err
		}
	}
}
// unflock releases the advisory lock held on the open file referred to by fd.
func unflock(fd uintptr) error {
	descriptor := int(fd)
	return syscall.Flock(descriptor, syscall.LOCK_UN)
}

View File

@@ -1,6 +1,3 @@
//go:build linux
// +build linux
/*
Copyright The containerd Authors.

View File

@@ -1,6 +1,3 @@
//go:build linux
// +build linux
/*
Copyright The containerd Authors.

View File

@@ -39,7 +39,7 @@ const (
failpointShimPrefixKey = "io.containerd.runtime.v2.shim.failpoint."
failpointCNIStateDirKey = "cniFailpointControlStateDir"
failpointCNIConfPathKey = "failpoint.cni.containerd.io/confpath"
)
func TestRunPodSandboxWithSetupCNIFailure(t *testing.T) {
@@ -109,7 +109,7 @@ func injectCNIFailpoint(t *testing.T, sbConfig *criapiv1.PodSandboxConfig, conf
err = os.WriteFile(fpFilename, data, 0666)
require.NoError(t, err)
sbConfig.Annotations[failpointCNIStateDirKey] = stateDir
sbConfig.Annotations[failpointCNIConfPathKey] = fpFilename
}
func injectShimFailpoint(t *testing.T, sbConfig *criapiv1.PodSandboxConfig, methodFps map[string]string) {