Merge pull request #122047 from aojea/treeless

KEP-1880 Multiple Service CIDRs: Graduate to Beta (1/2)
Kubernetes Prow Robot
2024-06-28 01:00:20 -07:00
committed by GitHub
28 changed files with 2176 additions and 2019 deletions

View File

@@ -14239,7 +14239,7 @@
"description": "ServiceCIDRSpec define the CIDRs the user wants to use for allocating ClusterIPs for Services.",
"properties": {
"cidrs": {
"description": "CIDRs defines the IP blocks in CIDR notation (e.g. \"192.168.0.0/24\" or \"2001:db8::/64\") from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family. This field is immutable.",
"description": "CIDRs defines the IP blocks in CIDR notation (e.g. \"192.168.0.0/24\" or \"2001:db8::/64\") from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family. The network address of each CIDR, the address that identifies the subnet of a host, is reserved and will not be allocated. The broadcast address for IPv4 CIDRs is also reserved and will not be allocated. This field is immutable.",
"items": {
"type": "string"
},
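The reservation semantics added to this description can be reproduced with the standard library alone. A minimal sketch (not part of this PR) that derives the two addresses an IPv4 ServiceCIDR will never allocate, using the same set-all-host-bits technique as the broadcastAddress helper introduced later in this change:

package main

import (
	"fmt"
	"net/netip"
)

func main() {
	prefix := netip.MustParsePrefix("192.168.0.0/24")
	network := prefix.Masked().Addr() // 192.168.0.0, reserved: identifies the subnet

	// Set all host bits to 1 to obtain the IPv4 broadcast address.
	bytes := network.AsSlice()
	hostBits := 8*len(bytes) - prefix.Bits()
	for i := len(bytes) - 1; i >= 0 && hostBits > 0; i-- {
		if hostBits >= 8 {
			bytes[i] = 0xff
			hostBits -= 8
		} else {
			bytes[i] |= ^uint8(0) >> (8 - hostBits)
			break
		}
	}
	broadcast, _ := netip.AddrFromSlice(bytes) // 192.168.0.255, reserved for IPv4
	fmt.Println(network, broadcast)            // the remaining /24 addresses are allocatable
}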

View File

@@ -225,7 +225,7 @@
"description": "ServiceCIDRSpec define the CIDRs the user wants to use for allocating ClusterIPs for Services.",
"properties": {
"cidrs": {
"description": "CIDRs defines the IP blocks in CIDR notation (e.g. \"192.168.0.0/24\" or \"2001:db8::/64\") from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family. This field is immutable.",
"description": "CIDRs defines the IP blocks in CIDR notation (e.g. \"192.168.0.0/24\" or \"2001:db8::/64\") from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family. The network address of each CIDR, the address that identifies the subnet of a host, is reserved and will not be allocated. The broadcast address for IPv4 CIDRs is also reserved and will not be allocated. This field is immutable.",
"items": {
"default": "",
"type": "string"

View File

@@ -50,7 +50,8 @@ func validateClusterIPFlags(options Extra) []error {
}
// Complete() expected to have set Primary* and Secondary
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) {
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) ||
!utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
// primary CIDR validation
if err := validateMaxCIDRRange(options.PrimaryServiceClusterIPRange, maxCIDRBits, "--service-cluster-ip-range"); err != nil {
errs = append(errs, err)
@@ -72,7 +73,8 @@ func validateClusterIPFlags(options Extra) []error {
if !dualstack {
errs = append(errs, errors.New("--service-cluster-ip-range[0] and --service-cluster-ip-range[1] must be of different IP family"))
}
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) {
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) ||
!utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
if err := validateMaxCIDRRange(options.SecondaryServiceClusterIPRange, maxCIDRBits, "--service-cluster-ip-range[1]"); err != nil {
errs = append(errs, err)
}
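The net effect of the two new conditions in this file: the legacy limit on the size of the service cluster IP range keeps applying unless both feature gates are enabled, because with the dual write still active the old bitmap allocator has to back the whole range and cannot handle arbitrarily large CIDRs. A condensed, illustrative sketch of the predicate (the helper name is hypothetical):

// legacySizeCheckRequired condenses the gating above: the maxCIDRBits check is
// skipped only when the new IP allocator is active AND the dual write to the
// legacy bitmap allocator has been disabled.
func legacySizeCheckRequired(multiCIDRServiceAllocator, disableAllocatorDualWrite bool) bool {
	return !multiCIDRServiceAllocator || !disableAllocatorDualWrite
}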

View File

@@ -60,10 +60,11 @@ func makeOptionsWithCIDRs(serviceCIDR string, secondaryServiceCIDR string) *Serv
func TestClusterServiceIPRange(t *testing.T) {
testCases := []struct {
name string
options *ServerRunOptions
expectErrors bool
gate bool
name string
options *ServerRunOptions
expectErrors bool
ipAllocatorGate bool
disableDualWriteGate bool
}{
{
name: "no service cidr",
@@ -91,22 +92,46 @@ func TestClusterServiceIPRange(t *testing.T) {
options: makeOptionsWithCIDRs("10.0.0.0/8", ""),
},
{
name: "service cidr IPv4 is too big but gate enbled",
expectErrors: false,
options: makeOptionsWithCIDRs("10.0.0.0/8", ""),
gate: true,
name: "service cidr IPv4 is too big but gate enbled",
expectErrors: true,
options: makeOptionsWithCIDRs("10.0.0.0/8", ""),
ipAllocatorGate: true,
disableDualWriteGate: false,
},
{
name: "service cidr IPv6 is too big but gate enbled",
expectErrors: false,
options: makeOptionsWithCIDRs("2001:db8::/64", ""),
gate: true,
name: "service cidr IPv6 is too big but only ipallocator gate enabled",
expectErrors: true,
options: makeOptionsWithCIDRs("2001:db8::/64", ""),
ipAllocatorGate: true,
disableDualWriteGate: false,
},
{
name: "service cidr IPv6 is too big and gate enbled",
expectErrors: false,
options: makeOptionsWithCIDRs("2001:db8::/12", ""),
gate: true,
name: "service cidr IPv6 is too big but only ipallocator gate enabled",
expectErrors: true,
options: makeOptionsWithCIDRs("2001:db8::/12", ""),
ipAllocatorGate: true,
disableDualWriteGate: false,
},
{
name: "service cidr IPv4 is too big but gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("10.0.0.0/8", ""),
ipAllocatorGate: true,
disableDualWriteGate: true,
},
{
name: "service cidr IPv6 is too big but gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("2001:db8::/64", ""),
ipAllocatorGate: true,
disableDualWriteGate: true,
},
{
name: "service cidr IPv6 is too big and gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("2001:db8::/12", ""),
ipAllocatorGate: true,
disableDualWriteGate: true,
},
{
name: "dual-stack secondary cidr too big",
@@ -114,10 +139,18 @@ func TestClusterServiceIPRange(t *testing.T) {
options: makeOptionsWithCIDRs("10.0.0.0/16", "3000::/64"),
},
{
name: "dual-stack secondary cidr too big gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("10.0.0.0/16", "3000::/48"),
gate: true,
name: "dual-stack secondary cidr too big but only ipallocator gate enabled",
expectErrors: true,
options: makeOptionsWithCIDRs("10.0.0.0/16", "3000::/48"),
ipAllocatorGate: true,
disableDualWriteGate: false,
},
{
name: "dual-stack secondary cidr too big gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("10.0.0.0/16", "3000::/48"),
ipAllocatorGate: true,
disableDualWriteGate: true,
},
{
name: "more than two entries",
@@ -149,7 +182,8 @@ func TestClusterServiceIPRange(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRServiceAllocator, tc.gate)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRServiceAllocator, tc.ipAllocatorGate)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DisableAllocatorDualWrite, tc.disableDualWriteGate)
errs := validateClusterIPFlags(tc.options.Extra)
if len(errs) > 0 && !tc.expectErrors {

View File

@@ -0,0 +1,158 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package servicecidr
import (
"fmt"
"net"
"net/netip"
networkingv1alpha1 "k8s.io/api/networking/v1alpha1"
"k8s.io/apimachinery/pkg/labels"
networkinglisters "k8s.io/client-go/listers/networking/v1alpha1"
)
// OverlapsPrefix returns the list of ServiceCIDRs that overlap with the prefix passed as argument
func OverlapsPrefix(serviceCIDRLister networkinglisters.ServiceCIDRLister, prefix netip.Prefix) []*networkingv1alpha1.ServiceCIDR {
result := []*networkingv1alpha1.ServiceCIDR{}
serviceCIDRList, err := serviceCIDRLister.List(labels.Everything())
if err != nil {
return result
}
for _, serviceCIDR := range serviceCIDRList {
for _, cidr := range serviceCIDR.Spec.CIDRs {
if p, err := netip.ParsePrefix(cidr); err == nil { // it cannot fail since it is already validated
if p.Overlaps(prefix) {
result = append(result, serviceCIDR)
}
}
}
}
return result
}
// ContainsPrefix returns the list of ServiceCIDRs that contain the prefix passed as argument
func ContainsPrefix(serviceCIDRLister networkinglisters.ServiceCIDRLister, prefix netip.Prefix) []*networkingv1alpha1.ServiceCIDR {
result := []*networkingv1alpha1.ServiceCIDR{}
serviceCIDRList, err := serviceCIDRLister.List(labels.Everything())
if err != nil {
return result
}
for _, serviceCIDR := range serviceCIDRList {
for _, cidr := range serviceCIDR.Spec.CIDRs {
if p, err := netip.ParsePrefix(cidr); err == nil { // it cannot fail since it is already validated
if p.Overlaps(prefix) && p.Bits() <= prefix.Bits() {
result = append(result, serviceCIDR)
}
}
}
}
return result
}
// ContainsIP returns the list of ServiceCIDRs that contain the IP address passed as argument
func ContainsIP(serviceCIDRLister networkinglisters.ServiceCIDRLister, ip net.IP) []*networkingv1alpha1.ServiceCIDR {
address := IPToAddr(ip)
return ContainsAddress(serviceCIDRLister, address)
}
// ContainsAddress returns the list of ServiceCIDRs that contain the address passed as argument
func ContainsAddress(serviceCIDRLister networkinglisters.ServiceCIDRLister, address netip.Addr) []*networkingv1alpha1.ServiceCIDR {
result := []*networkingv1alpha1.ServiceCIDR{}
serviceCIDRList, err := serviceCIDRLister.List(labels.Everything())
if err != nil {
return result
}
for _, serviceCIDR := range serviceCIDRList {
for _, cidr := range serviceCIDR.Spec.CIDRs {
if prefix, err := netip.ParsePrefix(cidr); err == nil { // it cannot fail since it is already validated
if PrefixContainsIP(prefix, address) {
result = append(result, serviceCIDR)
}
}
}
}
return result
}
// PrefixContainsIP returns true if the given IP is contained within the prefix,
// is not the network address and also, if IPv4, is not the broadcast address.
// This is required (rather than just `prefix.Contains(ip)`) because a ServiceCIDR
// covering the prefix will not allocate those IPs, so a Service with one of those
// IPs can't belong to that ServiceCIDR.
func PrefixContainsIP(prefix netip.Prefix, ip netip.Addr) bool {
// the network address is not considered contained
if prefix.Masked().Addr() == ip {
return false
}
// the broadcast address is not considered contained for IPv4
if ip.Is4() {
ipLast, err := broadcastAddress(prefix)
if err != nil || ipLast == ip {
return false
}
}
return prefix.Contains(ip)
}
// broadcastAddress returns the broadcast address of the subnet
// The broadcast address is obtained by setting all the host bits
// in a subnet to 1.
// network 192.168.0.0/24 : subnet bits 24 host bits 32 - 24 = 8
// broadcast address 192.168.0.255
func broadcastAddress(subnet netip.Prefix) (netip.Addr, error) {
base := subnet.Masked().Addr()
bytes := base.AsSlice()
// get all the host bits from the subnet
n := 8*len(bytes) - subnet.Bits()
// set all the host bits to 1
for i := len(bytes) - 1; i >= 0 && n > 0; i-- {
if n >= 8 {
bytes[i] = 0xff
n -= 8
} else {
mask := ^uint8(0) >> (8 - n)
bytes[i] |= mask
break
}
}
addr, ok := netip.AddrFromSlice(bytes)
if !ok {
return netip.Addr{}, fmt.Errorf("invalid address %v", bytes)
}
return addr, nil
}
// IPToAddr converts a net.IP to a netip.Addr.
// If the net.IP is not valid it returns the zero netip.Addr{}
func IPToAddr(ip net.IP) netip.Addr {
// https://pkg.go.dev/net/netip#AddrFromSlice can return an IPv4 in IPv6 format
// so we have to check the IP family to return exactly the format that we want:
// address, _ := netip.AddrFromSlice(net.ParseIPSloppy("192.168.0.1")) returns
// an address like ::ffff:192.168.0.1
bytes := ip.To4()
if bytes == nil {
bytes = ip.To16()
}
// AddrFromSlice returns Addr{}, false if the input is invalid.
address, _ := netip.AddrFromSlice(bytes)
return address
}
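The only difference between OverlapsPrefix and ContainsPrefix above is the extra Bits() comparison: Overlaps matches in both directions, while Contains additionally requires the ServiceCIDR to be at least as wide as the queried prefix. A standalone sketch of that distinction, independent of the lister plumbing:

package main

import (
	"fmt"
	"net/netip"
)

func main() {
	cidr := netip.MustParsePrefix("10.0.0.0/24")  // a ServiceCIDR entry
	query := netip.MustParsePrefix("10.0.0.0/16") // a wider query prefix

	// Overlaps is symmetric: the /24 and the /16 share addresses.
	fmt.Println(cidr.Overlaps(query)) // true

	// Contains additionally requires the ServiceCIDR to be the larger
	// (or equal) subnet, i.e. fewer or equal prefix bits.
	fmt.Println(cidr.Overlaps(query) && cidr.Bits() <= query.Bits()) // false: a /24 cannot contain a /16
}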

View File

@@ -0,0 +1,731 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package servicecidr
import (
"net/netip"
"reflect"
"sort"
"testing"
networkingv1alpha1 "k8s.io/api/networking/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
networkinglisters "k8s.io/client-go/listers/networking/v1alpha1"
"k8s.io/client-go/tools/cache"
netutils "k8s.io/utils/net"
)
func newServiceCIDR(name, primary, secondary string) *networkingv1alpha1.ServiceCIDR {
serviceCIDR := &networkingv1alpha1.ServiceCIDR{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Spec: networkingv1alpha1.ServiceCIDRSpec{},
}
serviceCIDR.Spec.CIDRs = append(serviceCIDR.Spec.CIDRs, primary)
if secondary != "" {
serviceCIDR.Spec.CIDRs = append(serviceCIDR.Spec.CIDRs, secondary)
}
return serviceCIDR
}
func TestOverlapsPrefix(t *testing.T) {
tests := []struct {
name string
serviceCIDRs []*networkingv1alpha1.ServiceCIDR
prefix netip.Prefix
want []string
}{
{
name: "only one ServiceCIDR and IPv4 prefix contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("10.0.0.0/26"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and same IPv4 prefix",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("10.0.0.0/24"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and larger IPv4 prefix",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("10.0.0.0/16"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and non contained IPv4 prefix",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("192.168.0.0/24"),
want: []string{},
},
{
name: "only one ServiceCIDR and IPv6 prefix contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("2001:db8::/112"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and same IPv6 prefix",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("2001:db8::/96"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and IPv6 larger",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("2001:db8::/64"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and IPv6 prefix out of range",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("2001:db2::/112"),
want: []string{},
},
{
name: "two ServiceCIDR and IPv4 prefix contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("10.0.0.0/24"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two overlapping ServiceCIDR and IPv4 prefix only contained in one",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("10.0.0.0/18"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and IPv4 larger",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("10.0.0.0/8"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and IPv4 prefix not contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("192.168.0.0/24"),
want: []string{},
},
{
name: "two ServiceCIDR and IPv6 prefix contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("2001:db8::/96"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and IPv6 prefix contained in one",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("2001:db8::/72"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and aprefix larger",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("2001:db8::/52"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and prefix out of range",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("2001:db2::/64"),
want: []string{},
},
{
name: "multiple ServiceCIDR match with overlap contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("kubernetes2", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("10.0.0.0/28"),
want: []string{"kubernetes", "kubernetes2", "secondary"},
},
{
name: "multiple ServiceCIDR match with overlap contains",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("kubernetes2", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("10.0.0.0/8"),
want: []string{"kubernetes", "kubernetes2", "secondary"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc})
for _, serviceCIDR := range tt.serviceCIDRs {
err := indexer.Add(serviceCIDR)
if err != nil {
t.Fatalf("unexpected error %v", err)
}
}
lister := networkinglisters.NewServiceCIDRLister(indexer)
got := []string{}
for _, serviceCIDR := range OverlapsPrefix(lister, tt.prefix) {
got = append(got, serviceCIDR.Name)
}
// sort slices to make the order predictable and avoid flakiness
sort.Strings(got)
sort.Strings(tt.want)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("OverlapsAddress() = %v, want %v", got, tt.want)
}
})
}
}
func TestContainsPrefix(t *testing.T) {
tests := []struct {
name string
serviceCIDRs []*networkingv1alpha1.ServiceCIDR
prefix netip.Prefix
want []string
}{
{
name: "only one ServiceCIDR and IPv4 prefix contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("10.0.0.0/26"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and same IPv4 prefix",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("10.0.0.0/24"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and larger IPv4 prefix",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("10.0.0.0/16"),
want: []string{},
},
{
name: "only one ServiceCIDR and non containerd IPv4 prefix",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("192.168.0.0/24"),
want: []string{},
},
{
name: "only one ServiceCIDR and IPv6 prefix contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("2001:db8::/112"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and same IPv6 prefix",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("2001:db8::/96"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and IPv6 larger",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("2001:db8::/64"),
want: []string{},
},
{
name: "only one ServiceCIDR and IPv6 prefix out of range",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
prefix: netip.MustParsePrefix("2001:db2::/112"),
want: []string{},
},
{
name: "two ServiceCIDR and IPv4 prefix contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("10.0.0.0/24"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and IPv4 prefix only contained in one",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("10.0.0.0/18"),
want: []string{"secondary"},
},
{
name: "two ServiceCIDR and IPv4 larger",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("10.0.0.0/8"),
want: []string{},
},
{
name: "two ServiceCIDR and IPv4 prefix not contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("192.168.0.0/24"),
want: []string{},
},
{
name: "two ServiceCIDR and IPv6 prefix contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("2001:db8::/96"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and IPv6 prefix contained in one",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("2001:db8::/72"),
want: []string{"secondary"},
},
{
name: "two ServiceCIDR and aprefix larger",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("2001:db8::/52"),
want: []string{},
},
{
name: "two ServiceCIDR and prefix out of range",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("2001:db2::/64"),
want: []string{},
},
{
name: "multiple ServiceCIDR match with overlap",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("kubernetes2", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
prefix: netip.MustParsePrefix("10.0.0.0/28"),
want: []string{"kubernetes", "kubernetes2", "secondary"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc})
for _, serviceCIDR := range tt.serviceCIDRs {
err := indexer.Add(serviceCIDR)
if err != nil {
t.Fatalf("unexpected error %v", err)
}
}
lister := networkinglisters.NewServiceCIDRLister(indexer)
got := []string{}
for _, serviceCIDR := range ContainsPrefix(lister, tt.prefix) {
got = append(got, serviceCIDR.Name)
}
// sort slices to make the order predictable and avoid flakiness
sort.Strings(got)
sort.Strings(tt.want)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("ContainsAddress() = %v, want %v", got, tt.want)
}
})
}
}
func TestContainsAddress(t *testing.T) {
tests := []struct {
name string
serviceCIDRs []*networkingv1alpha1.ServiceCIDR
address netip.Addr
want []string
}{
{
name: "only one ServiceCIDR and IPv4 address contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
address: netip.MustParseAddr("10.0.0.1"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and IPv4 address broadcast",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
address: netip.MustParseAddr("10.0.0.255"),
want: []string{},
},
{
name: "only one ServiceCIDR and IPv4 address base",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
address: netip.MustParseAddr("10.0.0.0"),
want: []string{},
},
{
name: "only one ServiceCIDR and IPv4 address out of range",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
address: netip.MustParseAddr("192.0.0.1"),
want: []string{},
},
{
name: "only one ServiceCIDR and IPv6 address contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
address: netip.MustParseAddr("2001:db8::2:3"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and IPv6 address broadcast",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
address: netip.MustParseAddr("2001:db8::ffff:ffff"),
want: []string{"kubernetes"},
},
{
name: "only one ServiceCIDR and IPv6 address base",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
address: netip.MustParseAddr("2001:db8::"),
want: []string{},
},
{
name: "only one ServiceCIDR and IPv6 address out of range",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
},
address: netip.MustParseAddr("2002:1:2:3::2"),
want: []string{},
},
{
name: "two ServiceCIDR and IPv4 address contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
address: netip.MustParseAddr("10.0.0.1"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and IPv4 address broadcast",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
address: netip.MustParseAddr("10.0.0.255"),
want: []string{"secondary"},
},
{
name: "two ServiceCIDR and IPv4 address base",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
address: netip.MustParseAddr("10.0.0.0"),
want: []string{},
},
{
name: "two ServiceCIDR and IPv4 address out of range",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
address: netip.MustParseAddr("192.0.0.1"),
want: []string{},
},
{
name: "two ServiceCIDR and IPv6 address contained",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
address: netip.MustParseAddr("2001:db8::2:3"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and address broadcast",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
address: netip.MustParseAddr("2001:db8::ffff:ffff"),
want: []string{"kubernetes", "secondary"},
},
{
name: "two ServiceCIDR and address base",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
address: netip.MustParseAddr("2001:db8::"),
want: []string{},
},
{
name: "two ServiceCIDR and address out of range",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
address: netip.MustParseAddr("2002:1:2:3::2"),
want: []string{},
},
{
name: "multiple ServiceCIDR match with overlap",
serviceCIDRs: []*networkingv1alpha1.ServiceCIDR{
newServiceCIDR("kubernetes", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("kubernetes2", "10.0.0.0/24", "2001:db8::/96"),
newServiceCIDR("secondary", "10.0.0.0/16", "2001:db8::/64"),
},
address: netip.MustParseAddr("10.0.0.2"),
want: []string{"kubernetes", "kubernetes2", "secondary"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc})
for _, serviceCIDR := range tt.serviceCIDRs {
err := indexer.Add(serviceCIDR)
if err != nil {
t.Fatalf("unexpected error %v", err)
}
}
lister := networkinglisters.NewServiceCIDRLister(indexer)
got := []string{}
for _, serviceCIDR := range ContainsAddress(lister, tt.address) {
got = append(got, serviceCIDR.Name)
}
// sort slices to make the order predictable and avoid flakiness
sort.Strings(got)
sort.Strings(tt.want)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("ContainsAddress() = %v, want %v", got, tt.want)
}
})
}
}
func Test_PrefixContainsIP(t *testing.T) {
tests := []struct {
name string
prefix netip.Prefix
ip netip.Addr
want bool
}{
{
name: "IPv4 contains",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
ip: netip.MustParseAddr("192.168.0.1"),
want: true,
},
{
name: "IPv4 network address",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
ip: netip.MustParseAddr("192.168.0.0"),
},
{
name: "IPv4 broadcast address",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
ip: netip.MustParseAddr("192.168.0.255"),
},
{
name: "IPv4 does not contain",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
ip: netip.MustParseAddr("192.168.1.2"),
},
{
name: "IPv6 contains",
prefix: netip.MustParsePrefix("2001:db2::/96"),
ip: netip.MustParseAddr("2001:db2::1"),
want: true,
},
{
name: "IPv6 network address",
prefix: netip.MustParsePrefix("2001:db2::/96"),
ip: netip.MustParseAddr("2001:db2::"),
},
{
name: "IPv6 broadcast address",
prefix: netip.MustParsePrefix("2001:db2::/96"),
ip: netip.MustParseAddr("2001:db2::ffff:ffff"),
want: true,
},
{
name: "IPv6 does not contain",
prefix: netip.MustParsePrefix("2001:db2::/96"),
ip: netip.MustParseAddr("2001:db2:1:2:3::1"),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := PrefixContainsIP(tt.prefix, tt.ip); got != tt.want {
t.Errorf("prefixContainIP() = %v, want %v", got, tt.want)
}
})
}
}
func TestIPToAddr(t *testing.T) {
tests := []struct {
name string
ip string
want netip.Addr
}{
{
name: "IPv4",
ip: "192.168.2.2",
want: netip.MustParseAddr("192.168.2.2"),
},
{
name: "IPv6",
ip: "2001:db8::2",
want: netip.MustParseAddr("2001:db8::2"),
},
{
name: "IPv4 in IPv6",
ip: "::ffff:192.168.0.1",
want: netip.MustParseAddr("192.168.0.1"),
},
{
name: "invalid",
ip: "invalid_ip",
want: netip.Addr{},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ip := netutils.ParseIPSloppy(tt.ip)
if got := IPToAddr(ip); !reflect.DeepEqual(got, tt.want) {
t.Errorf("IPToAddr() = %v, want %v", got, tt.want)
}
})
}
}
func TestBroadcastAddress(t *testing.T) {
tests := []struct {
name string
subnet netip.Prefix
want netip.Addr
wantErr bool
}{
{
name: "emty subnet",
wantErr: true,
},
{
name: "IPv4 even mask",
subnet: netip.MustParsePrefix("192.168.0.0/24"),
want: netip.MustParseAddr("192.168.0.255"),
},
{
name: "IPv4 odd mask",
subnet: netip.MustParsePrefix("192.168.0.0/23"),
want: netip.MustParseAddr("192.168.1.255"),
},
{
name: "IPv6 even mask",
subnet: netip.MustParsePrefix("fd00:1:2:3::/64"),
want: netip.MustParseAddr("fd00:1:2:3:ffff:ffff:ffff:ffff"),
},
{
name: "IPv6 odd mask",
subnet: netip.MustParsePrefix("fd00:1:2:3::/57"),
want: netip.MustParseAddr("fd00:1:2:007f:ffff:ffff:ffff:ffff"),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := broadcastAddress(tt.subnet)
if (err != nil) != tt.wantErr {
t.Errorf("BroadcastAddress() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("BroadcastAddress() = %v, want %v", got, tt.want)
}
})
}
}

View File

@@ -20,7 +20,6 @@ import (
"context"
"encoding/json"
"net/netip"
"sync"
"time"
v1 "k8s.io/api/core/v1"
@@ -43,8 +42,8 @@ import (
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/api/servicecidr"
"k8s.io/kubernetes/pkg/registry/core/service/ipallocator"
"k8s.io/kubernetes/pkg/util/iptree"
netutils "k8s.io/utils/net"
)
@@ -80,7 +79,6 @@ func NewController(
workqueue.DefaultTypedControllerRateLimiter[string](),
workqueue.TypedRateLimitingQueueConfig[string]{Name: "ipaddresses"},
),
tree: iptree.New[sets.Set[string]](),
workerLoopPeriod: time.Second,
}
@@ -122,10 +120,6 @@ type Controller struct {
// workerLoopPeriod is the time between worker runs. The workers process the queue of service and ipRange changes.
workerLoopPeriod time.Duration
// tree store the ServiceCIDRs names associated to each
muTree sync.Mutex
tree *iptree.Tree[sets.Set[string]]
}
// Run will not return until stopCh is closed.
@@ -213,24 +207,17 @@ func (c *Controller) deleteIPAddress(obj interface{}) {
// this is required because adding or removing a CIDR requires recomputing the
// state of each ServiceCIDR to check if it can be unblocked on deletion.
func (c *Controller) overlappingServiceCIDRs(serviceCIDR *networkingapiv1alpha1.ServiceCIDR) []string {
c.muTree.Lock()
defer c.muTree.Unlock()
serviceCIDRs := sets.New[string]()
result := sets.New[string]()
for _, cidr := range serviceCIDR.Spec.CIDRs {
if prefix, err := netip.ParsePrefix(cidr); err == nil { // if is empty err will not be nil
c.tree.WalkPath(prefix, func(k netip.Prefix, v sets.Set[string]) bool {
serviceCIDRs.Insert(v.UnsortedList()...)
return false
})
c.tree.WalkPrefix(prefix, func(k netip.Prefix, v sets.Set[string]) bool {
serviceCIDRs.Insert(v.UnsortedList()...)
return false
})
serviceCIDRs := servicecidr.OverlapsPrefix(c.serviceCIDRLister, prefix)
for _, v := range serviceCIDRs {
result.Insert(v.Name)
}
}
}
return serviceCIDRs.UnsortedList()
return result.UnsortedList()
}
// containingServiceCIDRs, given an IPAddress, returns the ServiceCIDRs that contain the IP,
@@ -249,16 +236,13 @@ func (c *Controller) containingServiceCIDRs(ip *networkingapiv1alpha1.IPAddress)
return []string{}
}
c.muTree.Lock()
defer c.muTree.Unlock()
serviceCIDRs := []string{}
// walk the tree to get all the ServiceCIDRs that contain this IP address
prefixes := c.tree.GetHostIPPrefixMatches(address)
for _, v := range prefixes {
serviceCIDRs = append(serviceCIDRs, v.UnsortedList()...)
result := sets.New[string]()
serviceCIDRs := servicecidr.ContainsAddress(c.serviceCIDRLister, address)
for _, v := range serviceCIDRs {
result.Insert(v.Name)
}
return serviceCIDRs
return result.UnsortedList()
}
func (c *Controller) worker(ctx context.Context) {
@@ -290,38 +274,6 @@ func (c *Controller) processNext(ctx context.Context) bool {
return true
}
// syncCIDRs rebuilds the radix tree based from the informers cache
func (c *Controller) syncCIDRs() error {
serviceCIDRList, err := c.serviceCIDRLister.List(labels.Everything())
if err != nil {
return err
}
// track the names of the different ServiceCIDRs, there
// can be multiple ServiceCIDRs sharing the same prefixes
// and this is important to determine if a ServiceCIDR can
// be deleted.
tree := iptree.New[sets.Set[string]]()
for _, serviceCIDR := range serviceCIDRList {
for _, cidr := range serviceCIDR.Spec.CIDRs {
if prefix, err := netip.ParsePrefix(cidr); err == nil { // if is empty err will not be nil
// if the prefix already exist append the new ServiceCIDR name
v, ok := tree.GetPrefix(prefix)
if !ok {
v = sets.Set[string]{}
}
v.Insert(serviceCIDR.Name)
tree.InsertPrefix(prefix, v)
}
}
}
c.muTree.Lock()
defer c.muTree.Unlock()
c.tree = tree
return nil
}
func (c *Controller) sync(ctx context.Context, key string) error {
logger := klog.FromContext(ctx)
startTime := time.Now()
@@ -329,13 +281,6 @@ func (c *Controller) sync(ctx context.Context, key string) error {
logger.V(4).Info("Finished syncing ServiceCIDR)", "ServiceCIDR", key, "elapsed", time.Since(startTime))
}()
// TODO(aojea) verify if this present a performance problem
// restore the radix tree from the current state
err := c.syncCIDRs()
if err != nil {
return err
}
logger.V(4).Info("syncing ServiceCIDR", "ServiceCIDR", key)
cidr, err := c.serviceCIDRLister.Get(key)
if err != nil {
@@ -406,23 +351,16 @@ func (c *Controller) sync(ctx context.Context, key string) error {
// canDeleteCIDR checks that the ServiceCIDR can be safely deleted and not leave orphan IPAddresses
func (c *Controller) canDeleteCIDR(ctx context.Context, serviceCIDR *networkingapiv1alpha1.ServiceCIDR) (bool, error) {
// TODO(aojea) Revisit the lock usage and if we need to keep it only for the tree operations
// to avoid holding it during the whole operation.
c.muTree.Lock()
defer c.muTree.Unlock()
logger := klog.FromContext(ctx)
// Check if there is a subnet that already contains the ServiceCIDR that is going to be deleted.
hasParent := true
for _, cidr := range serviceCIDR.Spec.CIDRs {
// Walk the tree to find if there is a larger subnet that contains the existing one,
// Find if there is a larger subnet that contains the existing one,
// or there is another ServiceCIDR with the same subnet.
if prefix, err := netip.ParsePrefix(cidr); err == nil {
serviceCIDRs := sets.New[string]()
c.tree.WalkPath(prefix, func(k netip.Prefix, v sets.Set[string]) bool {
serviceCIDRs.Insert(v.UnsortedList()...)
return false
})
if serviceCIDRs.Len() == 1 && serviceCIDRs.Has(serviceCIDR.Name) {
serviceCIDRs := servicecidr.ContainsPrefix(c.serviceCIDRLister, prefix)
if len(serviceCIDRs) == 0 ||
len(serviceCIDRs) == 1 && serviceCIDRs[0].Name == serviceCIDR.Name {
hasParent = false
}
}
@@ -431,7 +369,7 @@ func (c *Controller) canDeleteCIDR(ctx context.Context, serviceCIDR *networkinga
// All the existing IP addresses will be contained on the parent ServiceCIDRs,
// it is safe to delete, remove the finalizer.
if hasParent {
logger.V(2).Info("Removing finalizer for ServiceCIDR", "ServiceCIDR", serviceCIDR.String())
logger.Info("Deleting ServiceCIDR contained in other ServiceCIDR", "ServiceCIDR", serviceCIDR.String())
return true, nil
}
@@ -458,22 +396,18 @@ func (c *Controller) canDeleteCIDR(ctx context.Context, serviceCIDR *networkinga
logger.Info("[SHOULD NOT HAPPEN] unexpected error parsing IPAddress", "IPAddress", ip.Name, "error", err)
continue
}
// walk the tree to find all ServiceCIDRs containing this IP
prefixes := c.tree.GetHostIPPrefixMatches(address)
if len(prefixes) != 1 {
continue
}
for _, v := range prefixes {
if v.Len() == 1 && v.Has(serviceCIDR.Name) {
return false, nil
}
// find all ServiceCIDRs containing this IP
serviceCIDRs := servicecidr.ContainsAddress(c.serviceCIDRLister, address)
if len(serviceCIDRs) == 1 && serviceCIDRs[0].Name == serviceCIDR.Name {
logger.Info("Deleting ServiceCIDR blocked by IP address", "IPAddress", address.String())
return false, nil
}
}
}
// There are no IPAddresses that depend on the existing ServiceCIDR, so
// it is safe to delete, remove finalizer.
logger.Info("ServiceCIDR no longer have orphan IPs", "ServiceCDIR", serviceCIDR.String())
logger.Info("Deleting ServiceCIDR no longer have orphan IPs", "ServiceCIDR", serviceCIDR.String())
return true, nil
}
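Condensed, the rule canDeleteCIDR implements with these helpers is: drop the finalizer if every prefix of the ServiceCIDR being deleted is contained in some other ServiceCIDR, or if no existing IPAddress is covered exclusively by it. An illustrative sketch with hypothetical, simplified inputs:

// safeToDelete condenses the logic above: parents holds, per prefix, the names
// of the ServiceCIDRs containing that prefix, and soleOwnerIPs counts the
// IPAddresses whose only containing ServiceCIDR is the one being deleted.
func safeToDelete(name string, parents [][]string, soleOwnerIPs int) bool {
	hasParent := true
	for _, p := range parents {
		// no parent at all, or the only "parent" is the ServiceCIDR itself
		if len(p) == 0 || (len(p) == 1 && p[0] == name) {
			hasParent = false
		}
	}
	return hasParent || soleOwnerIPs == 0
}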

View File

@@ -428,11 +428,6 @@ func TestController_canDeleteCIDR(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
tCtx := ktesting.Init(t)
_, controller := newController(tCtx, t, tc.cidrs, tc.ips)
err := controller.syncCIDRs()
if err != nil {
t.Fatal(err)
}
got, err := controller.canDeleteCIDR(tCtx, tc.cidrSynced)
if err != nil {
t.Fatal(err)
@@ -534,10 +529,6 @@ func TestController_ipToCidrs(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
tCtx := ktesting.Init(t)
_, controller := newController(tCtx, t, tt.cidrs, nil)
err := controller.syncCIDRs()
if err != nil {
t.Fatal(err)
}
if got := controller.containingServiceCIDRs(tt.ip); !cmp.Equal(got, tt.want, cmpopts.SortSlices(func(a, b string) bool { return a < b })) {
t.Errorf("Controller.ipToCidrs() = %v, want %v", got, tt.want)
}
@@ -591,10 +582,6 @@ func TestController_cidrToCidrs(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
tCtx := ktesting.Init(t)
_, controller := newController(tCtx, t, tt.cidrs, nil)
err := controller.syncCIDRs()
if err != nil {
t.Fatal(err)
}
if got := controller.overlappingServiceCIDRs(tt.cidr); !cmp.Equal(got, tt.want, cmpopts.SortSlices(func(a, b string) bool { return a < b })) {
t.Errorf("Controller.cidrToCidrs() = %v, want %v", got, tt.want)
}

View File

@@ -145,11 +145,15 @@ func (c *Controller) Stop() {
// Run periodically updates the kubernetes service
func (c *Controller) Run(ch <-chan struct{}) {
// wait until process is ready
wait.PollImmediateUntil(100*time.Millisecond, func() (bool, error) {
ctx := wait.ContextForChannel(ch)
err := wait.PollUntilContextCancel(ctx, 100*time.Millisecond, true, func(context.Context) (bool, error) {
var code int
c.client.CoreV1().RESTClient().Get().AbsPath("/readyz").Do(context.TODO()).StatusCode(&code)
return code == http.StatusOK, nil
}, ch)
})
if err != nil {
return
}
wait.NonSlidingUntil(func() {
// Service definition is not reconciled after first
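The replacement above switches from the deprecated wait.PollImmediateUntil to the context-aware wait.PollUntilContextCancel: the stop channel becomes a Context and the boolean argument requests an immediate first poll. A minimal sketch of the pattern in isolation (the readiness probe is hypothetical):

package sketch

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// waitUntilReady polls every 100ms, starting immediately, until ready()
// returns true or the stop channel is closed.
func waitUntilReady(ch <-chan struct{}, ready func() bool) error {
	ctx := wait.ContextForChannel(ch)
	return wait.PollUntilContextCancel(ctx, 100*time.Millisecond, true,
		func(context.Context) (bool, error) {
			return ready(), nil // hypothetical readiness probe
		})
}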

View File

@@ -178,6 +178,14 @@ const (
// Add support for CDI Device IDs in the Device Plugin API.
DevicePluginCDIDevices featuregate.Feature = "DevicePluginCDIDevices"
// owner: @aojea
// alpha: v1.31
//
// API servers with the MultiCIDRServiceAllocator feature enabled perform a dual write on both the old
// bitmap ClusterIP allocators and the new IPAddress allocators, in order to support live migration from
// the former to the latter. This feature gate disables the dual write on the new ClusterIP allocators.
DisableAllocatorDualWrite featuregate.Feature = "DisableAllocatorDualWrite"
// owner: @andrewsykim
// alpha: v1.22
// beta: v1.29
@@ -1003,6 +1011,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
CronJobsScheduledAnnotation: {Default: true, PreRelease: featuregate.Beta},
DisableAllocatorDualWrite: {Default: false, PreRelease: featuregate.Alpha}, // remove after MultiCIDRServiceAllocator is GA
DisableCloudProviders: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
DisableKubeletCloudCredentialProviders: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
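Neither gate is on by default at this stage of the migration, so exercising the combined behaviour on a test cluster means starting the API server with something like:

kube-apiserver --feature-gates=MultiCIDRServiceAllocator=true,DisableAllocatorDualWrite=true ...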

View File

@@ -41096,7 +41096,7 @@ func schema_k8sio_api_networking_v1alpha1_ServiceCIDRSpec(ref common.ReferenceCa
},
},
SchemaProps: spec.SchemaProps{
Description: "CIDRs defines the IP blocks in CIDR notation (e.g. \"192.168.0.0/24\" or \"2001:db8::/64\") from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family. This field is immutable.",
Description: "CIDRs defines the IP blocks in CIDR notation (e.g. \"192.168.0.0/24\" or \"2001:db8::/64\") from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family. The network address of each CIDR, the address that identifies the subnet of a host, is reserved and will not be allocated. The broadcast address for IPv4 CIDRs is also reserved and will not be allocated. This field is immutable.",
Type: []string{"array"},
Items: &spec.SchemaOrArray{
Schema: &spec.Schema{

View File

@@ -36,6 +36,7 @@ import (
"k8s.io/client-go/kubernetes"
networkingv1alpha1client "k8s.io/client-go/kubernetes/typed/networking/v1alpha1"
policyclient "k8s.io/client-go/kubernetes/typed/policy/v1"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/api/legacyscheme"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/cluster/ports"
@@ -351,6 +352,37 @@ func (c *Config) newServiceIPAllocators() (registries rangeRegistries, primaryCl
if err != nil {
return rangeRegistries{}, nil, nil, nil, err
}
var bitmapAllocator ipallocator.Interface
if !utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
bitmapAllocator, err = ipallocator.New(&serviceClusterIPRange, func(max int, rangeSpec string, offset int) (allocator.Interface, error) {
mem := allocator.NewAllocationMapWithOffset(max, rangeSpec, offset)
etcd, err := serviceallocator.NewEtcd(mem, "/ranges/serviceips", serviceStorageConfig.ForResource(api.Resource("serviceipallocations")))
if err != nil {
return nil, err
}
// It is possible to start apiserver clusters with the new allocator and dual write enabled in new environments.
// If this is the case we need to initialize the bitmap, or allocating IP addresses will fail because
// the ResourceVersion of the opaque API object is zero.
rangeRegistry, err := etcd.Get()
if err != nil {
return nil, err
}
rangeRegistry.Range = serviceClusterIPRange.String()
if len(rangeRegistry.ResourceVersion) == 0 {
klog.Infof("kube-apiserver started with IP allocator and dual write enabled but bitmap allocator does not exist, recreating it ...")
err := etcd.CreateOrUpdate(rangeRegistry)
if err != nil {
return nil, err
}
}
registries.clusterIP = etcd
return etcd, nil
})
if err != nil {
return rangeRegistries{}, nil, nil, nil, fmt.Errorf("cannot create cluster IP allocator: %w", err)
}
}
// TODO(aojea) Revisit the initialization of the allocators
// since right now it depends on the service-cidr flags and
// sets the default IPFamily that may not be coherent with the
@@ -360,6 +392,7 @@ func (c *Config) newServiceIPAllocators() (registries rangeRegistries, primaryCl
c.Informers.Networking().V1alpha1().ServiceCIDRs(),
c.Informers.Networking().V1alpha1().IPAddresses(),
netutils.IsIPv6CIDR(&serviceClusterIPRange),
bitmapAllocator,
)
if err != nil {
return rangeRegistries{}, nil, nil, nil, fmt.Errorf("cannot create cluster IP allocator: %v", err)
@@ -391,6 +424,37 @@ func (c *Config) newServiceIPAllocators() (registries rangeRegistries, primaryCl
if err != nil {
return rangeRegistries{}, nil, nil, nil, err
}
var bitmapAllocator ipallocator.Interface
if !utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
bitmapAllocator, err = ipallocator.New(&c.Services.SecondaryClusterIPRange, func(max int, rangeSpec string, offset int) (allocator.Interface, error) {
mem := allocator.NewAllocationMapWithOffset(max, rangeSpec, offset)
// TODO etcdallocator package to return a storage interface via the storageFactory
etcd, err := serviceallocator.NewEtcd(mem, "/ranges/secondaryserviceips", serviceStorageConfig.ForResource(api.Resource("serviceipallocations")))
if err != nil {
return nil, err
}
// It is possible to start apiserver clusters with the new allocator and dual write enabled in new environments.
// If this is the case we need to initialize the bitmap, or allocating IP addresses will fail because
// the ResourceVersion of the opaque API object is zero.
rangeRegistry, err := etcd.Get()
if err != nil {
return nil, err
}
rangeRegistry.Range = c.Services.SecondaryClusterIPRange.String()
if len(rangeRegistry.ResourceVersion) == 0 {
klog.Infof("kube-apiserver started with IP allocator and dual write enabled but bitmap allocator does not exist, recreating it ...")
err := etcd.CreateOrUpdate(rangeRegistry)
if err != nil {
return nil, err
}
}
registries.secondaryClusterIP = etcd
return etcd, nil
})
if err != nil {
return rangeRegistries{}, nil, nil, nil, fmt.Errorf("cannot create cluster secondary IP allocator: %w", err)
}
}
// TODO(aojea) Revisit the initialization of the allocators
// since right now it depends on the service-cidr flags and
// sets the default IPFamily that may not be coherent with the
@@ -400,6 +464,7 @@ func (c *Config) newServiceIPAllocators() (registries rangeRegistries, primaryCl
c.Informers.Networking().V1alpha1().ServiceCIDRs(),
c.Informers.Networking().V1alpha1().IPAddresses(),
netutils.IsIPv6CIDR(&c.Services.SecondaryClusterIPRange),
bitmapAllocator,
)
if err != nil {
return rangeRegistries{}, nil, nil, nil, fmt.Errorf("cannot create cluster secondary IP allocator: %v", err)

View File

@@ -27,26 +27,27 @@ import (
networkingv1alpha1 "k8s.io/api/networking/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
networkingv1alpha1informers "k8s.io/client-go/informers/networking/v1alpha1"
networkingv1alpha1client "k8s.io/client-go/kubernetes/typed/networking/v1alpha1"
networkingv1alpha1listers "k8s.io/client-go/listers/networking/v1alpha1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/api/servicecidr"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/util/iptree"
"k8s.io/kubernetes/pkg/features"
netutils "k8s.io/utils/net"
)
// MetaAllocator maintains a Tree of ServiceCIDRs with an IP Allocator on each
// node. Since the allocators don't store the IPAddresses (they read them from
// the informer cache), they are cheap to create and delete.
// MetaAllocator forwards the request to any of the internal allocators that has free
// addresses.
// MetaAllocator maintains a structure with IP allocators for the corresponding ServiceCIDRs.
// CIDR overlapping is allowed and the MetaAllocator must take this into consideration.
// The allocators don't store the IPAddresses; instead they read them from the informer
// cache, so they are cheap to create and delete.
// MetaAllocator uses any READY allocator that has available IPs to allocate addresses.
// MetaAllocator implements the current allocator interface using
// ServiceCIDR and IPAddress API objects.
@@ -61,10 +62,23 @@ type MetaAllocator struct {
internalStopCh chan struct{}
muTree sync.Mutex
tree *iptree.Tree[*Allocator]
// allocators is a map indexed by the network prefix.
// Multiple ServiceCIDRs can contain the same network prefix,
// so we need to store references from each allocator to
// the corresponding ServiceCIDRs
mu sync.Mutex
allocators map[string]*item
ipFamily api.IPFamily
metrics bool // enable the metrics collection
// TODO(aojea): remove with the feature gate DisableAllocatorDualWrite
bitmapAllocator Interface
}
type item struct {
allocator *Allocator
serviceCIDRs sets.Set[string] // reference of the serviceCIDRs using this Allocator
}
var _ Interface = &MetaAllocator{}
@@ -77,8 +91,21 @@ func NewMetaAllocator(
serviceCIDRInformer networkingv1alpha1informers.ServiceCIDRInformer,
ipAddressInformer networkingv1alpha1informers.IPAddressInformer,
isIPv6 bool,
bitmapAllocator Interface,
) (*MetaAllocator, error) {
c := newMetaAllocator(client, serviceCIDRInformer, ipAddressInformer, isIPv6, bitmapAllocator)
go c.run()
return c, nil
}
// newMetaAllocator is used to build the allocator for testing
func newMetaAllocator(client networkingv1alpha1client.NetworkingV1alpha1Interface,
serviceCIDRInformer networkingv1alpha1informers.ServiceCIDRInformer,
ipAddressInformer networkingv1alpha1informers.IPAddressInformer,
isIPv6 bool,
bitmapAllocator Interface,
) *MetaAllocator {
// TODO: make the NewMetaAllocator agnostic of the IP family
family := api.IPv4Protocol
if isIPv6 {
@@ -96,47 +123,79 @@ func NewMetaAllocator(
workqueue.DefaultTypedControllerRateLimiter[string](),
workqueue.TypedRateLimitingQueueConfig[string]{Name: ControllerName},
),
internalStopCh: make(chan struct{}),
tree: iptree.New[*Allocator](),
ipFamily: family,
internalStopCh: make(chan struct{}),
allocators: make(map[string]*item),
ipFamily: family,
metrics: false,
bitmapAllocator: bitmapAllocator,
}
_, _ = serviceCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: c.addServiceCIDR,
UpdateFunc: c.updateServiceCIDR,
AddFunc: c.enqueueServiceCIDR,
UpdateFunc: func(old, new interface{}) {
c.enqueueServiceCIDR(new)
},
// Process the deletion directly in the handler to be able to use the object fields
// without having to cache them. ServiceCIDRs are protected by finalizers
// so the "started deletion" logic will be handled in the reconcile loop.
DeleteFunc: c.deleteServiceCIDR,
})
go c.run()
return c, nil
return c
}
func (c *MetaAllocator) addServiceCIDR(obj interface{}) {
func (c *MetaAllocator) enqueueServiceCIDR(obj interface{}) {
key, err := cache.MetaNamespaceKeyFunc(obj)
if err == nil {
c.queue.Add(key)
}
}
func (c *MetaAllocator) updateServiceCIDR(old, new interface{}) {
key, err := cache.MetaNamespaceKeyFunc(new)
if err == nil {
c.queue.Add(key)
}
}
func (c *MetaAllocator) deleteServiceCIDR(obj interface{}) {
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
if err == nil {
c.queue.Add(key)
serviceCIDR, ok := obj.(*networkingv1alpha1.ServiceCIDR)
if !ok {
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
return
}
serviceCIDR, ok = tombstone.Obj.(*networkingv1alpha1.ServiceCIDR)
if !ok {
return
}
}
klog.Infof("deleting ClusterIP allocator for Service CIDR %v", serviceCIDR)
c.mu.Lock()
defer c.mu.Unlock()
for _, cidr := range serviceCIDR.Spec.CIDRs {
// skip IP families not supported by this MetaAllocator
if c.ipFamily != api.IPFamily(convertToV1IPFamily(netutils.IPFamilyOfCIDRString(cidr))) {
continue
}
// get the Allocator used by this ServiceCIDR
v, ok := c.allocators[cidr]
if !ok {
continue
}
// remove the reference to this ServiceCIDR
v.serviceCIDRs.Delete(serviceCIDR.Name)
if v.serviceCIDRs.Len() > 0 {
klog.V(2).Infof("deleted Service CIDR from allocator %s, remaining %v", cidr, v.serviceCIDRs)
} else {
// if there are no references to this Allocator
// destroy and remove it from the map
v.allocator.Destroy()
delete(c.allocators, cidr)
klog.Infof("deleted ClusterIP allocator for Service CIDR %s", cidr)
}
}
}
func (c *MetaAllocator) run() {
defer runtime.HandleCrash()
defer c.queue.ShutDown()
klog.Info("Starting ServiceCIDR Allocator Controller")
defer klog.Info("Stopping ServiceCIDR Allocator Controllerr")
klog.Info("starting ServiceCIDR Allocator Controller")
defer klog.Info("stopping ServiceCIDR Allocator Controller")
// Wait for all involved caches to be synced, before processing items from the queue is started
if !cache.WaitForCacheSync(c.internalStopCh, c.serviceCIDRSynced, c.ipAddressSynced) {
@@ -162,12 +221,11 @@ func (c *MetaAllocator) processNextItem() bool {
return false
}
defer c.queue.Done(key)
err := c.syncTree()
err := c.syncAllocators()
// Handle the error if something went wrong during the execution of the business logic
if err != nil {
if c.queue.NumRequeues(key) < 5 {
klog.Infof("Error syncing cidr %v: %v", key, err)
klog.Infof("error syncing cidr %v: %v", key, err)
c.queue.AddRateLimited(key)
return true
}
@@ -176,131 +234,135 @@ func (c *MetaAllocator) processNextItem() bool {
return true
}
// syncTree syncs the ipTrees from the informer cache
// It deletes or creates allocator and sets the corresponding state
func (c *MetaAllocator) syncTree() error {
now := time.Now()
// syncAllocators adds new allocators and syncs the ready state of the allocators;
// deletion of allocators is handled directly in the event handlers.
func (c *MetaAllocator) syncAllocators() error {
start := time.Now()
klog.V(2).Info("syncing ServiceCIDR allocators")
defer func() {
klog.V(2).Infof("Finished sync for CIDRs took %v", time.Since(now))
klog.V(2).Infof("syncing ServiceCIDR allocators took: %v", time.Since(start))
}()
c.mu.Lock()
defer c.mu.Unlock()
serviceCIDRs, err := c.serviceCIDRLister.List(labels.Everything())
if err != nil {
return err
}
cidrsSet := sets.New[string]()
cidrReady := map[string]bool{}
for _, serviceCIDR := range serviceCIDRs {
ready := true
if !isReady(serviceCIDR) || !serviceCIDR.DeletionTimestamp.IsZero() {
ready = false
}
for _, cidr := range serviceCIDR.Spec.CIDRs {
if c.ipFamily == api.IPFamily(convertToV1IPFamily(netutils.IPFamilyOfCIDRString(cidr))) {
cidrsSet.Insert(cidr)
cidrReady[cidr] = ready
// skip IP families not supported by this MetaAllocator
if c.ipFamily != api.IPFamily(convertToV1IPFamily(netutils.IPFamilyOfCIDRString(cidr))) {
continue
}
// the allocator is ready if the object is ready and is not being deleted
ready := false
if isReady(serviceCIDR) && serviceCIDR.DeletionTimestamp.IsZero() {
ready = true
}
// check if an allocator already exists for this CIDR
v, ok := c.allocators[cidr]
// Update allocator with ServiceCIDR
if ok {
v.serviceCIDRs.Insert(serviceCIDR.Name)
// an Allocator is ready if at least one of the ServiceCIDRs is ready
if ready {
v.allocator.ready.Store(true)
} else if v.serviceCIDRs.Has(serviceCIDR.Name) && len(v.serviceCIDRs) == 1 {
v.allocator.ready.Store(false)
}
klog.Infof("updated ClusterIP allocator for Service CIDR %s", cidr)
continue
}
// Create new allocator for ServiceCIDR
_, ipnet, err := netutils.ParseCIDRSloppy(cidr) // this was already validated
if err != nil {
klog.Infof("error parsing cidr %s", cidr)
continue
}
// New ServiceCIDR, create new allocator
allocator, err := NewIPAllocator(ipnet, c.client, c.ipAddressInformer)
if err != nil {
klog.Infof("error creating new IPAllocator for Service CIDR %s", cidr)
continue
}
if c.metrics {
allocator.EnableMetrics()
}
allocator.ready.Store(ready)
c.allocators[cidr] = &item{
allocator: allocator,
serviceCIDRs: sets.New[string](serviceCIDR.Name),
}
klog.Infof("created ClusterIP allocator for Service CIDR %s", cidr)
}
}
return nil
}
// getAllocator returns any allocator that contains the IP passed as an argument.
// If ready is set, only an allocator that is ready is returned.
// Allocate operations can work with ANY allocator that is ready; the allocators
// hold references to the IP addresses, so it does not matter which allocator
// contains the IP. Release operations need to work with ANY allocator
// independent of its state.
func (c *MetaAllocator) getAllocator(ip net.IP, ready bool) (*Allocator, error) {
c.mu.Lock()
defer c.mu.Unlock()
address := servicecidr.IPToAddr(ip)
// use the first allocator that contains the address
for cidr, item := range c.allocators {
prefix, err := netip.ParsePrefix(cidr)
if err != nil {
return nil, err
}
if servicecidr.PrefixContainsIP(prefix, address) {
if !ready {
return item.allocator, nil
}
if item.allocator.ready.Load() {
return item.allocator, nil
}
}
}
// obtain the existing allocators and set the existing state
treeSet := sets.New[string]()
c.muTree.Lock()
c.tree.DepthFirstWalk(c.ipFamily == api.IPv6Protocol, func(k netip.Prefix, v *Allocator) bool {
v.ready.Store(cidrReady[k.String()])
treeSet.Insert(k.String())
return false
})
c.muTree.Unlock()
cidrsToRemove := treeSet.Difference(cidrsSet)
cidrsToAdd := cidrsSet.Difference(treeSet)
errs := []error{}
// Add new allocators
for _, cidr := range cidrsToAdd.UnsortedList() {
_, ipnet, err := netutils.ParseCIDRSloppy(cidr)
if err != nil {
return err
}
// New ServiceCIDR, create new allocator
allocator, err := NewIPAllocator(ipnet, c.client, c.ipAddressInformer)
if err != nil {
errs = append(errs, err)
continue
}
allocator.ready.Store(cidrReady[cidr])
prefix, err := netip.ParsePrefix(cidr)
if err != nil {
return err
}
c.addAllocator(prefix, allocator)
klog.Infof("Created ClusterIP allocator for Service CIDR %s", cidr)
}
// Remove allocators that no longer exist
for _, cidr := range cidrsToRemove.UnsortedList() {
prefix, err := netip.ParsePrefix(cidr)
if err != nil {
return err
}
c.deleteAllocator(prefix)
}
return utilerrors.NewAggregate(errs)
}
func (c *MetaAllocator) getAllocator(ip net.IP) (*Allocator, error) {
c.muTree.Lock()
defer c.muTree.Unlock()
address := ipToAddr(ip)
prefix := netip.PrefixFrom(address, address.BitLen())
// Use the largest subnet to allocate addresses because
// all the other subnets will be contained.
_, allocator, ok := c.tree.ShortestPrefixMatch(prefix)
if !ok {
klog.V(2).Infof("Could not get allocator for IP %s", ip.String())
return nil, ErrMismatchedNetwork
}
return allocator, nil
}
func (c *MetaAllocator) addAllocator(cidr netip.Prefix, allocator *Allocator) {
c.muTree.Lock()
defer c.muTree.Unlock()
c.tree.InsertPrefix(cidr, allocator)
}
func (c *MetaAllocator) deleteAllocator(cidr netip.Prefix) {
c.muTree.Lock()
defer c.muTree.Unlock()
ok := c.tree.DeletePrefix(cidr)
if ok {
klog.V(3).Infof("CIDR %s deleted", cidr)
}
klog.V(2).Infof("Could not get allocator for IP %s", ip.String())
return nil, ErrMismatchedNetwork
}
func (c *MetaAllocator) AllocateService(service *api.Service, ip net.IP) error {
allocator, err := c.getAllocator(ip)
allocator, err := c.getAllocator(ip, true)
if err != nil {
return err
}
if !utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
cidr := c.bitmapAllocator.CIDR()
if cidr.Contains(ip) {
err := c.bitmapAllocator.Allocate(ip)
if err != nil {
return err
}
}
}
return allocator.AllocateService(service, ip)
}
// Allocate attempts to reserve the provided IP. ErrNotInRange or
// ErrAllocated will be returned if the IP is not valid for this range
// or has already been reserved. ErrFull will be returned if there
// are no addresses left.
// Only for testing, it will fail to create the IPAddress object because
// the Service reference is required.
func (c *MetaAllocator) Allocate(ip net.IP) error {
allocator, err := c.getAllocator(ip)
if err != nil {
return err
}
return allocator.Allocate(ip)
return c.AllocateService(nil, ip)
}
func (c *MetaAllocator) AllocateNextService(service *api.Service) (net.IP, error) {
c.muTree.Lock()
defer c.muTree.Unlock()
c.mu.Lock()
defer c.mu.Unlock()
// TODO(aojea) add strategy to return a random allocator but
// taking into consideration the number of addresses of each allocator.
// For example, if we have allocator A and B with 256 and 1024 possible
@@ -308,40 +370,45 @@ func (c *MetaAllocator) AllocateNextService(service *api.Service) (net.IP, error
// addresses each, the chances to get B have to be 4 times the chances to
// get A so we can spread the load of IPs randomly.
// However, we need to validate the best strategy before going to Beta.
isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol)
for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) {
ip, err := allocator.AllocateNextService(service)
for cidr, item := range c.allocators {
if netutils.IsIPv6CIDRString(cidr) != isIPv6 {
continue
}
ip, err := item.allocator.AllocateNextService(service)
if err == nil {
if !utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
cidr := c.bitmapAllocator.CIDR()
if cidr.Contains(ip) {
err := c.bitmapAllocator.Allocate(ip)
if err != nil {
continue
}
}
}
return ip, nil
}
}
return nil, ErrFull
}
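// A minimal sketch (illustrative only, not part of this change) of the
// weighted strategy described in the TODO above: pick an allocator with
// probability proportional to the number of addresses it holds, so larger
// CIDRs receive proportionally more allocations. It assumes the Allocator
// size field used by Free() below and would need math/rand imported.
func pickWeightedAllocator(allocators []*Allocator) *Allocator {
	var total int64
	for _, a := range allocators {
		total += int64(a.size)
	}
	if total <= 0 {
		return nil
	}
	// pick a random point in [0, total) and find the allocator that owns it
	n := rand.Int63n(total)
	for _, a := range allocators {
		if n < int64(a.size) {
			return a
		}
		n -= int64(a.size)
	}
	return allocators[len(allocators)-1]
}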
// AllocateNext returns an IP address that wasn't allocated yet.
// Only for testing, it will fail to create the IPAddress object because
// the Service reference is required
func (c *MetaAllocator) AllocateNext() (net.IP, error) {
c.muTree.Lock()
defer c.muTree.Unlock()
// TODO(aojea) add strategy to return a random allocator but
// taking into consideration the number of addresses of each allocator.
// For example, if we have allocator A and B with 256 and 1024 possible
// addresses each, the chances to get B have to be 4 times the chances to
// get A so we can spread the load of IPs randomly.
// However, we need to validate the best strategy before going to Beta.
isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol)
for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) {
ip, err := allocator.AllocateNext()
if err == nil {
return ip, nil
}
}
return nil, ErrFull
return c.AllocateNextService(nil)
}
func (c *MetaAllocator) Release(ip net.IP) error {
allocator, err := c.getAllocator(ip)
allocator, err := c.getAllocator(ip, false)
if err != nil {
return err
}
if !utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
cidr := c.bitmapAllocator.CIDR()
if cidr.Contains(ip) {
_ = c.bitmapAllocator.Release(ip)
}
}
return allocator.Release(ip)
}
@@ -367,7 +434,7 @@ func (c *MetaAllocator) IPFamily() api.IPFamily {
return c.ipFamily
}
func (c *MetaAllocator) Has(ip net.IP) bool {
allocator, err := c.getAllocator(ip)
allocator, err := c.getAllocator(ip, true)
if err != nil {
return false
}
@@ -377,6 +444,9 @@ func (c *MetaAllocator) Destroy() {
select {
case <-c.internalStopCh:
default:
if !utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
c.bitmapAllocator.Destroy()
}
close(c.internalStopCh)
}
}
@@ -396,26 +466,48 @@ func (c *MetaAllocator) Used() int {
// for testing
func (c *MetaAllocator) Free() int {
c.muTree.Lock()
defer c.muTree.Unlock()
c.mu.Lock()
defer c.mu.Unlock()
size := 0
isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol)
for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) {
size += int(allocator.size)
prefixes := []netip.Prefix{}
// Get all the existing prefixes
for cidr := range c.allocators {
prefix, err := netip.ParsePrefix(cidr)
if err != nil {
continue
}
prefixes = append(prefixes, prefix)
}
// only count the top level prefixes to avoid double counting
for _, prefix := range prefixes {
if !isNotContained(prefix, prefixes) {
continue
}
v, ok := c.allocators[prefix.String()]
if !ok {
continue
}
size += int(v.allocator.size)
}
return size - c.Used()
}
func (c *MetaAllocator) EnableMetrics() {}
func (c *MetaAllocator) EnableMetrics() {
c.mu.Lock()
defer c.mu.Unlock()
c.metrics = true
for _, item := range c.allocators {
item.allocator.EnableMetrics()
}
}
// DryRun returns a random allocator
func (c *MetaAllocator) DryRun() Interface {
c.muTree.Lock()
defer c.muTree.Unlock()
isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol)
for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) {
return allocator.DryRun()
c.mu.Lock()
defer c.mu.Unlock()
for _, item := range c.allocators {
return item.allocator.DryRun()
}
return &Allocator{}
}
@@ -434,22 +526,6 @@ func isReady(serviceCIDR *networkingv1alpha1.ServiceCIDR) bool {
return true
}
// ipToAddr converts a net.IP to a netip.Addr
// if the net.IP is not valid it returns an empty netip.Addr{}
func ipToAddr(ip net.IP) netip.Addr {
// https://pkg.go.dev/net/netip#AddrFromSlice can return an IPv4 in IPv6 format
// so we have to check the IP family to return exactly the format that we want
// address, _ := netip.AddrFromSlice(net.ParseIPSloppy(192.168.0.1)) returns
// an address like ::ffff:192.168.0.1/32
bytes := ip.To4()
if bytes == nil {
bytes = ip.To16()
}
// AddrFromSlice returns Addr{}, false if the input is invalid.
address, _ := netip.AddrFromSlice(bytes)
return address
}
// Convert netutils.IPFamily to v1.IPFamily
// TODO: consolidate helpers
// copied from pkg/proxy/util/utils.go
@@ -463,3 +539,19 @@ func convertToV1IPFamily(ipFamily netutils.IPFamily) v1.IPFamily {
return v1.IPFamilyUnknown
}
// isNotContained returns true if the prefix is not contained in any
// of the passed prefixes.
func isNotContained(prefix netip.Prefix, prefixes []netip.Prefix) bool {
for _, p := range prefixes {
// skip same prefix
if prefix == p {
continue
}
// 192.168.0.0/24 is contained within 192.168.0.0/16
if prefix.Overlaps(p) && prefix.Bits() >= p.Bits() {
return false
}
}
return true
}
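// For example (illustrative), given prefixes = [10.0.0.0/24, 10.0.0.0/27]:
//
//	isNotContained(netip.MustParsePrefix("10.0.0.0/24"), prefixes) // true, only matches itself
//	isNotContained(netip.MustParsePrefix("10.0.0.0/27"), prefixes) // false, contained in the /24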

View File

@@ -19,6 +19,7 @@ package ipallocator
import (
"context"
"fmt"
"net/netip"
"testing"
"time"
@@ -27,9 +28,12 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
k8stesting "k8s.io/client-go/testing"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/features"
netutils "k8s.io/utils/net"
)
@@ -39,7 +43,6 @@ func newTestMetaAllocator() (*MetaAllocator, error) {
informerFactory := informers.NewSharedInformerFactory(client, 0*time.Second)
serviceCIDRInformer := informerFactory.Networking().V1alpha1().ServiceCIDRs()
serviceCIDRStore := serviceCIDRInformer.Informer().GetIndexer()
serviceCIDRInformer.Informer().HasSynced()
ipInformer := informerFactory.Networking().V1alpha1().IPAddresses()
ipStore := ipInformer.Informer().GetIndexer()
@@ -85,17 +88,16 @@ func newTestMetaAllocator() (*MetaAllocator, error) {
return false, ip, err
}))
c, err := NewMetaAllocator(client.NetworkingV1alpha1(), serviceCIDRInformer, ipInformer, false)
if err != nil {
return nil, err
}
// we can not force the state of the informers to be synced without racing
// so we run our worker here
go wait.Until(c.runWorker, time.Second, c.internalStopCh)
c := newMetaAllocator(client.NetworkingV1alpha1(), serviceCIDRInformer, ipInformer, false, nil)
c.serviceCIDRSynced = func() bool { return true }
c.ipAddressSynced = func() bool { return true }
go c.run()
return c, nil
}
func TestCIDRAllocateMultiple(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DisableAllocatorDualWrite, true)
r, err := newTestMetaAllocator()
if err != nil {
t.Fatal(err)
@@ -114,10 +116,10 @@ func TestCIDRAllocateMultiple(t *testing.T) {
if err != nil {
t.Fatal(err)
}
r.addServiceCIDR(cidr)
r.enqueueServiceCIDR(cidr)
// wait for the cidr to be processed and set the informer synced
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"))
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"), true)
if err != nil {
t.Logf("unexpected error %v", err)
return false, nil
@@ -153,10 +155,10 @@ func TestCIDRAllocateMultiple(t *testing.T) {
if err != nil {
t.Fatal(err)
}
r.addServiceCIDR(cidr2)
r.enqueueServiceCIDR(cidr2)
// wait for the cidr to be processed
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("10.0.0.11"))
allocator, err := r.getAllocator(netutils.ParseIPSloppy("10.0.0.11"), true)
if err != nil {
return false, nil
}
@@ -193,6 +195,7 @@ func TestCIDRAllocateMultiple(t *testing.T) {
}
func TestCIDRAllocateShadow(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DisableAllocatorDualWrite, true)
r, err := newTestMetaAllocator()
if err != nil {
t.Fatal(err)
@@ -211,10 +214,10 @@ func TestCIDRAllocateShadow(t *testing.T) {
if err != nil {
t.Fatal(err)
}
r.addServiceCIDR(cidr)
r.enqueueServiceCIDR(cidr)
// wait for the cidr to be processed and set the informer synced
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.1.0"))
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.1.1"), true)
if err != nil {
return false, nil
}
@@ -224,7 +227,7 @@ func TestCIDRAllocateShadow(t *testing.T) {
if err != nil {
t.Fatal(err)
}
// allocate one IP from the new allocator
// can not allocate the subnet IP from the new allocator
err = r.Allocate(netutils.ParseIPSloppy("192.168.1.0"))
if err == nil {
t.Fatalf("unexpected allocation for IP 192.168.1.0")
@@ -239,10 +242,10 @@ func TestCIDRAllocateShadow(t *testing.T) {
if err != nil {
t.Fatal(err)
}
r.addServiceCIDR(cidr2)
r.enqueueServiceCIDR(cidr2)
// wait for the cidr to be processed
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.0"))
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"), true)
if err != nil {
return false, nil
}
@@ -265,6 +268,7 @@ func TestCIDRAllocateShadow(t *testing.T) {
}
func TestCIDRAllocateGrow(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DisableAllocatorDualWrite, true)
r, err := newTestMetaAllocator()
if err != nil {
t.Fatal(err)
@@ -283,10 +287,10 @@ func TestCIDRAllocateGrow(t *testing.T) {
if err != nil {
t.Fatal(err)
}
r.addServiceCIDR(cidr)
r.enqueueServiceCIDR(cidr)
// wait for the cidr to be processed
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"))
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"), true)
if err != nil {
return false, nil
}
@@ -321,10 +325,10 @@ func TestCIDRAllocateGrow(t *testing.T) {
if err != nil {
t.Fatal(err)
}
r.addServiceCIDR(cidr2)
r.enqueueServiceCIDR(cidr2)
// wait for the cidr to be processed
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.253"))
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.253"), true)
if err != nil {
return false, nil
}
@@ -356,6 +360,7 @@ func TestCIDRAllocateGrow(t *testing.T) {
}
func TestCIDRAllocateShrink(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DisableAllocatorDualWrite, true)
r, err := newTestMetaAllocator()
if err != nil {
t.Fatal(err)
@@ -374,10 +379,10 @@ func TestCIDRAllocateShrink(t *testing.T) {
if err != nil {
t.Fatal(err)
}
r.addServiceCIDR(cidr)
r.enqueueServiceCIDR(cidr)
// wait for the cidr to be processed
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"))
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"), true)
if err != nil {
return false, nil
}
@@ -420,7 +425,7 @@ func TestCIDRAllocateShrink(t *testing.T) {
if err != nil {
t.Fatal(err)
}
r.addServiceCIDR(cidr2)
r.enqueueServiceCIDR(cidr2)
err = r.client.ServiceCIDRs().Delete(context.Background(), cidr.Name, metav1.DeleteOptions{})
if err != nil {
t.Fatal(err)
@@ -429,7 +434,7 @@ func TestCIDRAllocateShrink(t *testing.T) {
// wait for the cidr to be processed (delete ServiceCIDR)
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
_, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.253"))
_, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.253"), true)
if err != nil {
return true, nil
}
@@ -441,7 +446,7 @@ func TestCIDRAllocateShrink(t *testing.T) {
}
// wait for the cidr to be processed (create ServiceCIDR)
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"))
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"), true)
if err != nil {
return false, nil
}
@@ -468,6 +473,134 @@ func TestCIDRAllocateShrink(t *testing.T) {
}
func TestCIDRAllocateDualWrite(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DisableAllocatorDualWrite, false)
r, err := newTestMetaAllocator()
if err != nil {
t.Fatal(err)
}
defer r.Destroy()
if f := r.Free(); f != 0 {
t.Errorf("free: %d", f)
}
if _, err := r.AllocateNext(); err == nil {
t.Error(err)
}
cidr := newServiceCIDR("test", "192.168.0.0/28")
_, err = r.client.ServiceCIDRs().Create(context.Background(), cidr, metav1.CreateOptions{})
if err != nil {
t.Fatal(err)
}
r.enqueueServiceCIDR(cidr)
// wait for the cidr to be processed and set the informer synced
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"), true)
if err != nil {
t.Logf("unexpected error %v", err)
return false, nil
}
allocator.ipAddressSynced = func() bool { return true }
return allocator.ready.Load(), nil
})
if err != nil {
t.Fatal(err)
}
// Create a bitmap allocator that will mirror the ip allocator
_, ipnet, err := netutils.ParseCIDRSloppy(cidr.Spec.CIDRs[0])
if err != nil {
t.Fatalf("unexpected failure: %v", err)
}
bitmapAllocator, err := NewInMemory(ipnet)
if err != nil {
t.Fatalf("unexpected failure: %v", err)
}
r.bitmapAllocator = bitmapAllocator
found := sets.NewString()
count := 0
for r.Free() > 0 {
ip, err := r.AllocateNext()
if err != nil {
t.Fatalf("error @ free: %d count: %d: %v", r.Free(), count, err)
}
if r.Free() != bitmapAllocator.Free() {
t.Fatalf("ip and bitmap allocator out of sync: %d %d", r.Free(), bitmapAllocator.Free())
}
count++
if found.Has(ip.String()) {
t.Fatalf("allocated %s twice: %d", ip, count)
}
found.Insert(ip.String())
}
if count != 14 {
t.Fatalf("expected 14 IPs got %d", count)
}
if _, err := r.AllocateNext(); err == nil {
t.Fatal(err)
}
}
func TestCIDRAllocateDualWriteCollision(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DisableAllocatorDualWrite, false)
r, err := newTestMetaAllocator()
if err != nil {
t.Fatal(err)
}
defer r.Destroy()
if f := r.Free(); f != 0 {
t.Errorf("free: %d", f)
}
if _, err := r.AllocateNext(); err == nil {
t.Error(err)
}
cidr := newServiceCIDR("test", "192.168.0.0/28")
_, err = r.client.ServiceCIDRs().Create(context.Background(), cidr, metav1.CreateOptions{})
if err != nil {
t.Fatal(err)
}
r.enqueueServiceCIDR(cidr)
// wait for the cidr to be processed and set the informer synced
err = wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
allocator, err := r.getAllocator(netutils.ParseIPSloppy("192.168.0.1"), true)
if err != nil {
t.Logf("unexpected error %v", err)
return false, nil
}
allocator.ipAddressSynced = func() bool { return true }
return allocator.ready.Load(), nil
})
if err != nil {
t.Fatal(err)
}
// Create a bitmap allocator that will mirror the ip allocator
_, ipnet, err := netutils.ParseCIDRSloppy(cidr.Spec.CIDRs[0])
if err != nil {
t.Fatalf("unexpected failure: %v", err)
}
bitmapAllocator, err := NewInMemory(ipnet)
if err != nil {
t.Fatalf("unexpected failure: %v", err)
}
r.bitmapAllocator = bitmapAllocator
// preallocate one IP in the bitmap allocator
err = bitmapAllocator.Allocate(netutils.ParseIPSloppy("192.168.0.5"))
if err != nil {
t.Fatalf("unexpected error allocating an IP on the bitmap allocator: %v", err)
}
// the ipallocator must not be able to allocate
err = r.Allocate(netutils.ParseIPSloppy("192.168.0.5"))
if err == nil {
t.Fatalf("unexpected allocation: %v", err)
}
}
// TODO: add IPv6 and dual stack test cases
func newServiceCIDR(name, cidr string) *networkingv1alpha1.ServiceCIDR {
return &networkingv1alpha1.ServiceCIDR{
@@ -487,3 +620,44 @@ func newServiceCIDR(name, cidr string) *networkingv1alpha1.ServiceCIDR {
},
}
}
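// A hedged sketch for the TODO above: an IPv6 fixture could reuse the same
// helper, e.g. newServiceCIDR("test-v6", "2001:db8::/112"), while a dual
// stack case would need a ServiceCIDR with two entries in Spec.CIDRs, one
// per IP family (names and ranges here are illustrative).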
func Test_isNotContained(t *testing.T) {
tests := []struct {
name string
prefix netip.Prefix
prefixes []netip.Prefix
want bool
}{
{
name: "ipv4 not contained nor overlapping",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
prefixes: []netip.Prefix{netip.MustParsePrefix("10.0.0.0/24"), netip.MustParsePrefix("10.0.0.0/27")},
want: true,
},
{
name: "ipv4 not contained but contains",
prefix: netip.MustParsePrefix("10.0.0.0/8"),
prefixes: []netip.Prefix{netip.MustParsePrefix("10.0.0.0/24"), netip.MustParsePrefix("10.0.0.0/27")},
want: true,
},
{
name: "ipv4 not contained but matches existing one",
prefix: netip.MustParsePrefix("10.0.0.0/24"),
prefixes: []netip.Prefix{netip.MustParsePrefix("10.0.0.0/24"), netip.MustParsePrefix("10.0.0.0/27")},
want: true,
},
{
name: "ipv4 contained but matches existing one",
prefix: netip.MustParsePrefix("10.0.0.0/27"),
prefixes: []netip.Prefix{netip.MustParsePrefix("10.0.0.0/24"), netip.MustParsePrefix("10.0.0.0/27")},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := isNotContained(tt.prefix, tt.prefixes); got != tt.want {
t.Errorf("isNotContained() = %v, want %v", got, tt.want)
}
})
}
}

View File

@@ -20,8 +20,6 @@ import (
"context"
"fmt"
"net"
"net/netip"
"sync"
"time"
v1 "k8s.io/api/core/v1"
@@ -42,9 +40,10 @@ import (
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/api/servicecidr"
"k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/controlplane/controller/defaultservicecidr"
"k8s.io/kubernetes/pkg/registry/core/service/ipallocator"
"k8s.io/kubernetes/pkg/util/iptree"
"k8s.io/utils/clock"
netutils "k8s.io/utils/net"
)
@@ -100,14 +99,10 @@ type RepairIPAddress struct {
ipAddressLister networkinglisters.IPAddressLister
ipAddressSynced cache.InformerSynced
cidrQueue workqueue.TypedRateLimitingInterface[string]
svcQueue workqueue.TypedRateLimitingInterface[string]
ipQueue workqueue.TypedRateLimitingInterface[string]
workerLoopPeriod time.Duration
muTree sync.Mutex
tree *iptree.Tree[string]
broadcaster events.EventBroadcaster
recorder events.EventRecorder
clock clock.Clock
@@ -132,10 +127,6 @@ func NewRepairIPAddress(interval time.Duration,
serviceCIDRSynced: serviceCIDRInformer.Informer().HasSynced,
ipAddressLister: ipAddressInformer.Lister(),
ipAddressSynced: ipAddressInformer.Informer().HasSynced,
cidrQueue: workqueue.NewTypedRateLimitingQueueWithConfig(
workqueue.DefaultTypedControllerRateLimiter[string](),
workqueue.TypedRateLimitingQueueConfig[string]{Name: "servicecidrs"},
),
svcQueue: workqueue.NewTypedRateLimitingQueueWithConfig(
workqueue.DefaultTypedControllerRateLimiter[string](),
workqueue.TypedRateLimitingQueueConfig[string]{Name: "services"},
@@ -144,7 +135,6 @@ func NewRepairIPAddress(interval time.Duration,
workqueue.DefaultTypedControllerRateLimiter[string](),
workqueue.TypedRateLimitingQueueConfig[string]{Name: "ipaddresses"},
),
tree: iptree.New[string](),
workerLoopPeriod: time.Second,
broadcaster: eventBroadcaster,
recorder: recorder,
@@ -174,29 +164,6 @@ func NewRepairIPAddress(interval time.Duration,
},
}, interval)
_, _ = serviceCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
key, err := cache.MetaNamespaceKeyFunc(obj)
if err == nil {
r.cidrQueue.Add(key)
}
},
UpdateFunc: func(old interface{}, new interface{}) {
key, err := cache.MetaNamespaceKeyFunc(new)
if err == nil {
r.cidrQueue.Add(key)
}
},
DeleteFunc: func(obj interface{}) {
// IndexerInformer uses a delta queue, therefore for deletes we have to use this
// key function.
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
if err == nil {
r.cidrQueue.Add(key)
}
},
})
ipAddressInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
key, err := cache.MetaNamespaceKeyFunc(obj)
@@ -225,7 +192,6 @@ func NewRepairIPAddress(interval time.Duration,
// RunUntil starts the controller until the provided ch is closed.
func (r *RepairIPAddress) RunUntil(onFirstSuccess func(), stopCh chan struct{}) {
defer r.cidrQueue.ShutDown()
defer r.ipQueue.ShutDown()
defer r.svcQueue.ShutDown()
r.broadcaster.StartRecordingToSink(stopCh)
@@ -238,6 +204,20 @@ func (r *RepairIPAddress) RunUntil(onFirstSuccess func(), stopCh chan struct{})
return
}
// wait for the default ServiceCIDR
ctx := wait.ContextForChannel(stopCh)
err := wait.PollUntilContextCancel(ctx, 100*time.Millisecond, true, func(context.Context) (bool, error) {
_, err := r.serviceCIDRLister.Get(defaultservicecidr.DefaultServiceCIDRName)
if err != nil {
return false, nil
}
return true, nil
})
if err != nil {
runtime.HandleError(err)
return
}
// First sync goes through all the Services and IPAddresses in the cache,
// once synced, it signals the main loop and works using the handlers, since
// that is less expensive and more efficient.
@@ -247,9 +227,6 @@ func (r *RepairIPAddress) RunUntil(onFirstSuccess func(), stopCh chan struct{})
}
onFirstSuccess()
// serialize the operations on ServiceCIDRs
go wait.Until(r.cidrWorker, r.workerLoopPeriod, stopCh)
for i := 0; i < workers; i++ {
go wait.Until(r.ipWorker, r.workerLoopPeriod, stopCh)
go wait.Until(r.svcWorker, r.workerLoopPeriod, stopCh)
@@ -370,11 +347,7 @@ func (r *RepairIPAddress) syncService(key string) error {
}
// TODO(aojea) Refactor to abstract the IPs checks
family := getFamilyByIP(ip)
r.muTree.Lock()
prefixes := r.tree.GetHostIPPrefixMatches(ipToAddr(ip))
r.muTree.Unlock()
if len(prefixes) == 0 {
if r.isIPOutOfRange(ip) {
// ClusterIP is out of range
r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPOutOfRange", "ClusterIPAllocation", "Cluster IP [%v]: %s is not within any configured Service CIDR; please recreate service", family, ip)
runtime.HandleError(fmt.Errorf("the ClusterIP [%v]: %s for Service %s/%s is not within any service CIDR; please recreate", family, ip, svc.Namespace, svc.Name))
@@ -557,60 +530,9 @@ func (r *RepairIPAddress) syncIPAddress(key string) error {
}
func (r *RepairIPAddress) cidrWorker() {
for r.processNextWorkCIDR() {
}
}
func (r *RepairIPAddress) processNextWorkCIDR() bool {
eKey, quit := r.cidrQueue.Get()
if quit {
return false
}
defer r.cidrQueue.Done(eKey)
err := r.syncCIDRs()
r.handleCIDRErr(err, eKey)
return true
}
func (r *RepairIPAddress) handleCIDRErr(err error, key string) {
if err == nil {
r.cidrQueue.Forget(key)
return
}
if r.cidrQueue.NumRequeues(key) < maxRetries {
klog.V(2).InfoS("Error syncing ServiceCIDR, retrying", "serviceCIDR", key, "err", err)
r.cidrQueue.AddRateLimited(key)
return
}
klog.Warningf("Dropping ServiceCIDR %q out of the queue: %v", key, err)
r.cidrQueue.Forget(key)
runtime.HandleError(err)
}
// syncCIDRs rebuilds the radix tree from the informers cache
func (r *RepairIPAddress) syncCIDRs() error {
serviceCIDRList, err := r.serviceCIDRLister.List(labels.Everything())
if err != nil {
return err
}
tree := iptree.New[string]()
for _, serviceCIDR := range serviceCIDRList {
for _, cidr := range serviceCIDR.Spec.CIDRs {
if prefix, err := netip.ParsePrefix(cidr); err == nil { // it can not fail since is already validated
tree.InsertPrefix(prefix, serviceCIDR.Name)
}
}
}
r.muTree.Lock()
defer r.muTree.Unlock()
r.tree = tree
return nil
// isIPOutOfRange returns true if the IP is not contained in any of the ServiceCIDRs
func (r *RepairIPAddress) isIPOutOfRange(ip net.IP) bool {
return len(servicecidr.ContainsIP(r.serviceCIDRLister, ip)) == 0
}
func newIPAddress(name string, svc *v1.Service) *networkingv1alpha1.IPAddress {
@@ -677,20 +599,3 @@ func verifyIPAddressLabels(ip *networkingv1alpha1.IPAddress) bool {
}
return managedByController(ip)
}
// TODO(aojea) move to utils, already in pkg/registry/core/service/ipallocator/cidrallocator.go
// ipToAddr converts a net.IP to a netip.Addr
// if the net.IP is not valid it returns an empty netip.Addr{}
func ipToAddr(ip net.IP) netip.Addr {
// https://pkg.go.dev/net/netip#AddrFromSlice can return an IPv4 in IPv6 format
// so we have to check the IP family to return exactly the format that we want
// address, _ := netip.AddrFromSlice(net.ParseIPSloppy(192.168.0.1)) returns
// an address like ::ffff:192.168.0.1/32
bytes := ip.To4()
if bytes == nil {
bytes = ip.To16()
}
// AddrFromSlice returns Addr{}, false if the input is invalid.
address, _ := netip.AddrFromSlice(bytes)
return address
}

View File

@@ -327,10 +327,7 @@ func TestRepairServiceIP(t *testing.T) {
t.Errorf("Unexpected error trying to add Service %v object: %v", cidr, err)
}
}
err := r.syncCIDRs()
if err != nil {
t.Fatal(err)
}
// override for testing
r.servicesSynced = func() bool { return true }
r.ipAddressSynced = func() bool { return true }
@@ -352,8 +349,7 @@ func TestRepairServiceIP(t *testing.T) {
}
}
err = r.runOnce()
if err != nil {
if err := r.runOnce(); err != nil {
t.Fatal(err)
}

View File

@@ -206,7 +206,13 @@ func (a *Allocator) allocateService(svc *api.Service, ip net.IP, dryRun bool) er
if dryRun {
return nil
}
return a.createIPAddress(ip.String(), svc, "static")
start := time.Now()
err = a.createIPAddress(ip.String(), svc, "static")
if err != nil {
return err
}
a.metrics.setLatency(a.metricLabel, time.Since(start))
return nil
}
// AllocateNext returns an IP address that wasn't allocated yet.
@@ -239,6 +245,7 @@ func (a *Allocator) allocateNextService(svc *api.Service, dryRun bool) (net.IP,
trace := utiltrace.New("allocate dynamic ClusterIP address")
defer trace.LogIfLong(500 * time.Millisecond)
start := time.Now()
// rand.Int63n panics for n <= 0 so we need to avoid problems when
// converting from uint64 to int64
@@ -255,6 +262,7 @@ func (a *Allocator) allocateNextService(svc *api.Service, dryRun bool) (net.IP,
iterator := ipIterator(a.offsetAddress, a.lastAddress, offset)
ip, err := a.allocateFromRange(iterator, svc)
if err == nil {
a.metrics.setLatency(a.metricLabel, time.Since(start))
return ip, nil
}
// check the lower range
@@ -263,6 +271,7 @@ func (a *Allocator) allocateNextService(svc *api.Service, dryRun bool) (net.IP,
iterator = ipIterator(a.firstAddress, a.offsetAddress.Prev(), offset)
ip, err = a.allocateFromRange(iterator, svc)
if err == nil {
a.metrics.setLatency(a.metricLabel, time.Since(start))
return ip, nil
}
}

View File

@@ -18,6 +18,7 @@ package ipallocator
import (
"sync"
"time"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
@@ -73,6 +74,17 @@ var (
},
[]string{"cidr", "scope"},
)
clusterIPAllocationLatency = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "allocation_duration_seconds",
Help: "Duration in seconds to allocate a Cluster IP by ServiceCIDR",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
[]string{"cidr"},
)
)
var registerMetricsOnce sync.Once
@@ -83,6 +95,7 @@ func registerMetrics() {
legacyregistry.MustRegister(clusterIPAvailable)
legacyregistry.MustRegister(clusterIPAllocations)
legacyregistry.MustRegister(clusterIPAllocationErrors)
legacyregistry.MustRegister(clusterIPAllocationLatency)
})
}
@@ -90,6 +103,7 @@ func registerMetrics() {
type metricsRecorderInterface interface {
setAllocated(cidr string, allocated int)
setAvailable(cidr string, available int)
setLatency(cidr string, latency time.Duration)
incrementAllocations(cidr, scope string)
incrementAllocationErrors(cidr, scope string)
}
@@ -105,6 +119,10 @@ func (m *metricsRecorder) setAvailable(cidr string, available int) {
clusterIPAvailable.WithLabelValues(cidr).Set(float64(available))
}
func (m *metricsRecorder) setLatency(cidr string, latency time.Duration) {
clusterIPAllocationLatency.WithLabelValues(cidr).Observe(latency.Seconds())
}
func (m *metricsRecorder) incrementAllocations(cidr, scope string) {
clusterIPAllocations.WithLabelValues(cidr, scope).Inc()
}
@@ -116,7 +134,8 @@ func (m *metricsRecorder) incrementAllocationErrors(cidr, scope string) {
// emptyMetricsRecorder is a null object that implements metricsRecorderInterface.
type emptyMetricsRecorder struct{}
func (*emptyMetricsRecorder) setAllocated(cidr string, allocated int) {}
func (*emptyMetricsRecorder) setAvailable(cidr string, available int) {}
func (*emptyMetricsRecorder) incrementAllocations(cidr, scope string) {}
func (*emptyMetricsRecorder) incrementAllocationErrors(cidr, scope string) {}
func (*emptyMetricsRecorder) setAllocated(cidr string, allocated int) {}
func (*emptyMetricsRecorder) setAvailable(cidr string, available int) {}
func (*emptyMetricsRecorder) setLatency(cidr string, latency time.Duration) {}
func (*emptyMetricsRecorder) incrementAllocations(cidr, scope string) {}
func (*emptyMetricsRecorder) incrementAllocationErrors(cidr, scope string) {}

View File

@@ -1,679 +0,0 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package iptree
import (
"fmt"
"math/bits"
"net/netip"
)
// iptree implements a radix tree that uses IP prefixes as nodes and allows storing values in each node.
// Example:
//
// r := New[int]()
//
// prefixes := []string{
// "0.0.0.0/0",
// "10.0.0.0/8",
// "10.0.0.0/16",
// "10.1.0.0/16",
// "10.1.1.0/24",
// "10.1.244.0/24",
// "10.0.0.0/24",
// "10.0.0.3/32",
// "192.168.0.0/24",
// "192.168.0.0/28",
// "192.168.129.0/28",
// }
// for _, k := range prefixes {
// r.InsertPrefix(netip.MustParsePrefix(k), 0)
// }
//
// (*) means the node is not public, i.e. it is not storing any value
//
// 0.0.0.0/0 --- 10.0.0.0/8 --- *10.0.0.0/15 --- 10.0.0.0/16 --- 10.0.0.0/24 --- 10.0.0.3/32
// | |
// | \ -------- 10.1.0.0/16 --- 10.1.1.0/24
// | |
// | \ ------- 10.1.244.0/24
// |
// \------ *192.168.0.0/16 --- 192.168.0.0/24 --- 192.168.0.0/28
// |
// \ -------- 192.168.129.0/28
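//
// Illustrative usage (assuming the API below):
//
//	r := New[int]()
//	r.InsertPrefix(netip.MustParsePrefix("10.0.0.0/8"), 1)
//	r.InsertPrefix(netip.MustParsePrefix("10.0.0.0/16"), 2)
//	top := r.TopLevelPrefixes(false) // only "10.0.0.0/8"; the /16 is nested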
// node is an element of radix tree with a netip.Prefix optimized to store IP prefixes.
type node[T any] struct {
// prefix network CIDR
prefix netip.Prefix
// public nodes are used to store values
public bool
val T
child [2]*node[T] // binary tree
}
// mergeChild allows compressing the tree
// when n has exactly one child and no value
// p -> n -> b -> c ==> p -> b -> c
func (n *node[T]) mergeChild() {
// public nodes can not be merged
if n.public {
return
}
// can not merge if there are two children
if n.child[0] != nil &&
n.child[1] != nil {
return
}
// can not merge if there are no children
if n.child[0] == nil &&
n.child[1] == nil {
return
}
// find the child and merge it
var child *node[T]
if n.child[0] != nil {
child = n.child[0]
} else if n.child[1] != nil {
child = n.child[1]
}
n.prefix = child.prefix
n.public = child.public
n.val = child.val
n.child = child.child
// remove any references from the deleted node
// to avoid memory leak
child.child[0] = nil
child.child[1] = nil
}
// Tree is a radix tree for IPv4 and IPv6 networks.
type Tree[T any] struct {
rootV4 *node[T]
rootV6 *node[T]
}
// New creates a new Radix Tree for IP addresses.
func New[T any]() *Tree[T] {
return &Tree[T]{
rootV4: &node[T]{
prefix: netip.PrefixFrom(netip.IPv4Unspecified(), 0),
},
rootV6: &node[T]{
prefix: netip.PrefixFrom(netip.IPv6Unspecified(), 0),
},
}
}
// GetPrefix returns the stored value and true if the exact prefix exists in the tree.
func (t *Tree[T]) GetPrefix(prefix netip.Prefix) (T, bool) {
var zeroT T
n := t.rootV4
if prefix.Addr().Is6() {
n = t.rootV6
}
bitPosition := 0
// mask the address for sanity
address := prefix.Masked().Addr()
// we can't check longer than the request mask
mask := prefix.Bits()
// walk the network bits of the prefix
for bitPosition < mask {
// Look for a child checking the bit position after the mask
n = n.child[getBitFromAddr(address, bitPosition+1)]
if n == nil {
return zeroT, false
}
// check we are in the right branch comparing the suffixes
if !n.prefix.Contains(address) {
return zeroT, false
}
// update the new bit position with the new node mask
bitPosition = n.prefix.Bits()
}
// check if this node is a public node and contains a prefix
if n != nil && n.public && n.prefix == prefix {
return n.val, true
}
return zeroT, false
}
// LongestPrefixMatch returns the longest prefix match, the stored value and true if it exists.
// For example, considering the following prefixes 192.168.20.16/28 and 192.168.0.0/16,
// when the address 192.168.20.19/32 is looked up it will return 192.168.20.16/28.
func (t *Tree[T]) LongestPrefixMatch(prefix netip.Prefix) (netip.Prefix, T, bool) {
n := t.rootV4
if prefix.Addr().Is6() {
n = t.rootV6
}
var last *node[T]
// bit position is given by the mask bits
bitPosition := 0
// mask the address
address := prefix.Masked().Addr()
mask := prefix.Bits()
// walk the network bits of the prefix
for bitPosition < mask {
if n.public {
last = n
}
// Look for a child checking the bit position after the mask
n = n.child[getBitFromAddr(address, bitPosition+1)]
if n == nil {
break
}
// check we are in the right branch comparing the suffixes
if !n.prefix.Contains(address) {
break
}
// update the new bit position with the new node mask
bitPosition = n.prefix.Bits()
}
if n != nil && n.public && n.prefix == prefix {
last = n
}
if last != nil {
return last.prefix, last.val, true
}
var zeroT T
return netip.Prefix{}, zeroT, false
}
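// Illustrative check of the example above (assumed usage):
//
//	t := New[int]()
//	t.InsertPrefix(netip.MustParsePrefix("192.168.20.16/28"), 1)
//	t.InsertPrefix(netip.MustParsePrefix("192.168.0.0/16"), 2)
//	p, v, ok := t.LongestPrefixMatch(netip.MustParsePrefix("192.168.20.19/32"))
//	// p == 192.168.20.16/28, v == 1, ok == true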
// ShortestPrefixMatch returns the shortest prefix match, the stored value and true if it exists.
// For example, considering the following prefixes 192.168.20.16/28 and 192.168.0.0/16,
// when the address 192.168.20.19/32 is looked up it will return 192.168.0.0/16.
func (t *Tree[T]) ShortestPrefixMatch(prefix netip.Prefix) (netip.Prefix, T, bool) {
var zeroT T
n := t.rootV4
if prefix.Addr().Is6() {
n = t.rootV6
}
// bit position is given by the mask bits
bitPosition := 0
// mask the address
address := prefix.Masked().Addr()
mask := prefix.Bits()
for bitPosition < mask {
if n.public {
return n.prefix, n.val, true
}
// Look for a child checking the bit position after the mask
n = n.child[getBitFromAddr(address, bitPosition+1)]
if n == nil {
return netip.Prefix{}, zeroT, false
}
// check we are in the right branch comparing the suffixes
if !n.prefix.Contains(address) {
return netip.Prefix{}, zeroT, false
}
// update the new bit position with the new node mask
bitPosition = n.prefix.Bits()
}
if n != nil && n.public && n.prefix == prefix {
return n.prefix, n.val, true
}
return netip.Prefix{}, zeroT, false
}
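// Illustrative check of the example above (assumed usage): with the same two
// prefixes inserted as in the LongestPrefixMatch example,
//
//	p, v, ok := t.ShortestPrefixMatch(netip.MustParsePrefix("192.168.20.19/32"))
//	// p == 192.168.0.0/16, v == 2, ok == true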
// InsertPrefix is used to add a new entry or update
// an existing entry. Returns true if updated.
func (t *Tree[T]) InsertPrefix(prefix netip.Prefix, v T) bool {
n := t.rootV4
if prefix.Addr().Is6() {
n = t.rootV6
}
var parent *node[T]
// bit position is given by the mask bits
bitPosition := 0
// mask the address
address := prefix.Masked().Addr()
mask := prefix.Bits()
for bitPosition < mask {
// Look for a child checking the bit position after the mask
childIndex := getBitFromAddr(address, bitPosition+1)
parent = n
n = n.child[childIndex]
// if there is no child, create a new one
if n == nil {
parent.child[childIndex] = &node[T]{
public: true,
val: v,
prefix: prefix,
}
return false
}
// update the new bit position with the new node mask
bitPosition = n.prefix.Bits()
// continue if we are in the right branch and current
// node is our parent
if n.prefix.Contains(address) && bitPosition <= mask {
continue
}
// Split the node and add a new child:
// - Case 1: parent -> child -> n
// - Case 2: parent -> newnode |--> child
// |--> n
child := &node[T]{
prefix: prefix,
public: true,
val: v,
}
// Case 1: existing node is a sibling
if prefix.Contains(n.prefix.Addr()) && bitPosition > mask {
// parent to child
parent.child[childIndex] = child
pos := prefix.Bits() + 1
// calculate if the sibling is at the left or right
child.child[getBitFromAddr(n.prefix.Addr(), pos)] = n
return false
}
// Case 2: existing node has the same mask but different base address
// add common ancestor and branch on it
ancestor := findAncestor(prefix, n.prefix)
link := &node[T]{
prefix: ancestor,
}
pos := parent.prefix.Bits() + 1
parent.child[getBitFromAddr(ancestor.Addr(), pos)] = link
// ancestor -> children
pos = ancestor.Bits() + 1
idxChild := getBitFromAddr(prefix.Addr(), pos)
idxN := getBitFromAddr(n.prefix.Addr(), pos)
if idxChild == idxN {
panic(fmt.Sprintf("wrong ancestor %s: child %s N %s", ancestor.String(), prefix.String(), n.prefix.String()))
}
link.child[idxChild] = child
link.child[idxN] = n
return false
}
// if it already exists, update it and make it public
if n != nil && n.prefix == prefix {
if n.public {
n.val = v
n.public = true
return true
}
n.val = v
n.public = true
return false
}
return false
}
// DeletePrefix deletes the exact prefix and returns true if it existed.
func (t *Tree[T]) DeletePrefix(prefix netip.Prefix) bool {
root := t.rootV4
if prefix.Addr().Is6() {
root = t.rootV6
}
var parent *node[T]
n := root
// bit position is given by the mask bits
bitPosition := 0
// mask the address
address := prefix.Masked().Addr()
mask := prefix.Bits()
for bitPosition < mask {
// Look for a child checking the bit position after the mask
parent = n
n = n.child[getBitFromAddr(address, bitPosition+1)]
if n == nil {
return false
}
// check we are in the right branch comparing the suffixes
if !n.prefix.Contains(address) {
return false
}
// update the new bit position with the new node mask
bitPosition = n.prefix.Bits()
}
// check if the node contains the prefix we want to delete
if n.prefix != prefix {
return false
}
// Delete the value
n.public = false
var zeroT T
n.val = zeroT
nodeChildren := 0
if n.child[0] != nil {
nodeChildren++
}
if n.child[1] != nil {
nodeChildren++
}
// If there is a parent and this node does not have any children
// this is a leaf so we can delete this node.
// - parent -> child(to be deleted)
if parent != nil && nodeChildren == 0 {
if parent.child[0] != nil && parent.child[0] == n {
parent.child[0] = nil
} else if parent.child[1] != nil && parent.child[1] == n {
parent.child[1] = nil
} else {
panic("wrong parent")
}
n = nil
}
// Check if we should merge this node
// The root node can not be merged
if n != root && nodeChildren == 1 {
n.mergeChild()
}
// Check if we should merge the parent's other child
// parent -> deletedNode
// |--> child
parentChildren := 0
if parent != nil {
if parent.child[0] != nil {
parentChildren++
}
if parent.child[1] != nil {
parentChildren++
}
if parent != root && parentChildren == 1 && !parent.public {
parent.mergeChild()
}
}
return true
}
// for testing, returns the number of public nodes in the tree.
func (t *Tree[T]) Len(isV6 bool) int {
count := 0
t.DepthFirstWalk(isV6, func(k netip.Prefix, v T) bool {
count++
return false
})
return count
}
// WalkFn is used when walking the tree. Takes a
// key and value, returning whether iteration should
// be terminated.
type WalkFn[T any] func(s netip.Prefix, v T) bool
// DepthFirstWalk is used to walk the tree of the corresponding IP family
func (t *Tree[T]) DepthFirstWalk(isIPv6 bool, fn WalkFn[T]) {
if isIPv6 {
recursiveWalk(t.rootV6, fn)
}
recursiveWalk(t.rootV4, fn)
}
// recursiveWalk is used to do a pre-order walk of a node
// recursively. Returns true if the walk should be aborted
func recursiveWalk[T any](n *node[T], fn WalkFn[T]) bool {
if n == nil {
return true
}
// Visit the public values if any
if n.public && fn(n.prefix, n.val) {
return true
}
// Recurse on the children
if n.child[0] != nil {
if recursiveWalk(n.child[0], fn) {
return true
}
}
if n.child[1] != nil {
if recursiveWalk(n.child[1], fn) {
return true
}
}
return false
}
// WalkPrefix is used to walk the tree under a prefix
func (t *Tree[T]) WalkPrefix(prefix netip.Prefix, fn WalkFn[T]) {
n := t.rootV4
if prefix.Addr().Is6() {
n = t.rootV6
}
bitPosition := 0
// mask the address for sanity
address := prefix.Masked().Addr()
// we can't check longer than the request mask
mask := prefix.Bits()
// walk the network bits of the prefix
for bitPosition < mask {
// Look for a child checking the bit position after the mask
n = n.child[getBitFromAddr(address, bitPosition+1)]
if n == nil {
return
}
// check we are in the right branch comparing the suffixes
if !n.prefix.Contains(address) {
break
}
// update the new bit position with the new node mask
bitPosition = n.prefix.Bits()
}
recursiveWalk[T](n, fn)
}
// WalkPath is used to walk the tree, but only visiting nodes
// from the root down to a given IP prefix. Where WalkPrefix walks
// all the entries *under* the given prefix, this walks the
// entries *above* the given prefix.
func (t *Tree[T]) WalkPath(path netip.Prefix, fn WalkFn[T]) {
n := t.rootV4
if path.Addr().Is6() {
n = t.rootV6
}
bitPosition := 0
// mask the address for sanity
address := path.Masked().Addr()
// we can't check longer than the request mask
mask := path.Bits()
// walk the network bits of the prefix
for bitPosition < mask {
// Visit the public values if any
if n.public && fn(n.prefix, n.val) {
return
}
// Look for a child checking the bit position after the mask
n = n.child[getBitFromAddr(address, bitPosition+1)]
if n == nil {
return
}
// check we are in the right branch comparing the suffixes
if !n.prefix.Contains(address) {
return
}
// update the new bit position with the new node mask
bitPosition = n.prefix.Bits()
}
// check if this node is a public node and contains a prefix
if n != nil && n.public && n.prefix == path {
fn(n.prefix, n.val)
}
}
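// Illustrative contrast (assumed usage): with 10.0.0.0/8 and 10.0.0.0/24 in
// the tree, WalkPrefix(netip.MustParsePrefix("10.0.0.0/8"), fn) visits both
// entries, while WalkPath(netip.MustParsePrefix("10.0.0.0/24"), fn) visits
// 10.0.0.0/8 first and then 10.0.0.0/24 itself.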
// TopLevelPrefixes is used to return a map with all the Top Level prefixes
// from the corresponding IP family and its values.
// For example, if the tree contains entries for 10.0.0.0/8, 10.1.0.0/16, and 192.168.0.0/16,
// this will return 10.0.0.0/8 and 192.168.0.0/16.
func (t *Tree[T]) TopLevelPrefixes(isIPv6 bool) map[string]T {
if isIPv6 {
return t.topLevelPrefixes(t.rootV6)
}
return t.topLevelPrefixes(t.rootV4)
}
// topLevelPrefixes is used to return a map with all the Top Level prefixes and its values
func (t *Tree[T]) topLevelPrefixes(root *node[T]) map[string]T {
result := map[string]T{}
queue := []*node[T]{root}
for len(queue) > 0 {
n := queue[0]
queue = queue[1:]
// store and continue, only interested in the top level prefixes
if n.public {
result[n.prefix.String()] = n.val
continue
}
if n.child[0] != nil {
queue = append(queue, n.child[0])
}
if n.child[1] != nil {
queue = append(queue, n.child[1])
}
}
return result
}
// GetHostIPPrefixMatches returns the list of prefixes that contain the specified Host IP.
// An IP is considered a Host IP if it is within the subnet range and is not the network address
// or, if IPv4, the broadcast address (RFC 1878).
func (t *Tree[T]) GetHostIPPrefixMatches(ip netip.Addr) map[netip.Prefix]T {
// walk the tree to find all the prefixes containing this IP
ipPrefix := netip.PrefixFrom(ip, ip.BitLen())
prefixes := map[netip.Prefix]T{}
t.WalkPath(ipPrefix, func(k netip.Prefix, v T) bool {
if prefixContainIP(k, ipPrefix.Addr()) {
prefixes[k] = v
}
return false
})
return prefixes
}
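// For example (illustrative): with 192.168.0.0/24 stored in the tree,
// GetHostIPPrefixMatches for 192.168.0.1 returns the /24 entry, while the
// network address 192.168.0.0 and the IPv4 broadcast address 192.168.0.255
// both return an empty map.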
// bit positions are 1-based starting from the MSB: 1, 2, ..., 32 (or 128 for IPv6)
// returns 0 or 1
func getBitFromAddr(ip netip.Addr, pos int) int {
bytes := ip.AsSlice()
// get the byte in the slice
index := (pos - 1) / 8
if index >= len(bytes) {
panic(fmt.Sprintf("ip %s pos %d index %d bytes %v", ip, pos, index, bytes))
}
// get the offset inside the byte
offset := (pos - 1) % 8
// check if the bit is set
if bytes[index]&(uint8(0x80)>>offset) > 0 {
return 1
}
return 0
}
// find the common subnet, aka the one with the common prefix
func findAncestor(a, b netip.Prefix) netip.Prefix {
bytesA := a.Addr().AsSlice()
bytesB := b.Addr().AsSlice()
bytes := make([]byte, len(bytesA))
max := a.Bits()
if l := b.Bits(); l < max {
max = l
}
mask := 0
for i := range bytesA {
xor := bytesA[i] ^ bytesB[i]
if xor == 0 {
bytes[i] = bytesA[i]
mask += 8
} else {
pos := bits.LeadingZeros8(xor)
mask += pos
// mask off the non leading zeros
bytes[i] = bytesA[i] & (^uint8(0) << (8 - pos))
break
}
}
if mask > max {
mask = max
}
addr, ok := netip.AddrFromSlice(bytes)
if !ok {
panic(bytes)
}
ancestor := netip.PrefixFrom(addr, mask)
return ancestor.Masked()
}
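// For example (illustrative, mirroring the package's unit tests):
//
//	findAncestor(netip.MustParsePrefix("192.168.0.0/24"), netip.MustParsePrefix("192.168.1.0/24"))
//	// returns 192.168.0.0/23
//	findAncestor(netip.MustParsePrefix("fd00:1:1:1::/64"), netip.MustParsePrefix("fd00:1:1:2::/64"))
//	// returns fd00:1:1::/62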
// prefixContainIP returns true if the given IP is contained within the prefix,
// is not the network address and also, if IPv4, is not the broadcast address.
// This is required because the Kubernetes allocators reserve these addresses,
// so IPAddresses can not block deletion of these ranges.
func prefixContainIP(prefix netip.Prefix, ip netip.Addr) bool {
// if the IP is the network address is not contained
if prefix.Masked().Addr() == ip {
return false
}
// the broadcast address is not considered contained for IPv4
if !ip.Is6() {
ipLast, err := broadcastAddress(prefix)
if err != nil || ipLast == ip {
return false
}
}
return prefix.Contains(ip)
}
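// For example (illustrative): in 192.168.0.0/24, 192.168.0.1 is a valid host
// IP, while 192.168.0.0 (the network address) and 192.168.0.255 (the IPv4
// broadcast address) are not.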
// TODO(aojea) consolidate all these IPs utils
// pkg/registry/core/service/ipallocator/ipallocator.go
// broadcastAddress returns the broadcast address of the subnet
// The broadcast address is obtained by setting all the host bits
// in a subnet to 1.
// network 192.168.0.0/24 : subnet bits 24 host bits 32 - 24 = 8
// broadcast address 192.168.0.255
func broadcastAddress(subnet netip.Prefix) (netip.Addr, error) {
base := subnet.Masked().Addr()
bytes := base.AsSlice()
// get all the host bits from the subnet
n := 8*len(bytes) - subnet.Bits()
// set all the host bits to 1
for i := len(bytes) - 1; i >= 0 && n > 0; i-- {
if n >= 8 {
bytes[i] = 0xff
n -= 8
} else {
mask := ^uint8(0) >> (8 - n)
bytes[i] |= mask
break
}
}
addr, ok := netip.AddrFromSlice(bytes)
if !ok {
return netip.Addr{}, fmt.Errorf("invalid address %v", bytes)
}
return addr, nil
}

View File

@@ -1,781 +0,0 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package iptree
import (
"math/rand"
"net/netip"
"reflect"
"sort"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"k8s.io/apimachinery/pkg/util/sets"
)
func Test_InsertGetDelete(t *testing.T) {
testCases := []struct {
name string
prefix netip.Prefix
}{
{
name: "ipv4",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
},
{
name: "ipv6",
prefix: netip.MustParsePrefix("fd00:1:2:3::/124"),
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
tree := New[int]()
ok := tree.InsertPrefix(tc.prefix, 1)
if ok {
t.Fatal("should not exist")
}
if _, ok := tree.GetPrefix(tc.prefix); !ok {
t.Errorf("CIDR %s not found", tc.prefix)
}
if ok := tree.DeletePrefix(tc.prefix); !ok {
t.Errorf("CIDR %s not deleted", tc.prefix)
}
if _, ok := tree.GetPrefix(tc.prefix); ok {
t.Errorf("CIDR %s found", tc.prefix)
}
})
}
}
func TestBasicIPv4(t *testing.T) {
tree := New[int]()
// insert
ipnet := netip.MustParsePrefix("192.168.0.0/24")
ok := tree.InsertPrefix(ipnet, 1)
if ok {
t.Fatal("should not exist")
}
// check exist
if _, ok := tree.GetPrefix(ipnet); !ok {
t.Errorf("CIDR %s not found", ipnet)
}
// check does not exist
ipnet2 := netip.MustParsePrefix("12.1.0.0/16")
if _, ok := tree.GetPrefix(ipnet2); ok {
t.Errorf("CIDR %s not expected", ipnet2)
}
// check insert existing prefix updates the value
ok = tree.InsertPrefix(ipnet2, 2)
if ok {
t.Errorf("should not exist: %s", ipnet2)
}
ok = tree.InsertPrefix(ipnet2, 3)
if !ok {
t.Errorf("should be updated: %s", ipnet2)
}
if v, ok := tree.GetPrefix(ipnet2); !ok || v != 3 {
t.Errorf("CIDR %s not expected", ipnet2)
}
// check longer prefix matching
ipnet3 := netip.MustParsePrefix("12.1.0.2/32")
lpm, _, ok := tree.LongestPrefixMatch(ipnet3)
if !ok || lpm != ipnet2 {
t.Errorf("expected %s got %s", ipnet2, lpm)
}
}
func TestBasicIPv6(t *testing.T) {
tree := New[int]()
// insert
ipnet := netip.MustParsePrefix("2001:db8::/64")
ok := tree.InsertPrefix(ipnet, 1)
if ok {
t.Fatal("should not exist")
}
// check exist
if _, ok := tree.GetPrefix(ipnet); !ok {
t.Errorf("CIDR %s not found", ipnet)
}
// check does not exist
ipnet2 := netip.MustParsePrefix("2001:db8:1:3:4::/64")
if _, ok := tree.GetPrefix(ipnet2); ok {
t.Errorf("CIDR %s not expected", ipnet2)
}
// check insert existing prefix updates the value
ok = tree.InsertPrefix(ipnet2, 2)
if ok {
t.Errorf("should not exist: %s", ipnet2)
}
ok = tree.InsertPrefix(ipnet2, 3)
if !ok {
t.Errorf("should be updated: %s", ipnet2)
}
if v, ok := tree.GetPrefix(ipnet2); !ok || v != 3 {
t.Errorf("CIDR %s not expected", ipnet2)
}
// check longer prefix matching
ipnet3 := netip.MustParsePrefix("2001:db8:1:3:4::/96")
lpm, _, ok := tree.LongestPrefixMatch(ipnet3)
if !ok || lpm != ipnet2 {
t.Errorf("expected %s got %s", ipnet2, lpm)
}
}
func TestInsertGetDelete100K(t *testing.T) {
testCases := []struct {
name string
is6 bool
}{
{
name: "ipv4",
},
{
name: "ipv6",
is6: true,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
cidrs := generateRandomCIDRs(tc.is6, 100*1000)
tree := New[string]()
for k := range cidrs {
ok := tree.InsertPrefix(k, k.String())
if ok {
t.Errorf("error inserting: %v", k)
}
}
if tree.Len(tc.is6) != len(cidrs) {
t.Errorf("expected %d nodes on the tree, got %d", len(cidrs), tree.Len(tc.is6))
}
list := cidrs.UnsortedList()
for _, k := range list {
if v, ok := tree.GetPrefix(k); !ok {
t.Errorf("CIDR %s not found", k)
return
} else if v != k.String() {
t.Errorf("CIDR value %s not found", k)
return
}
ok := tree.DeletePrefix(k)
if !ok {
t.Errorf("CIDR delete %s error", k)
}
}
if tree.Len(tc.is6) != 0 {
t.Errorf("No node expected on the tree, got: %d %v", tree.Len(tc.is6), cidrs)
}
})
}
}
func Test_findAncestor(t *testing.T) {
tests := []struct {
name string
a netip.Prefix
b netip.Prefix
want netip.Prefix
}{
{
name: "ipv4 direct parent",
a: netip.MustParsePrefix("192.168.0.0/24"),
b: netip.MustParsePrefix("192.168.1.0/24"),
want: netip.MustParsePrefix("192.168.0.0/23"),
},
{
name: "ipv4 root parent ",
a: netip.MustParsePrefix("192.168.0.0/24"),
b: netip.MustParsePrefix("1.168.1.0/24"),
want: netip.MustParsePrefix("0.0.0.0/0"),
},
{
name: "ipv4 parent /1",
a: netip.MustParsePrefix("192.168.0.0/24"),
b: netip.MustParsePrefix("184.168.1.0/24"),
want: netip.MustParsePrefix("128.0.0.0/1"),
},
{
name: "ipv6 direct parent",
a: netip.MustParsePrefix("fd00:1:1:1::/64"),
b: netip.MustParsePrefix("fd00:1:1:2::/64"),
want: netip.MustParsePrefix("fd00:1:1::/62"),
},
{
name: "ipv6 root parent ",
a: netip.MustParsePrefix("fd00:1:1:1::/64"),
b: netip.MustParsePrefix("1:1:1:1::/64"),
want: netip.MustParsePrefix("::/0"),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := findAncestor(tt.a, tt.b); !reflect.DeepEqual(got, tt.want) {
t.Errorf("findAncestor() = %v, want %v", got, tt.want)
}
})
}
}
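The expected ancestors in this table follow from plain bit arithmetic: the ancestor is the longest prefix shared by both (masked) network addresses, clamped to the shorter of the two masks. A self-contained sketch of that computation, assuming both prefixes are the same IP family as the tests do; this is illustrative, not the package's own findAncestor:
package main

import (
	"fmt"
	"net/netip"
)

// commonAncestor returns the longest prefix containing both a and b by
// counting the leading bits their masked network addresses share.
func commonAncestor(a, b netip.Prefix) netip.Prefix {
	bytesA := a.Masked().Addr().AsSlice()
	bytesB := b.Masked().Addr().AsSlice()
	bits := 0
	for i := range bytesA {
		xor := bytesA[i] ^ bytesB[i]
		if xor == 0 {
			bits += 8
			continue
		}
		for mask := byte(0x80); mask != 0 && xor&mask == 0; mask >>= 1 {
			bits++
		}
		break
	}
	if a.Bits() < bits {
		bits = a.Bits()
	}
	if b.Bits() < bits {
		bits = b.Bits()
	}
	return netip.PrefixFrom(a.Addr(), bits).Masked()
}

func main() {
	a := netip.MustParsePrefix("192.168.0.0/24")
	b := netip.MustParsePrefix("192.168.1.0/24")
	fmt.Println(commonAncestor(a, b)) // 192.168.0.0/23
}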
func Test_getBitFromAddr(t *testing.T) {
tests := []struct {
name string
ip netip.Addr
pos int
want int
}{
// 192.168.0.0
// 11000000.10101000.00000000.00000000
{
name: "ipv4 first is a one",
ip: netip.MustParseAddr("192.168.0.0"),
pos: 1,
want: 1,
},
{
name: "ipv4 middle is a zero",
ip: netip.MustParseAddr("192.168.0.0"),
pos: 16,
want: 0,
},
{
name: "ipv4 middle is a one",
ip: netip.MustParseAddr("192.168.0.0"),
pos: 13,
want: 1,
},
{
name: "ipv4 last is a zero",
ip: netip.MustParseAddr("192.168.0.0"),
pos: 32,
want: 0,
},
// 2001:db8::ff00:42:8329
// 0010000000000001:0000110110111000:0000000000000000:0000000000000000:0000000000000000:1111111100000000:0000000001000010:1000001100101001
{
name: "ipv6 first is a zero",
ip: netip.MustParseAddr("2001:db8::ff00:42:8329"),
pos: 1,
want: 0,
},
{
name: "ipv6 middle is a zero",
ip: netip.MustParseAddr("2001:db8::ff00:42:8329"),
pos: 56,
want: 0,
},
{
name: "ipv6 middle is a one",
ip: netip.MustParseAddr("2001:db8::ff00:42:8329"),
pos: 81,
want: 1,
},
{
name: "ipv6 last is a one",
ip: netip.MustParseAddr("2001:db8::ff00:42:8329"),
pos: 128,
want: 1,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := getBitFromAddr(tt.ip, tt.pos); got != tt.want {
t.Errorf("getBitFromAddr() = %v, want %v", got, tt.want)
}
})
}
}
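The positions in this table are 1-indexed from the most significant bit, so pos 1 is the top bit of the first byte. A standalone illustration of that convention; an assumed reimplementation, not the package's getBitFromAddr:
package main

import (
	"fmt"
	"net/netip"
)

// bitAt returns the bit of ip at the given 1-indexed position, counting
// from the most significant bit.
func bitAt(ip netip.Addr, pos int) int {
	bytes := ip.AsSlice()
	byteIdx := (pos - 1) / 8
	bitIdx := uint((pos - 1) % 8)
	return int(bytes[byteIdx]>>(7-bitIdx)) & 1
}

func main() {
	ip := netip.MustParseAddr("192.168.0.0") // 11000000.10101000.00000000.00000000
	fmt.Println(bitAt(ip, 1))  // 1
	fmt.Println(bitAt(ip, 13)) // 1
	fmt.Println(bitAt(ip, 32)) // 0
}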
func TestShortestPrefix(t *testing.T) {
r := New[int]()
keys := []string{
"10.0.0.0/8",
"10.21.0.0/16",
"10.221.0.0/16",
"10.1.2.3/32",
"10.1.2.0/24",
"192.168.0.0/24",
"192.168.0.0/16",
}
for _, k := range keys {
ok := r.InsertPrefix(netip.MustParsePrefix(k), 0)
if ok {
t.Errorf("unexpected update on insert %s", k)
}
}
if r.Len(false) != len(keys) {
t.Fatalf("bad len: %v %v", r.Len(false), len(keys))
}
type exp struct {
inp string
out string
}
cases := []exp{
{"192.168.0.3/32", "192.168.0.0/16"},
{"10.1.2.4/21", "10.0.0.0/8"},
{"192.168.0.0/16", "192.168.0.0/16"},
{"192.168.0.0/32", "192.168.0.0/16"},
{"10.1.2.3/32", "10.0.0.0/8"},
}
for _, test := range cases {
m, _, ok := r.ShortestPrefixMatch(netip.MustParsePrefix(test.inp))
if !ok {
t.Fatalf("no match: %v", test)
}
if m != netip.MustParsePrefix(test.out) {
t.Fatalf("mis-match: %v %v", m, test)
}
}
// not match
_, _, ok := r.ShortestPrefixMatch(netip.MustParsePrefix("0.0.0.0/0"))
if ok {
t.Fatalf("match unexpected for 0.0.0.0/0")
}
}
func TestLongestPrefixMatch(t *testing.T) {
r := New[int]()
keys := []string{
"10.0.0.0/8",
"10.21.0.0/16",
"10.221.0.0/16",
"10.1.2.3/32",
"10.1.2.0/24",
"192.168.0.0/24",
"192.168.0.0/16",
}
for _, k := range keys {
ok := r.InsertPrefix(netip.MustParsePrefix(k), 0)
if ok {
t.Errorf("unexpected update on insert %s", k)
}
}
if r.Len(false) != len(keys) {
t.Fatalf("bad len: %v %v", r.Len(false), len(keys))
}
type exp struct {
inp string
out string
}
cases := []exp{
{"192.168.0.3/32", "192.168.0.0/24"},
{"10.1.2.4/21", "10.0.0.0/8"},
{"10.21.2.0/24", "10.21.0.0/16"},
{"10.1.2.3/32", "10.1.2.3/32"},
}
for _, test := range cases {
m, _, ok := r.LongestPrefixMatch(netip.MustParsePrefix(test.inp))
if !ok {
t.Fatalf("no match: %v", test)
}
if m != netip.MustParsePrefix(test.out) {
t.Fatalf("mis-match: %v %v", m, test)
}
}
// not match
_, _, ok := r.LongestPrefixMatch(netip.MustParsePrefix("0.0.0.0/0"))
if ok {
t.Fatalf("match unexpected for 0.0.0.0/0")
}
}
func TestTopLevelPrefixesV4(t *testing.T) {
r := New[string]()
keys := []string{
"10.0.0.0/8",
"10.21.0.0/16",
"10.221.0.0/16",
"10.1.2.3/32",
"10.1.2.0/24",
"192.168.0.0/20",
"192.168.1.0/24",
"172.16.0.0/12",
"172.21.23.0/24",
}
for _, k := range keys {
ok := r.InsertPrefix(netip.MustParsePrefix(k), k)
if ok {
t.Errorf("unexpected update on insert %s", k)
}
}
if r.Len(false) != len(keys) {
t.Fatalf("bad len: %v %v", r.Len(false), len(keys))
}
expected := []string{
"10.0.0.0/8",
"192.168.0.0/20",
"172.16.0.0/12",
}
parents := r.TopLevelPrefixes(false)
if len(parents) != len(expected) {
t.Fatalf("bad len: %v %v", len(parents), len(expected))
}
for _, k := range expected {
v, ok := parents[k]
if !ok {
t.Errorf("key %s not found", k)
}
if v != k {
t.Errorf("value expected %s got %s", k, v)
}
}
}
func TestTopLevelPrefixesV6(t *testing.T) {
r := New[string]()
keys := []string{
"2001:db8:1:2:3::/64",
"2001:db8::/64",
"2001:db8:1:1:1::/64",
"2001:db8:1:1:1::/112",
}
for _, k := range keys {
ok := r.InsertPrefix(netip.MustParsePrefix(k), k)
if ok {
t.Errorf("unexpected update on insert %s", k)
}
}
if r.Len(true) != len(keys) {
t.Fatalf("bad len: %v %v", r.Len(true), len(keys))
}
expected := []string{
"2001:db8::/64",
"2001:db8:1:2:3::/64",
"2001:db8:1:1:1::/64",
}
parents := r.TopLevelPrefixes(true)
if len(parents) != len(expected) {
t.Fatalf("bad len: %v %v", len(parents), len(expected))
}
for _, k := range expected {
v, ok := parents[k]
if !ok {
t.Errorf("key %s not found", k)
}
if v != k {
t.Errorf("value expected %s got %s", k, v)
}
}
}
func TestWalkV4(t *testing.T) {
r := New[int]()
keys := []string{
"10.0.0.0/8",
"10.1.0.0/16",
"10.1.1.0/24",
"10.1.1.32/26",
"10.1.1.33/32",
}
for _, k := range keys {
ok := r.InsertPrefix(netip.MustParsePrefix(k), 0)
if ok {
t.Errorf("unexpected update on insert %s", k)
}
}
if r.Len(false) != len(keys) {
t.Fatalf("bad len: %v %v", r.Len(false), len(keys))
}
// match exact prefix
path := []string{}
r.WalkPath(netip.MustParsePrefix("10.1.1.32/26"), func(k netip.Prefix, v int) bool {
path = append(path, k.String())
return false
})
if !cmp.Equal(path, keys[:4]) {
t.Errorf("Walkpath expected %v got %v", keys[:4], path)
}
// not match on prefix
path = []string{}
r.WalkPath(netip.MustParsePrefix("10.1.1.33/26"), func(k netip.Prefix, v int) bool {
path = append(path, k.String())
return false
})
if !cmp.Equal(path, keys[:3]) {
t.Errorf("Walkpath expected %v got %v", keys[:3], path)
}
// match exact prefix
path = []string{}
r.WalkPrefix(netip.MustParsePrefix("10.0.0.0/8"), func(k netip.Prefix, v int) bool {
path = append(path, k.String())
return false
})
if !cmp.Equal(path, keys) {
t.Errorf("WalkPrefix expected %v got %v", keys, path)
}
// not match on prefix
path = []string{}
r.WalkPrefix(netip.MustParsePrefix("10.0.0.0/9"), func(k netip.Prefix, v int) bool {
path = append(path, k.String())
return false
})
if !cmp.Equal(path, keys[1:]) {
t.Errorf("WalkPrefix expected %v got %v", keys[1:], path)
}
}
func TestWalkV6(t *testing.T) {
r := New[int]()
keys := []string{
"2001:db8::/48",
"2001:db8::/64",
"2001:db8::/96",
"2001:db8::/112",
"2001:db8::/128",
}
for _, k := range keys {
ok := r.InsertPrefix(netip.MustParsePrefix(k), 0)
if ok {
t.Errorf("unexpected update on insert %s", k)
}
}
if r.Len(true) != len(keys) {
t.Fatalf("bad len: %v %v", r.Len(false), len(keys))
}
// match exact prefix
path := []string{}
r.WalkPath(netip.MustParsePrefix("2001:db8::/112"), func(k netip.Prefix, v int) bool {
path = append(path, k.String())
return false
})
if !cmp.Equal(path, keys[:4]) {
t.Errorf("Walkpath expected %v got %v", keys[:4], path)
}
// not match on prefix
path = []string{}
r.WalkPath(netip.MustParsePrefix("2001:db8::1/112"), func(k netip.Prefix, v int) bool {
path = append(path, k.String())
return false
})
if !cmp.Equal(path, keys[:3]) {
t.Errorf("Walkpath expected %v got %v", keys[:3], path)
}
// match exact prefix
path = []string{}
r.WalkPrefix(netip.MustParsePrefix("2001:db8::/48"), func(k netip.Prefix, v int) bool {
path = append(path, k.String())
return false
})
if !cmp.Equal(path, keys) {
t.Errorf("WalkPrefix expected %v got %v", keys, path)
}
// not match on prefix
path = []string{}
r.WalkPrefix(netip.MustParsePrefix("2001:db8::/49"), func(k netip.Prefix, v int) bool {
path = append(path, k.String())
return false
})
if !cmp.Equal(path, keys[1:]) {
t.Errorf("WalkPrefix expected %v got %v", keys[1:], path)
}
}
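The two traversals exercised above are easy to confuse: WalkPath visits the stored prefixes that contain the argument, walking from the root down, while WalkPrefix visits the stored prefixes contained by the argument, i.e. its subtree. A small usage sketch under the same assumed import path as before:
package main

import (
	"fmt"
	"net/netip"

	"k8s.io/kubernetes/pkg/util/iptree" // assumed import path
)

func main() {
	tree := iptree.New[int]()
	for _, s := range []string{"10.0.0.0/8", "10.1.0.0/16", "10.1.1.0/24"} {
		tree.InsertPrefix(netip.MustParsePrefix(s), 0)
	}
	// Ancestors of (and including) 10.1.1.0/24: /8, /16, /24.
	tree.WalkPath(netip.MustParsePrefix("10.1.1.0/24"), func(p netip.Prefix, _ int) bool {
		fmt.Println("path:", p)
		return false // false keeps the walk going
	})
	// Subtree under (and including) 10.1.0.0/16: /16, /24.
	tree.WalkPrefix(netip.MustParsePrefix("10.1.0.0/16"), func(p netip.Prefix, _ int) bool {
		fmt.Println("subtree:", p)
		return false
	})
}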
func TestGetHostIPPrefixMatches(t *testing.T) {
r := New[int]()
keys := []string{
"10.0.0.0/8",
"10.21.0.0/16",
"10.221.0.0/16",
"10.1.2.3/32",
"10.1.2.0/24",
"192.168.0.0/24",
"192.168.0.0/16",
"2001:db8::/48",
"2001:db8::/64",
"2001:db8::/96",
}
for _, k := range keys {
ok := r.InsertPrefix(netip.MustParsePrefix(k), 0)
if ok {
t.Errorf("unexpected update on insert %s", k)
}
}
type exp struct {
inp string
out []string
}
cases := []exp{
{"192.168.0.3", []string{"192.168.0.0/24", "192.168.0.0/16"}},
{"10.1.2.4", []string{"10.1.2.0/24", "10.0.0.0/8"}},
{"10.1.2.0", []string{"10.0.0.0/8"}},
{"10.1.2.255", []string{"10.0.0.0/8"}},
{"192.168.0.0", []string{}},
{"192.168.1.0", []string{"192.168.0.0/16"}},
{"10.1.2.255", []string{"10.0.0.0/8"}},
{"2001:db8::1", []string{"2001:db8::/96", "2001:db8::/64", "2001:db8::/48"}},
{"2001:db8::", []string{}},
{"2001:db8::ffff:ffff:ffff:ffff", []string{"2001:db8::/64", "2001:db8::/48"}},
}
for _, test := range cases {
m := r.GetHostIPPrefixMatches(netip.MustParseAddr(test.inp))
in := []netip.Prefix{}
for k := range m {
in = append(in, k)
}
out := []netip.Prefix{}
for _, s := range test.out {
out = append(out, netip.MustParsePrefix(s))
}
// sort by prefix bits to avoid flakes
sort.Slice(in, func(i, j int) bool { return in[i].Bits() < in[j].Bits() })
sort.Slice(out, func(i, j int) bool { return out[i].Bits() < out[j].Bits() })
if !reflect.DeepEqual(in, out) {
t.Fatalf("mis-match: %v %v", in, out)
}
}
// not match
_, _, ok := r.ShortestPrefixMatch(netip.MustParsePrefix("0.0.0.0/0"))
if ok {
t.Fatalf("match unexpected for 0.0.0.0/0")
}
}
func Test_prefixContainIP(t *testing.T) {
tests := []struct {
name string
prefix netip.Prefix
ip netip.Addr
want bool
}{
{
name: "IPv4 contains",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
ip: netip.MustParseAddr("192.168.0.1"),
want: true,
},
{
name: "IPv4 network address",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
ip: netip.MustParseAddr("192.168.0.0"),
},
{
name: "IPv4 broadcast address",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
ip: netip.MustParseAddr("192.168.0.255"),
},
{
name: "IPv4 does not contain",
prefix: netip.MustParsePrefix("192.168.0.0/24"),
ip: netip.MustParseAddr("192.168.1.2"),
},
{
name: "IPv6 contains",
prefix: netip.MustParsePrefix("2001:db2::/96"),
ip: netip.MustParseAddr("2001:db2::1"),
want: true,
},
{
name: "IPv6 network address",
prefix: netip.MustParsePrefix("2001:db2::/96"),
ip: netip.MustParseAddr("2001:db2::"),
},
{
name: "IPv6 broadcast address",
prefix: netip.MustParsePrefix("2001:db2::/96"),
ip: netip.MustParseAddr("2001:db2::ffff:ffff"),
want: true,
},
{
name: "IPv6 does not contain",
prefix: netip.MustParsePrefix("2001:db2::/96"),
ip: netip.MustParseAddr("2001:db2:1:2:3::1"),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := prefixContainIP(tt.prefix, tt.ip); got != tt.want {
t.Errorf("prefixContainIP() = %v, want %v", got, tt.want)
}
})
}
}
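This table encodes the reservation rule that the ServiceCIDR documentation in this PR spells out: an IP counts as contained for allocation purposes only if it is inside the prefix, is not the network address, and, for IPv4, is not the broadcast address; the highest IPv6 address remains allocatable. A self-contained sketch of that rule, not the package's prefixContainIP:
package main

import (
	"fmt"
	"net/netip"
)

// containsForAllocation reports whether ip is usable for allocation inside
// prefix: inside the range, not the network address, and (IPv4 only) not the
// broadcast address.
func containsForAllocation(prefix netip.Prefix, ip netip.Addr) bool {
	if !prefix.Contains(ip) {
		return false
	}
	if prefix.Masked().Addr() == ip { // network address
		return false
	}
	if ip.Is4() {
		// Compute the IPv4 broadcast address by setting all host bits.
		bytes := prefix.Masked().Addr().As4()
		hostBits := 32 - prefix.Bits()
		for i := 3; i >= 0 && hostBits > 0; i-- {
			take := hostBits
			if take > 8 {
				take = 8
			}
			bytes[i] |= byte(0xff >> (8 - take))
			hostBits -= take
		}
		if netip.AddrFrom4(bytes) == ip {
			return false
		}
	}
	return true
}

func main() {
	p := netip.MustParsePrefix("192.168.0.0/24")
	fmt.Println(containsForAllocation(p, netip.MustParseAddr("192.168.0.1")))   // true
	fmt.Println(containsForAllocation(p, netip.MustParseAddr("192.168.0.0")))   // false (network)
	fmt.Println(containsForAllocation(p, netip.MustParseAddr("192.168.0.255"))) // false (broadcast)
}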
func BenchmarkInsertUpdate(b *testing.B) {
r := New[bool]()
ipList := generateRandomCIDRs(true, 20000).UnsortedList()
for _, ip := range ipList {
r.InsertPrefix(ip, true)
}
b.ResetTimer()
for n := 0; n < b.N; n++ {
r.InsertPrefix(ipList[n%len(ipList)], true)
}
}
func generateRandomCIDRs(is6 bool, number int) sets.Set[netip.Prefix] {
n := 4
if is6 {
n = 16
}
cidrs := sets.Set[netip.Prefix]{}
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
for i := 0; i < number; i++ {
bytes := make([]byte, n)
for i := 0; i < n; i++ {
bytes[i] = uint8(rng.Intn(256))
}
ip, ok := netip.AddrFromSlice(bytes)
if !ok {
continue
}
bits := rng.Intn(n * 8)
prefix := netip.PrefixFrom(ip, bits).Masked()
if prefix.IsValid() {
cidrs.Insert(prefix)
}
}
return cidrs
}

View File

@@ -119,6 +119,9 @@ message ServiceCIDRList {
message ServiceCIDRSpec {
// CIDRs defines the IP blocks in CIDR notation (e.g. "192.168.0.0/24" or "2001:db8::/64")
// from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family.
// The network address of each CIDR, the address that identifies the subnet of a host, is reserved
// and will not be allocated. The broadcast address for IPv4 CIDRs is also reserved and will not be
// allocated.
// This field is immutable.
// +optional
// +listType=atomic

View File

@@ -109,6 +109,9 @@ type ServiceCIDR struct {
type ServiceCIDRSpec struct {
// CIDRs defines the IP blocks in CIDR notation (e.g. "192.168.0.0/24" or "2001:db8::/64")
// from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family.
// The network address of each CIDR, the address that identifies the subnet of a host, is reserved
// and will not be allocated. The broadcast address for IPv4 CIDRs is also reserved and will not be
// allocated.
// This field is immutable.
// +optional
// +listType=atomic

View File

@@ -91,7 +91,7 @@ func (ServiceCIDRList) SwaggerDoc() map[string]string {
var map_ServiceCIDRSpec = map[string]string{
"": "ServiceCIDRSpec define the CIDRs the user wants to use for allocating ClusterIPs for Services.",
"cidrs": "CIDRs defines the IP blocks in CIDR notation (e.g. \"192.168.0.0/24\" or \"2001:db8::/64\") from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family. This field is immutable.",
"cidrs": "CIDRs defines the IP blocks in CIDR notation (e.g. \"192.168.0.0/24\" or \"2001:db8::/64\") from which to assign service cluster IPs. Max of two CIDRs is allowed, one of each IP family. The network address of each CIDR, the address that identifies the subnet of a host, is reserved and will not be allocated. The broadcast address for IPv4 CIDRs is also reserved and will not be allocated. This field is immutable.",
}
func (ServiceCIDRSpec) SwaggerDoc() map[string]string {

View File

@@ -20,6 +20,8 @@ import (
"context"
"fmt"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
@@ -28,104 +30,149 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/kubernetes"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/cmd/kube-apiserver/app/options"
clientset "k8s.io/client-go/kubernetes"
kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing"
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/test/integration/framework"
"k8s.io/kubernetes/test/utils/ktesting"
netutils "k8s.io/utils/net"
)
func TestServiceAlloc(t *testing.T) {
func TestServiceAllocation(t *testing.T) {
// Create an IPv4 single-stack control plane
serviceCIDR := "192.168.0.0/29"
tCtx := ktesting.Init(t)
client, _, tearDownFn := framework.StartTestServer(tCtx, t, framework.TestServerSetup{
ModifyServerRunOptions: func(opts *options.ServerRunOptions) {
opts.ServiceClusterIPRanges = serviceCIDR
var testcases = []struct {
name string
ipAllocatorGate bool
disableDualWriteGate bool
}{
{
name: "Bitmap allocator",
ipAllocatorGate: false,
disableDualWriteGate: false,
},
})
defer tearDownFn()
svc := func(i int) *v1.Service {
return &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("svc-%v", i),
},
Spec: v1.ServiceSpec{
Type: v1.ServiceTypeClusterIP,
Ports: []v1.ServicePort{
{Port: 80},
{
name: "IP allocator and dual write",
ipAllocatorGate: true,
disableDualWriteGate: false,
},
{
name: "IP allocator only",
ipAllocatorGate: true,
disableDualWriteGate: true,
},
{
name: "disable dual write with bitmap allocator",
ipAllocatorGate: false,
disableDualWriteGate: true,
},
}
for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
etcdOptions := framework.SharedEtcd()
apiServerOptions := kubeapiservertesting.NewDefaultTestServerOptions()
s1 := kubeapiservertesting.StartTestServerOrDie(t,
apiServerOptions,
[]string{
"--runtime-config=networking.k8s.io/v1alpha1=true",
"--service-cluster-ip-range=" + serviceCIDR,
"--advertise-address=10.0.0.2",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=%v,%s=%v", features.MultiCIDRServiceAllocator, tc.ipAllocatorGate, features.DisableAllocatorDualWrite, tc.disableDualWriteGate),
},
},
}
}
etcdOptions)
defer s1.TearDownFn()
// Wait until the default "kubernetes" service is created.
if err := wait.Poll(250*time.Millisecond, time.Minute, func() (bool, error) {
_, err := client.CoreV1().Services(metav1.NamespaceDefault).Get(context.TODO(), "kubernetes", metav1.GetOptions{})
if err != nil && !apierrors.IsNotFound(err) {
return false, err
}
return !apierrors.IsNotFound(err), nil
}); err != nil {
t.Fatalf("creating kubernetes service timed out")
}
client, err := clientset.NewForConfig(s1.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
// make 5 more services to take up all IPs
for i := 0; i < 5; i++ {
if _, err := client.CoreV1().Services(metav1.NamespaceDefault).Create(context.TODO(), svc(i), metav1.CreateOptions{}); err != nil {
t.Error(err)
}
}
svc := func(i int) *v1.Service {
return &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("svc-%v", i),
},
Spec: v1.ServiceSpec{
Type: v1.ServiceTypeClusterIP,
Ports: []v1.ServicePort{
{Port: 80},
},
},
}
}
// Make another service. It will fail because we're out of cluster IPs
if _, err := client.CoreV1().Services(metav1.NamespaceDefault).Create(context.TODO(), svc(8), metav1.CreateOptions{}); err != nil {
if !strings.Contains(err.Error(), "range is full") {
t.Errorf("unexpected error text: %v", err)
}
} else {
svcs, err := client.CoreV1().Services(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
if err != nil {
t.Fatalf("unexpected success, and error getting the services: %v", err)
}
allIPs := []string{}
for _, s := range svcs.Items {
allIPs = append(allIPs, s.Spec.ClusterIP)
}
t.Fatalf("unexpected creation success. The following IPs exist: %#v. It should only be possible to allocate 2 IP addresses in this cluster.\n\n%#v", allIPs, svcs)
}
// Wait until the default "kubernetes" service is created.
if err := wait.PollUntilContextTimeout(context.Background(), 250*time.Millisecond, 15*time.Second, true, func(context.Context) (bool, error) {
_, err := client.CoreV1().Services(metav1.NamespaceDefault).Get(context.TODO(), "kubernetes", metav1.GetOptions{})
if err != nil && !apierrors.IsNotFound(err) {
return false, err
}
return !apierrors.IsNotFound(err), nil
}); err != nil {
t.Fatalf("creating kubernetes service timed out: %v", err)
}
// Delete the first service.
if err := client.CoreV1().Services(metav1.NamespaceDefault).Delete(context.TODO(), svc(1).ObjectMeta.Name, metav1.DeleteOptions{}); err != nil {
t.Fatalf("got unexpected error: %v", err)
}
// make 5 more services to take up all IPs
for i := 0; i < 5; i++ {
if _, err := client.CoreV1().Services(metav1.NamespaceDefault).Create(context.TODO(), svc(i), metav1.CreateOptions{}); err != nil {
t.Error(err)
}
}
// This time creating the second service should work.
if _, err := client.CoreV1().Services(metav1.NamespaceDefault).Create(context.TODO(), svc(8), metav1.CreateOptions{}); err != nil {
t.Fatalf("got unexpected error: %v", err)
// Make another service. It will fail because we're out of cluster IPs
if _, err := client.CoreV1().Services(metav1.NamespaceDefault).Create(context.TODO(), svc(8), metav1.CreateOptions{}); err != nil {
if !strings.Contains(err.Error(), "range is full") {
t.Errorf("unexpected error text: %v", err)
}
} else {
svcs, err := client.CoreV1().Services(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
if err != nil {
t.Fatalf("unexpected success, and error getting the services: %v", err)
}
allIPs := []string{}
for _, s := range svcs.Items {
allIPs = append(allIPs, s.Spec.ClusterIP)
}
t.Fatalf("unexpected creation success. The following IPs exist: %#v. It should only be possible to allocate 2 IP addresses in this cluster.\n\n%#v", allIPs, svcs)
}
// Delete the first service.
if err := client.CoreV1().Services(metav1.NamespaceDefault).Delete(context.TODO(), svc(1).ObjectMeta.Name, metav1.DeleteOptions{}); err != nil {
t.Fatalf("got unexpected error: %v", err)
}
// This time creating the second service should work.
if _, err := client.CoreV1().Services(metav1.NamespaceDefault).Create(context.TODO(), svc(8), metav1.CreateOptions{}); err != nil {
t.Fatalf("got unexpected error: %v", err)
}
})
}
}
func TestServiceAllocIPAddress(t *testing.T) {
func TestServiceAllocIPAddressLargeCIDR(t *testing.T) {
// Create an IPv6 single-stack control plane with a large range
serviceCIDR := "2001:db8::/64"
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRServiceAllocator, true)
tCtx := ktesting.Init(t)
client, _, tearDownFn := framework.StartTestServer(tCtx, t, framework.TestServerSetup{
ModifyServerRunOptions: func(opts *options.ServerRunOptions) {
opts.ServiceClusterIPRanges = serviceCIDR
opts.GenericServerRunOptions.AdvertiseAddress = netutils.ParseIPSloppy("2001:db8::10")
opts.APIEnablement.RuntimeConfig.Set("networking.k8s.io/v1alpha1=true")
etcdOptions := framework.SharedEtcd()
apiServerOptions := kubeapiservertesting.NewDefaultTestServerOptions()
s1 := kubeapiservertesting.StartTestServerOrDie(t,
apiServerOptions,
[]string{
"--runtime-config=networking.k8s.io/v1alpha1=true",
"--service-cluster-ip-range=" + serviceCIDR,
"--advertise-address=2001:db8::10",
"--disable-admission-plugins=ServiceAccount",
// the bitmap allocator does not support large service CIDRs, so DisableAllocatorDualWrite is set to true
fmt.Sprintf("--feature-gates=%s=true,%s=true", features.MultiCIDRServiceAllocator, features.DisableAllocatorDualWrite),
},
})
defer tearDownFn()
etcdOptions)
defer s1.TearDownFn()
client, err := clientset.NewForConfig(s1.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
svc := func(i int) *v1.Service {
return &v1.Service{
@@ -168,10 +215,10 @@ func TestServiceAllocIPAddress(t *testing.T) {
// because it is not reasonable to create 2^64 services
lastSvc := svc(8)
lastSvc.Spec.ClusterIP = "2001:db8::ffff:ffff:ffff:ffff"
if _, err := client.CoreV1().Services(metav1.NamespaceDefault).Create(context.TODO(), lastSvc, metav1.CreateOptions{}); err != nil {
if _, err := client.CoreV1().Services(metav1.NamespaceDefault).Create(tCtx, lastSvc, metav1.CreateOptions{}); err != nil {
t.Errorf("unexpected error text: %v", err)
}
_, err := client.NetworkingV1alpha1().IPAddresses().Get(context.TODO(), lastSvc.Spec.ClusterIP, metav1.GetOptions{})
_, err = client.NetworkingV1alpha1().IPAddresses().Get(tCtx, lastSvc.Spec.ClusterIP, metav1.GetOptions{})
if err != nil {
t.Error(err)
}
@@ -179,9 +226,6 @@ func TestServiceAllocIPAddress(t *testing.T) {
}
func TestMigrateService(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRServiceAllocator, true)
//logs.GlogSetter("7")
etcdOptions := framework.SharedEtcd()
apiServerOptions := kubeapiservertesting.NewDefaultTestServerOptions()
s := kubeapiservertesting.StartTestServerOrDie(t,
@@ -191,6 +235,7 @@ func TestMigrateService(t *testing.T) {
"--service-cluster-ip-range=10.0.0.0/24",
"--advertise-address=10.1.1.1",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=true,%s=false", features.MultiCIDRServiceAllocator, features.DisableAllocatorDualWrite),
},
etcdOptions)
defer s.TearDownFn()
@@ -224,7 +269,7 @@ func TestMigrateService(t *testing.T) {
}
t.Logf("Service stored in etcd %v", string(svcJSON))
kubeclient, err := kubernetes.NewForConfig(s.ClientConfig)
kubeclient, err := clientset.NewForConfig(s.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
@@ -252,7 +297,9 @@ func TestMigrateService(t *testing.T) {
}
func TestSkewedAllocators(t *testing.T) {
// TestSkewedAllocatorsRollback creates an apiserver with the new allocator and
// later starts an old apiserver with the bitmap allocator.
func TestSkewedAllocatorsRollback(t *testing.T) {
svc := func(i int) *v1.Service {
return &v1.Service{
ObjectMeta: metav1.ObjectMeta{
@@ -275,11 +322,11 @@ func TestSkewedAllocators(t *testing.T) {
"--runtime-config=networking.k8s.io/v1alpha1=true",
"--service-cluster-ip-range=10.0.0.0/24",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=true", features.MultiCIDRServiceAllocator)},
fmt.Sprintf("--feature-gates=%s=true,%s=true", features.MultiCIDRServiceAllocator, features.DisableAllocatorDualWrite)},
etcdOptions)
defer s1.TearDownFn()
kubeclient1, err := kubernetes.NewForConfig(s1.ClientConfig)
kubeclient1, err := clientset.NewForConfig(s1.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
@@ -307,7 +354,7 @@ func TestSkewedAllocators(t *testing.T) {
etcdOptions)
defer s2.TearDownFn()
kubeclient2, err := kubernetes.NewForConfig(s2.ClientConfig)
kubeclient2, err := clientset.NewForConfig(s2.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
@@ -335,6 +382,162 @@ func TestSkewedAllocators(t *testing.T) {
}
// TestSkewAllocatorsRollout tests that two different apiservers, one with
// the feature gate enabled and the other with it disabled, cannot allocate
// the same IP to two different Services
func TestSkewAllocatorsRollout(t *testing.T) {
svc := func(name string, ip string) *v1.Service {
return &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Spec: v1.ServiceSpec{
Type: v1.ServiceTypeClusterIP,
ClusterIP: ip,
Ports: []v1.ServicePort{
{Port: 80},
},
},
}
}
etcdOptions := framework.SharedEtcd()
apiServerOptions := kubeapiservertesting.NewDefaultTestServerOptions()
// Order matters here because the apiserver allocator logic needs to cast
// the Allocator interface to be able to pass the Service reference.
// oldServer uses bitmap allocator
oldServer := kubeapiservertesting.StartTestServerOrDie(t, apiServerOptions,
[]string{
"--runtime-config=networking.k8s.io/v1alpha1=false",
"--service-cluster-ip-range=10.0.0.0/16",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=false", features.MultiCIDRServiceAllocator)},
etcdOptions)
defer oldServer.TearDownFn()
kubeclientOld, err := clientset.NewForConfig(oldServer.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
// newServer uses the IPAddress allocator
newServer := kubeapiservertesting.StartTestServerOrDie(t, apiServerOptions,
[]string{
"--runtime-config=networking.k8s.io/v1alpha1=true",
"--service-cluster-ip-range=10.0.0.0/16",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=true,%s=false", features.MultiCIDRServiceAllocator, features.DisableAllocatorDualWrite)},
etcdOptions)
defer newServer.TearDownFn()
kubeclientNew, err := clientset.NewForConfig(newServer.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
namespace := "test-ns"
ns := framework.CreateNamespaceOrDie(kubeclientNew, namespace, t)
defer framework.DeleteNamespaceOrDie(kubeclientNew, ns, t)
// create two Services in parallel with the same ClusterIP, one in each apiserver, N times.
concurrency := 100
var errorsOld, errorsNew atomic.Uint64
var wg sync.WaitGroup
for i := 5; i < concurrency+5; i++ {
ip := fmt.Sprintf("10.0.0.%d", i)
service1 := svc(fmt.Sprintf("svc-%d-new", i), ip)
service2 := svc(fmt.Sprintf("svc-%d-old", i), ip)
wg.Add(2)
go func() {
defer wg.Done()
_, err := kubeclientNew.CoreV1().Services(namespace).Create(context.TODO(), service1, metav1.CreateOptions{})
if err != nil {
t.Logf("Service %s with ip %s result: %v", service1.Name, service1.Spec.ClusterIP, err)
errorsNew.Add(1)
}
}()
go func() {
defer wg.Done()
_, err := kubeclientOld.CoreV1().Services(namespace).Create(context.TODO(), service2, metav1.CreateOptions{})
if err != nil {
t.Logf("Service %s with ip %s result: %v", service2.Name, service2.Spec.ClusterIP, err)
errorsOld.Add(1)
}
}()
}
wg.Wait()
errorsTotal := errorsOld.Load() + errorsNew.Load()
t.Logf("errors received, old allocator %d new allocator %d", errorsOld.Load(), errorsNew.Load())
if errorsTotal != uint64(concurrency) {
t.Fatalf("expected %d Services creation to have failed, got %d", concurrency, errorsTotal)
}
// It takes some time for Services to be available.
servicesList := []v1.Service{}
err = wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 10*time.Second, true, func(context.Context) (bool, error) {
svcs, err := kubeclientNew.CoreV1().Services(namespace).List(context.TODO(), metav1.ListOptions{})
if err != nil {
return false, nil
}
if len(svcs.Items) != concurrency {
t.Logf("expected %d Services to exist, got %d", concurrency, len(svcs.Items))
return false, nil
}
servicesList = svcs.Items
return true, nil
})
if err != nil {
t.Fatalf("No expected Services objects created: %v", err)
}
// It takes some time for the repairip loop to create the corresponding IPAddress objects
// ClusterIPs are synchronized through the bitmap.
err = wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 10*time.Second, true, func(context.Context) (bool, error) {
ips, err := kubeclientNew.NetworkingV1alpha1().IPAddresses().List(context.Background(), metav1.ListOptions{})
if err != nil {
return false, nil
}
// count the kubernetes.default service too
if len(ips.Items) != concurrency+1 {
t.Logf("expected %d IPAddresses to exist, got %d: %v", concurrency+1, len(ips.Items), ips.Items)
return false, nil
}
return true, nil
})
if err != nil {
t.Fatalf("No expected IPAddress objects created: %v", err)
}
allIPs := map[string]string{}
for _, s := range servicesList {
if name, ok := allIPs[s.Spec.ClusterIP]; ok {
t.Fatalf("duplicate IP %s for Services %s and %s", s.Spec.ClusterIP, name, s.Name)
} else {
allIPs[s.Spec.ClusterIP] = s.Name
}
}
// Check all the IPAddress objects are created
for i := 5; i < concurrency+5; i++ {
ip := fmt.Sprintf("10.0.0.%d", i)
err = wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 10*time.Second, true, func(context.Context) (bool, error) {
// The repair loop must create the associated IPAddress object
_, err = kubeclientNew.NetworkingV1alpha1().IPAddresses().Get(context.Background(), ip, metav1.GetOptions{})
if err != nil {
return false, nil
}
return true, nil
})
if err != nil {
t.Fatalf("No expected IPAddress objects created: %v", err)
}
}
}
func TestFlagsIPAllocator(t *testing.T) {
svc := func(i int) *v1.Service {
return &v1.Service{
@@ -361,7 +564,7 @@ func TestFlagsIPAllocator(t *testing.T) {
etcdOptions)
defer s1.TearDownFn()
kubeclient1, err := kubernetes.NewForConfig(s1.ClientConfig)
kubeclient1, err := clientset.NewForConfig(s1.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}

View File

@@ -0,0 +1,142 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package servicecidr
import (
"context"
"fmt"
"testing"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/test/integration/framework"
)
func TestEnableDisableServiceCIDR(t *testing.T) {
svc := func(i int) *v1.Service {
return &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("svc-%v", i),
},
Spec: v1.ServiceSpec{
Type: v1.ServiceTypeClusterIP,
Ports: []v1.ServicePort{
{Port: 80},
},
},
}
}
// start etcd instance
etcdOptions := framework.SharedEtcd()
// apiserver with the feature disabled
apiServerOptions := kubeapiservertesting.NewDefaultTestServerOptions()
s1 := kubeapiservertesting.StartTestServerOrDie(t, apiServerOptions,
[]string{
"--runtime-config=networking.k8s.io/v1alpha1=false",
"--service-cluster-ip-range=10.0.0.0/24",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=false", features.MultiCIDRServiceAllocator)},
etcdOptions)
client1, err := clientset.NewForConfig(s1.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
ns := framework.CreateNamespaceOrDie(client1, "test-enable-disable-service-cidr", t)
// make 2 services, there will be 3 services counting kubernetes.default
for i := 0; i < 2; i++ {
if _, err := client1.CoreV1().Services(ns.Name).Create(context.TODO(), svc(i), metav1.CreateOptions{}); err != nil {
t.Fatal(err)
}
}
services, err := client1.CoreV1().Services("").List(context.TODO(), metav1.ListOptions{})
if err != nil {
t.Fatal(err)
}
if len(services.Items) != 3 {
t.Fatalf("expected 3 Services got %d", len(services.Items))
}
// shutdown s1
s1.TearDownFn()
// apiserver with the feature enabled
s2 := kubeapiservertesting.StartTestServerOrDie(t, apiServerOptions,
[]string{
"--runtime-config=networking.k8s.io/v1alpha1=true",
"--service-cluster-ip-range=10.0.0.0/24",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=true", features.MultiCIDRServiceAllocator)},
etcdOptions)
client2, err := clientset.NewForConfig(s2.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
// make 2 services, there will be 5 services now
for i := 2; i < 4; i++ {
if _, err := client2.CoreV1().Services(ns.Name).Create(context.TODO(), svc(i), metav1.CreateOptions{}); err != nil {
t.Fatal(err)
}
}
services, err = client2.CoreV1().Services("").List(context.TODO(), metav1.ListOptions{})
if err != nil {
t.Fatal(err)
}
if len(services.Items) != 5 {
t.Fatalf("expected 5 Services got %d", len(services.Items))
}
// shutdown apiserver with the feature enabled
s2.TearDownFn()
// start an apiserver with the feature disabled
s3 := kubeapiservertesting.StartTestServerOrDie(t, apiServerOptions,
[]string{
"--runtime-config=networking.k8s.io/v1alpha1=false",
"--service-cluster-ip-range=10.0.0.0/24",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=false", features.MultiCIDRServiceAllocator)},
etcdOptions)
defer s3.TearDownFn()
client3, err := clientset.NewForConfig(s3.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
// make 2 services, there will be 7 services now
for i := 5; i < 7; i++ {
if _, err := client3.CoreV1().Services(ns.Name).Create(context.TODO(), svc(i), metav1.CreateOptions{}); err != nil {
t.Fatal(err)
}
}
services, err = client3.CoreV1().Services("").List(context.TODO(), metav1.ListOptions{})
if err != nil {
t.Fatal(err)
}
if len(services.Items) != 7 {
t.Fatalf("expected 5 Services got %d", len(services.Items))
}
}

View File

@@ -27,10 +27,8 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
featuregatetesting "k8s.io/component-base/featuregate/testing"
kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing"
"k8s.io/kubernetes/pkg/controller/servicecidrs"
"k8s.io/kubernetes/pkg/controlplane/controller/defaultservicecidr"
@@ -49,7 +47,6 @@ import (
// 6. start the new apiserver with the new ServiceCIDRs on the flags and shutdown the old one
// 7. delete the kubernetes.default service, the new apiserver will recreate it within the new ServiceCIDR
func TestMigrateServiceCIDR(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRServiceAllocator, true)
tCtx := ktesting.Init(t)
cidr1 := "192.168.0.0/29"
@@ -64,6 +61,7 @@ func TestMigrateServiceCIDR(t *testing.T) {
"--service-cluster-ip-range=" + cidr1,
"--advertise-address=10.1.1.1",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=true,%s=true", features.MultiCIDRServiceAllocator, features.DisableAllocatorDualWrite),
},
etcdOptions)
@@ -168,18 +166,22 @@ func TestMigrateServiceCIDR(t *testing.T) {
if svc.Name == "kubernetes" {
continue
}
if err := client1.CoreV1().Services(svc.Namespace).Delete(context.Background(), svc.Name, metav1.DeleteOptions{}); err != nil {
t.Fatalf("got unexpected error: %v", err)
}
t.Logf("Deleted Service with IP %s", svc.Spec.ClusterIP)
// wipe the necessary fields so we can recreate the Service
svc.ResourceVersion = ""
svc.Spec.ClusterIP = ""
svc.Spec.ClusterIPs = nil
svc.Status = v1.ServiceStatus{}
if err := client1.CoreV1().Services(svc.Namespace).Delete(context.Background(), svc.Name, metav1.DeleteOptions{}); err != nil {
t.Fatalf("got unexpected error: %v", err)
}
svc, err := client1.CoreV1().Services(svc.Namespace).Create(context.Background(), &svc, metav1.CreateOptions{})
if err != nil {
t.Fatalf("got unexpected error: %v", err)
}
t.Logf("Created Service with IP %s", svc.Spec.ClusterIP)
if !cidrContainsIP(cidr2, svc.Spec.ClusterIP) {
t.Fatalf("Service expected to have an ip in range 10.168.0.0/24, got %s", svc.Spec.ClusterIP)
}
@@ -233,19 +235,23 @@ func TestMigrateServiceCIDR(t *testing.T) {
}
if len(cidr.Spec.CIDRs) == 0 {
t.Logf("No CIDR available")
return false, nil
}
if cidr.Spec.CIDRs[0] != cidr2 {
t.Logf("CIDR expected %s got %s", cidr2, cidr.Spec.CIDRs[0])
return false, nil
}
if len(cidr.Finalizers) == 0 {
t.Logf("Expected finalizer to be set")
return false, nil
}
for _, condition := range cidr.Status.Conditions {
if condition.Type == networkingv1alpha1.ServiceCIDRConditionReady {
t.Logf("Expected Condition %s to be %s", condition.Status, metav1.ConditionTrue)
return condition.Status == metav1.ConditionTrue, nil
}
}

View File

@@ -0,0 +1,143 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package servicecidr
import (
"context"
"fmt"
"testing"
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"
kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/test/integration/framework"
)
// TestServiceAllocPerformance measures the latency to create N services with a parallelism of K
// using the old and the new ClusterIP allocators.
// The test is skipped on CI and is left to be run manually to check for possible regressions.
// The current results with 100 workers and 15k services on an n2-standard-48 (48 vCPU, 192 GB RAM) are:
// legacy perf_test.go:139: [RESULT] Duration 1m9.646167533s: [quantile:0.5 value:0.462886801 quantile:0.9 value:0.496662838 quantile:0.99 value:0.725845905]
// new perf_test.go:139: [RESULT] Duration 2m12.900694343s: [quantile:0.5 value:0.481814448 quantile:0.9 value:1.3867615469999999 quantile:0.99 value:1.888190671]
func TestServiceAllocPerformance(t *testing.T) {
t.Skip("KEP-1880 performance comparison")
serviceCreation := metrics.NewHistogram(&metrics.HistogramOpts{
Name: "service_duration_seconds",
Help: "A summary of the Service creation durations in seconds.",
Buckets: metrics.DefBuckets,
})
legacyregistry.MustRegister(serviceCreation)
svc := func(i, j int) *v1.Service {
return &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("svc-%v-%v", i, j),
},
Spec: v1.ServiceSpec{
Type: v1.ServiceTypeClusterIP,
Ports: []v1.ServicePort{
{Port: 80},
},
},
}
}
worker := func(client clientset.Interface, id int, jobs <-chan int, results chan<- error) {
for j := range jobs {
t.Logf("Worker: %d Job: %d", id, j)
func() {
now := time.Now()
defer func() {
t.Logf("worker %d job %d took %v", id, j, time.Since(now))
serviceCreation.Observe(time.Since(now).Seconds())
}()
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
_, err := client.CoreV1().Services(metav1.NamespaceDefault).Create(ctx, svc(id, j), metav1.CreateOptions{})
if err != nil {
t.Errorf("unexpected error: %v", err)
}
results <- err
}()
}
}
for _, gate := range []bool{false, true} {
t.Run(fmt.Sprintf("feature-gate=%v", gate), func(t *testing.T) {
etcdOptions := framework.SharedEtcd()
apiServerOptions := kubeapiservertesting.NewDefaultTestServerOptions()
s1 := kubeapiservertesting.StartTestServerOrDie(t,
apiServerOptions,
[]string{
"--runtime-config=networking.k8s.io/v1alpha1=true",
"--service-cluster-ip-range=" + "10.0.0.0/12",
"--advertise-address=10.0.0.1",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=true,%s=true", features.MultiCIDRServiceAllocator, features.DisableAllocatorDualWrite),
},
etcdOptions)
defer s1.TearDownFn()
client, err := clientset.NewForConfig(s1.ClientConfig)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
legacyregistry.Reset()
// 100 workers for 15k services
nworkers := 100
nservices := 15000
jobs := make(chan int, nservices)
results := make(chan error, nservices)
t.Log("Starting workers to create ClusterIP Service")
now := time.Now()
for w := 0; w < nworkers; w++ {
t.Logf("Starting worker %d", w)
go worker(client, w, jobs, results)
}
for i := 0; i < nservices; i++ {
t.Logf("Sending job %d", i)
jobs <- i
}
t.Log("All jobs processed")
close(jobs)
for c := 0; c < nservices; c++ {
t.Logf("Getting results %d", c)
err := <-results
if err != nil {
t.Errorf("error creating service: %v", err)
}
}
vec, err := testutil.GetHistogramVecFromGatherer(legacyregistry.DefaultGatherer, serviceCreation.Name, map[string]string{})
if err != nil {
t.Error(err)
}
t.Logf("[RESULT] feature-gate=%v Duration: %v Avg: %.4f p95: %.4f p99: %.4f", gate, time.Since(now), vec.Average(), vec.Quantile(0.95), vec.Quantile(0.99))
})
}
}

View File

@@ -29,10 +29,8 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
featuregatetesting "k8s.io/component-base/featuregate/testing"
kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing"
"k8s.io/kubernetes/pkg/controller/servicecidrs"
"k8s.io/kubernetes/pkg/features"
@@ -40,8 +38,6 @@ import (
)
func TestServiceAllocNewServiceCIDR(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRServiceAllocator, true)
etcdOptions := framework.SharedEtcd()
apiServerOptions := kubeapiservertesting.NewDefaultTestServerOptions()
s := kubeapiservertesting.StartTestServerOrDie(t,
@@ -51,6 +47,7 @@ func TestServiceAllocNewServiceCIDR(t *testing.T) {
"--service-cluster-ip-range=192.168.0.0/29",
"--advertise-address=10.1.1.1",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=true,%s=true", features.MultiCIDRServiceAllocator, features.DisableAllocatorDualWrite),
},
etcdOptions)
defer s.TearDownFn()
@@ -134,7 +131,6 @@ func TestServiceAllocNewServiceCIDR(t *testing.T) {
// Deletes the Service with the IPAddress blocking the deletion
// cidr3 must not exist at this point
func TestServiceCIDRDeletion(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRServiceAllocator, true)
cidr1 := "192.168.0.0/29" // same as the default
cidr2 := "10.0.0.0/24" // new range
cidr3 := "10.0.0.0/16" // contains cidr2
@@ -148,6 +144,7 @@ func TestServiceCIDRDeletion(t *testing.T) {
"--service-cluster-ip-range=" + cidr1,
"--advertise-address=172.16.1.1",
"--disable-admission-plugins=ServiceAccount",
fmt.Sprintf("--feature-gates=%s=true,%s=true", features.MultiCIDRServiceAllocator, features.DisableAllocatorDualWrite),
},
etcdOptions)
defer s.TearDownFn()