From 2bb3466f9b3171c79edc19872947edab21a163a1 Mon Sep 17 00:00:00 2001 From: wangyang60 Date: Fri, 2 Aug 2024 15:11:55 +0800 Subject: [PATCH] koord-manager: add unallocated resource into mid resource. Signed-off-by: wangyang60 --- apis/configuration/slo_controller_config.go | 9 +- apis/configuration/zz_generated.deepcopy.go | 5 + .../colocation_cm_event_handler_test.go | 30 +++++ .../plugins/midresource/plugin.go | 119 ++++++++++++++---- .../plugins/midresource/plugin_test.go | 94 ++++++++++++-- .../noderesource/resource_calculator_test.go | 52 ++++---- pkg/util/sloconfig/colocation_config.go | 8 +- .../colocation_config_extender_test.go | 2 +- pkg/util/sloconfig/colocation_config_test.go | 93 ++++++++++++++ 9 files changed, 350 insertions(+), 62 deletions(-) diff --git a/apis/configuration/slo_controller_config.go b/apis/configuration/slo_controller_config.go index 17f832ac52..8ea15d90c4 100644 --- a/apis/configuration/slo_controller_config.go +++ b/apis/configuration/slo_controller_config.go @@ -247,12 +247,13 @@ type ColocationStrategy struct { UpdateTimeThresholdSeconds *int64 `json:"updateTimeThresholdSeconds,omitempty" validate:"omitempty,min=1"` ResourceDiffThreshold *float64 `json:"resourceDiffThreshold,omitempty" validate:"omitempty,gt=0,max=1"` - // MidCPUThresholdPercent defines the maximum percentage of the Mid-tier cpu resource dividing the node allocatable. - // MidCPUAllocatable <= NodeCPUAllocatable * MidCPUThresholdPercent / 100. + // AllocatableCPU[Mid]' := min(Reclaimable[Mid], NodeAllocatable * MidCPUThresholdPercent) + Unallocated[Mid] * midUnallocatedRatio. MidCPUThresholdPercent *int64 `json:"midCPUThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"` - // MidMemoryThresholdPercent defines the maximum percentage of the Mid-tier memory resource dividing the node allocatable. - // MidMemoryAllocatable <= NodeMemoryAllocatable * MidMemoryThresholdPercent / 100. 
+ // AllocatableMemory[Mid]' := min(Reclaimable[Mid], NodeAllocatable * MidMemoryThresholdPercent) + Unallocated[Mid] * midUnallocatedRatio. MidMemoryThresholdPercent *int64 `json:"midMemoryThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"` + // MidUnallocatedPercent defines the percentage of unallocated resources that is added to the Mid-tier allocatable resources. + // Allocatable[Mid]' := min(Reclaimable[Mid], NodeAllocatable * thresholdRatio) + Unallocated[Mid] * midUnallocatedRatio. + MidUnallocatedPercent *int64 `json:"midUnallocatedPercent,omitempty" validate:"omitempty,min=0,max=100"` ColocationStrategyExtender `json:",inline"` // for third-party extension } diff --git a/apis/configuration/zz_generated.deepcopy.go b/apis/configuration/zz_generated.deepcopy.go index f9a59a310b..cc52c891f6 100644 --- a/apis/configuration/zz_generated.deepcopy.go +++ b/apis/configuration/zz_generated.deepcopy.go @@ -204,6 +204,11 @@ func (in *ColocationStrategy) DeepCopyInto(out *ColocationStrategy) { *out = new(int64) **out = **in } + if in.MidUnallocatedPercent != nil { + in, out := &in.MidUnallocatedPercent, &out.MidUnallocatedPercent + *out = new(int64) + **out = **in + } in.ColocationStrategyExtender.DeepCopyInto(&out.ColocationStrategyExtender) } diff --git a/pkg/slo-controller/config/colocation_cm_event_handler_test.go b/pkg/slo-controller/config/colocation_cm_event_handler_test.go index 572b3f2dce..df0745f367 100644 --- a/pkg/slo-controller/config/colocation_cm_event_handler_test.go +++ b/pkg/slo-controller/config/colocation_cm_event_handler_test.go @@ -208,6 +208,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) { UpdateTimeThresholdSeconds: pointer.Int64(100), ResourceDiffThreshold: pointer.Float64(0.1), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), }, }, available: true, @@ -299,6 +302,9 @@ func 
Test_syncColocationConfigIfChanged(t *testing.T) { UpdateTimeThresholdSeconds: pointer.Int64(300), ResourceDiffThreshold: pointer.Float64(0.1), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), }, NodeConfigs: []configuration.NodeColocationCfg{ { @@ -322,6 +328,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) { UpdateTimeThresholdSeconds: pointer.Int64(300), ResourceDiffThreshold: pointer.Float64(0.1), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), }, }, }, @@ -367,6 +376,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) { UpdateTimeThresholdSeconds: pointer.Int64(300), ResourceDiffThreshold: pointer.Float64(0.1), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), }, }, available: true, @@ -390,6 +402,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) { UpdateTimeThresholdSeconds: pointer.Int64(300), ResourceDiffThreshold: pointer.Float64(0.1), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), }, }, available: true, @@ -425,6 +440,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) { UpdateTimeThresholdSeconds: pointer.Int64(300), ResourceDiffThreshold: pointer.Float64(0.1), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), }, }, available: true, @@ -496,6 +514,7 @@ func 
Test_syncColocationConfigIfChanged(t *testing.T) { "resourceDiffThreshold": 0.1, "midCPUThresholdPercent": 45, "midMemoryThresholdPercent": 65, + "midUnallocatedPercent": 50, "nodeConfigs": [{ "nodeSelector": { "matchLabels": { @@ -526,6 +545,7 @@ func Test_syncColocationConfigIfChanged(t *testing.T) { MidCPUThresholdPercent: pointer.Int64(45), MidMemoryThresholdPercent: pointer.Int64(65), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidUnallocatedPercent: pointer.Int64(50), }, NodeConfigs: []configuration.NodeColocationCfg{ { @@ -551,6 +571,7 @@ func Test_syncColocationConfigIfChanged(t *testing.T) { MidCPUThresholdPercent: pointer.Int64(45), MidMemoryThresholdPercent: pointer.Int64(65), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidUnallocatedPercent: pointer.Int64(50), }, }, }, @@ -628,6 +649,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) { UpdateTimeThresholdSeconds: pointer.Int64(300), ResourceDiffThreshold: pointer.Float64(0.1), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), }, NodeConfigs: []configuration.NodeColocationCfg{ { @@ -650,6 +674,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) { UpdateTimeThresholdSeconds: pointer.Int64(300), ResourceDiffThreshold: pointer.Float64(0.1), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), //change CPUReclaimThresholdPercent: pointer.Int64(60), CPUCalculatePolicy: &cpuCalcPolicyNew, @@ -758,6 +785,9 @@ func Test_IsCfgAvailable(t *testing.T) { ResourceDiffThreshold: pointer.Float64(0.1), MetricReportIntervalSeconds: pointer.Int64(60), MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: 
pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), }, }, }, diff --git a/pkg/slo-controller/noderesource/plugins/midresource/plugin.go b/pkg/slo-controller/noderesource/plugins/midresource/plugin.go index d554ac9165..abaa7dc4fc 100644 --- a/pkg/slo-controller/noderesource/plugins/midresource/plugin.go +++ b/pkg/slo-controller/noderesource/plugins/midresource/plugin.go @@ -22,6 +22,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + quotav1 "k8s.io/apiserver/pkg/quota/v1" "k8s.io/klog/v2" "k8s.io/utils/clock" @@ -31,10 +32,17 @@ import ( "github.com/koordinator-sh/koordinator/pkg/slo-controller/metrics" "github.com/koordinator-sh/koordinator/pkg/slo-controller/noderesource/framework" "github.com/koordinator-sh/koordinator/pkg/util" + "github.com/koordinator-sh/koordinator/pkg/util/sloconfig" ) const PluginName = "MidResource" +const ( + MidCPUThreshold = "midCPUThreshold" + MidMemoryThreshold = "midMemoryThreshold" + MidUnallocatedPercent = "midUnallocatedPercent" +) + // ResourceNames defines the Mid-tier extended resource names to update. 
var ResourceNames = []corev1.ResourceName{extension.MidCPU, extension.MidMemory} @@ -104,13 +112,6 @@ func (p *Plugin) isDegradeNeeded(strategy *configuration.ColocationStrategy, nod return true } - if nodeMetric.Status.ProdReclaimableMetric == nil || - nodeMetric.Status.ProdReclaimableMetric.Resource.ResourceList == nil { - klog.V(4).Infof("need degradation for Mid-tier, err: nodeMetric %v has no valid prod reclaimable: %v", - nodeMetric.Name, nodeMetric.Status.ProdReclaimableMetric) - return true - } - now := clk.Now() if now.After(nodeMetric.Status.UpdateTime.Add(time.Duration(*strategy.DegradeTimeMinutes) * time.Minute)) { klog.V(4).Infof("need degradation for Mid-tier, err: timeout nodeMetric: %v, current timestamp: %v,"+ @@ -118,6 +119,13 @@ func (p *Plugin) isDegradeNeeded(strategy *configuration.ColocationStrategy, nod return true } + if nodeMetric.Status.ProdReclaimableMetric == nil || + nodeMetric.Status.ProdReclaimableMetric.Resource.ResourceList == nil { + klog.V(4).Infof("need degradation for Mid-tier, err: nodeMetric %v has no valid prod reclaimable, set it to zero: %v", + nodeMetric.Name, nodeMetric.Status.ProdReclaimableMetric) + return false + } + return false } @@ -125,18 +133,52 @@ func (p *Plugin) degradeCalculate(node *corev1.Node, message string) []framework return p.Reset(node, message) } +// Unallocated[Mid] = max(NodeAllocatable - Allocated[Prod], 0) +func (p *Plugin) getUnallocated(node *corev1.Node, podList *corev1.PodList) corev1.ResourceList { + allocated := corev1.ResourceList{} + for i := range podList.Items { + pod := &podList.Items[i] + priorityClass := extension.GetPodPriorityClassWithDefault(pod) + // If the pod is not marked as low priority, it is considered high priority + isHighPriority := priorityClass != extension.PriorityMid && priorityClass != extension.PriorityBatch && priorityClass != extension.PriorityFree + if !isHighPriority { + continue + } + + if pod.Status.Phase != corev1.PodRunning && pod.Status.Phase != 
corev1.PodPending { + continue + } + podRequest := util.GetPodRequest(pod, corev1.ResourceCPU, corev1.ResourceMemory) + allocated = quotav1.Add(allocated, podRequest) + } + + return quotav1.SubtractWithNonNegativeResult(node.Status.Allocatable, allocated) +} + func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *corev1.Node, podList *corev1.PodList, resourceMetrics *framework.ResourceMetrics) []framework.ResourceItem { - // MidAllocatable := min(NodeAllocatable * thresholdRatio, ProdReclaimable) - prodReclaimable := resourceMetrics.NodeMetric.Status.ProdReclaimableMetric.Resource - allocatableMilliCPU := prodReclaimable.Cpu().MilliValue() - allocatableMemory := prodReclaimable.Memory().Value() + // Allocatable[Mid]' := min(Reclaimable[Mid], NodeAllocatable * thresholdRatio) + Unallocated[Mid] * midUnallocatedRatio + // Unallocated[Mid] = max(NodeAllocatable - Allocated[Prod], 0) - nodeAllocatable := node.Status.Allocatable - cpuThresholdRatio := 1.0 - if strategy != nil && strategy.MidCPUThresholdPercent != nil { - cpuThresholdRatio = float64(*strategy.MidCPUThresholdPercent) / 100 + var allocatableMilliCPU, allocatableMemory, prodReclaimableCPU int64 + var prodReclaimableMemory string = "0" + prodReclaimableMetic := resourceMetrics.NodeMetric.Status.ProdReclaimableMetric + + if prodReclaimableMetic == nil || prodReclaimableMetic.Resource.ResourceList == nil { + klog.V(4).Infof("no valid prod reclaimable, so use default zero value") + allocatableMilliCPU = 0 + allocatableMemory = 0 + } else { + prodReclaimable := resourceMetrics.NodeMetric.Status.ProdReclaimableMetric.Resource + allocatableMilliCPU = prodReclaimable.Cpu().MilliValue() + allocatableMemory = prodReclaimable.Memory().Value() + prodReclaimableCPU = allocatableMilliCPU + prodReclaimableMemory = prodReclaimable.Memory().String() } + + nodeAllocatable := node.Status.Allocatable + defaultStrategy := sloconfig.DefaultColocationStrategy() + cpuThresholdRatio := 
getPercentFromStrategy(strategy, &defaultStrategy, MidCPUThreshold) if maxMilliCPU := float64(nodeAllocatable.Cpu().MilliValue()) * cpuThresholdRatio; allocatableMilliCPU > int64(maxMilliCPU) { allocatableMilliCPU = int64(maxMilliCPU) } @@ -147,10 +189,7 @@ func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *cor } cpuInMilliCores := resource.NewQuantity(allocatableMilliCPU, resource.DecimalSI) - memThresholdRatio := 1.0 - if strategy != nil && strategy.MidMemoryThresholdPercent != nil { - memThresholdRatio = float64(*strategy.MidMemoryThresholdPercent) / 100 - } + memThresholdRatio := getPercentFromStrategy(strategy, &defaultStrategy, MidMemoryThreshold) if maxMemory := float64(nodeAllocatable.Memory().Value()) * memThresholdRatio; allocatableMemory > int64(maxMemory) { allocatableMemory = int64(maxMemory) } @@ -161,6 +200,17 @@ func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *cor } memory := resource.NewQuantity(allocatableMemory, resource.BinarySI) + // add unallocated + unallocated := p.getUnallocated(node, podList) + // the CPU quantity is converted to milli-cores so it can be added to cpuInMilliCores + unallocatedCPU, unallocatedMemory := resource.NewQuantity(unallocated.Cpu().MilliValue(), resource.DecimalSI), unallocated.Memory() + midUnallocatedRatio := getPercentFromStrategy(strategy, &defaultStrategy, MidUnallocatedPercent) + adjustedUnallocatedCPU := resource.NewQuantity(int64(float64(unallocatedCPU.Value())*midUnallocatedRatio), resource.DecimalSI) + adjustedUnallocatedMemory := resource.NewQuantity(int64(float64(unallocatedMemory.Value())*midUnallocatedRatio), resource.BinarySI) + + cpuInMilliCores.Add(*adjustedUnallocatedCPU) + memory.Add(*adjustedUnallocatedMemory) + metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.MidCPU), metrics.UnitInteger, float64(cpuInMilliCores.MilliValue())/1000) metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.MidMemory), metrics.UnitByte, float64(memory.Value())) 
klog.V(6).Infof("calculated mid allocatable for node %s, cpu(milli-core) %v, memory(byte) %v", @@ -170,14 +220,14 @@ func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *cor { Name: extension.MidCPU, Quantity: cpuInMilliCores, // in milli-cores - Message: fmt.Sprintf("midAllocatable[CPU(milli-core)]:%v = min(nodeAllocatable:%v * thresholdRatio:%v, ProdReclaimable:%v)", - cpuInMilliCores.Value(), nodeAllocatable.Cpu().MilliValue(), cpuThresholdRatio, prodReclaimable.Cpu().MilliValue()), + Message: fmt.Sprintf("midAllocatable[CPU(milli-core)]:%v = min(nodeAllocatable:%v * thresholdRatio:%v, ProdReclaimable:%v) + Unallocated:%v * midUnallocatedRatio:%v", + cpuInMilliCores.Value(), nodeAllocatable.Cpu().MilliValue(), cpuThresholdRatio, prodReclaimableCPU, unallocatedCPU.Value(), midUnallocatedRatio), }, { Name: extension.MidMemory, Quantity: memory, - Message: fmt.Sprintf("midAllocatable[Memory(byte)]:%s = min(nodeAllocatable:%s * thresholdRatio:%v, ProdReclaimable:%s)", - memory.String(), nodeAllocatable.Memory().String(), memThresholdRatio, prodReclaimable.Memory().String()), + Message: fmt.Sprintf("midAllocatable[Memory(byte)]:%s = min(nodeAllocatable:%s * thresholdRatio:%v, ProdReclaimable:%s) + Unallocated:%v * midUnallocatedRatio:%v", + memory.String(), nodeAllocatable.Memory().String(), memThresholdRatio, prodReclaimableMemory, unallocatedMemory.String(), midUnallocatedRatio), }, } } @@ -196,3 +246,26 @@ func prepareNodeForResource(node *corev1.Node, nr *framework.NodeResource, name node.Status.Allocatable[name] = *q } } + +func getPercentFromStrategy(strategy, defaultStrategy *configuration.ColocationStrategy, strategyType string) float64 { + switch strategyType { + case MidCPUThreshold: + if strategy == nil || strategy.MidCPUThresholdPercent == nil { + return float64(*defaultStrategy.MidCPUThresholdPercent) / 100 + } + return float64(*strategy.MidCPUThresholdPercent) / 100 + case MidMemoryThreshold: + if strategy == nil || 
strategy.MidMemoryThresholdPercent == nil { + return float64(*defaultStrategy.MidMemoryThresholdPercent) / 100 + } + return float64(*strategy.MidMemoryThresholdPercent) / 100 + case MidUnallocatedPercent: + if strategy == nil || strategy.MidUnallocatedPercent == nil { + return float64(*defaultStrategy.MidUnallocatedPercent) / 100 + } + return float64(*strategy.MidUnallocatedPercent) / 100 + default: + // fixme: what about default? + return -1 + } +} diff --git a/pkg/slo-controller/noderesource/plugins/midresource/plugin_test.go b/pkg/slo-controller/noderesource/plugins/midresource/plugin_test.go index 12942501e8..f9b7c0be23 100644 --- a/pkg/slo-controller/noderesource/plugins/midresource/plugin_test.go +++ b/pkg/slo-controller/noderesource/plugins/midresource/plugin_test.go @@ -269,9 +269,18 @@ func TestPluginCalculate(t *testing.T) { }, } testCPUQuant := resource.MustParse("10000") + testCPUQuant2 := resource.MustParse("18000") + testCPUQuant3 := resource.MustParse("0") + + //NOTE: if not call String, cpu String will be diff _ = testCPUQuant.String() + _ = testCPUQuant2.String() + _ = testCPUQuant3.String() testMemoryQuant := resource.MustParse("15Gi") - testMemoryQuant2 := resource.MustParse("30Gi") + testMemoryQuant2 := resource.MustParse("46Gi") + testMemoryQuant3 := resource.MustParse("0Gi") + _ = testMemoryQuant3.String() + type args struct { strategy *configuration.ColocationStrategy node *corev1.Node @@ -425,12 +434,12 @@ func TestPluginCalculate(t *testing.T) { want: []framework.ResourceItem{ { Name: extension.MidCPU, - Message: "midAllocatable[CPU(milli-core)]:10000 = min(nodeAllocatable:100000 * thresholdRatio:1, ProdReclaimable:10000)", + Message: "midAllocatable[CPU(milli-core)]:10000 = min(nodeAllocatable:100000 * thresholdRatio:1, ProdReclaimable:10000) + Unallocated:80000 * midUnallocatedRatio:0", Quantity: &testCPUQuant, }, { Name: extension.MidMemory, - Message: "midAllocatable[Memory(byte)]:15Gi = min(nodeAllocatable:200Gi * thresholdRatio:1, 
ProdReclaimable:15Gi)", + Message: "midAllocatable[Memory(byte)]:15Gi = min(nodeAllocatable:200Gi * thresholdRatio:1, ProdReclaimable:15Gi) + Unallocated:160Gi * midUnallocatedRatio:0", Quantity: &testMemoryQuant, }, }, @@ -444,6 +453,7 @@ func TestPluginCalculate(t *testing.T) { DegradeTimeMinutes: pointer.Int64(10), MidCPUThresholdPercent: pointer.Int64(10), MidMemoryThresholdPercent: pointer.Int64(20), + MidUnallocatedPercent: pointer.Int64(10), }, node: testNode, podList: &corev1.PodList{ @@ -504,17 +514,87 @@ func TestPluginCalculate(t *testing.T) { want: []framework.ResourceItem{ { Name: extension.MidCPU, - Message: "midAllocatable[CPU(milli-core)]:10000 = min(nodeAllocatable:100000 * thresholdRatio:0.1, ProdReclaimable:15000)", - Quantity: &testCPUQuant, + Message: "midAllocatable[CPU(milli-core)]:18000 = min(nodeAllocatable:100000 * thresholdRatio:0.1, ProdReclaimable:15000) + Unallocated:80000 * midUnallocatedRatio:0.1", + Quantity: &testCPUQuant2, }, { Name: extension.MidMemory, - Message: "midAllocatable[Memory(byte)]:30Gi = min(nodeAllocatable:200Gi * thresholdRatio:0.2, ProdReclaimable:30Gi)", + Message: "midAllocatable[Memory(byte)]:46Gi = min(nodeAllocatable:200Gi * thresholdRatio:0.2, ProdReclaimable:30Gi) + Unallocated:160Gi * midUnallocatedRatio:0.1", Quantity: &testMemoryQuant2, }, }, wantErr: false, }, + { + name: "calculate correctly when prod reclaimable is nil", + args: args{ + strategy: &configuration.ColocationStrategy{ + Enable: pointer.Bool(true), + DegradeTimeMinutes: pointer.Int64(10), + }, + node: testNode, + podList: &corev1.PodList{ + Items: []corev1.Pod{ + *testProdLSPod, + *testBatchBEPod, + }, + }, + metrics: &framework.ResourceMetrics{ + NodeMetric: &slov1alpha1.NodeMetric{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node", + }, + Status: slov1alpha1.NodeMetricStatus{ + UpdateTime: &metav1.Time{Time: time.Now().Add(-20 * time.Second)}, + NodeMetric: &slov1alpha1.NodeMetricInfo{ + NodeUsage: slov1alpha1.ResourceMap{ + 
ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("30"), + corev1.ResourceMemory: resource.MustParse("50Gi"), + }, + }, + }, + PodsMetric: []*slov1alpha1.PodMetricInfo{ + { + Name: testProdLSPod.Name, + Namespace: testProdLSPod.Namespace, + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("5"), + corev1.ResourceMemory: resource.MustParse("10Gi"), + }, + }, + }, + { + Name: testBatchBEPod.Name, + Namespace: testBatchBEPod.Namespace, + PodUsage: slov1alpha1.ResourceMap{ + ResourceList: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("15"), + corev1.ResourceMemory: resource.MustParse("30Gi"), + }, + }, + }, + }, + ProdReclaimableMetric: &slov1alpha1.ReclaimableMetric{}, + }, + }, + }, + }, + want: []framework.ResourceItem{ + { + Name: extension.MidCPU, + Message: "midAllocatable[CPU(milli-core)]:0 = min(nodeAllocatable:100000 * thresholdRatio:1, ProdReclaimable:0) + Unallocated:80000 * midUnallocatedRatio:0", + Quantity: &testCPUQuant3, + }, + { + Name: extension.MidMemory, + Message: "midAllocatable[Memory(byte)]:0 = min(nodeAllocatable:200Gi * thresholdRatio:1, ProdReclaimable:0) + Unallocated:160Gi * midUnallocatedRatio:0", + Quantity: &testMemoryQuant3, + }, + }, + wantErr: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -649,7 +729,7 @@ func TestPlugin_isDegradeNeeded(t *testing.T) { Status: corev1.NodeStatus{}, }, }, - want: true, + want: false, }, { name: "valid NodeMetric status should not degrade", diff --git a/pkg/slo-controller/noderesource/resource_calculator_test.go b/pkg/slo-controller/noderesource/resource_calculator_test.go index b32f4b5dad..3d20ec9f85 100644 --- a/pkg/slo-controller/noderesource/resource_calculator_test.go +++ b/pkg/slo-controller/noderesource/resource_calculator_test.go @@ -188,14 +188,14 @@ func Test_calculateNodeResource(t *testing.T) { Message: "batchAllocatable[Mem(GB)]:6 = nodeCapacity:40 - 
nodeSafetyMargin:14 - systemUsage:0 - podHPUsed:20", }, { - Name: extension.MidCPU, - Reset: true, - Message: "degrade node Mid resource because of abnormal nodeMetric, reason: degradedByMidResource", + Name: extension.MidCPU, + Quantity: resource.NewQuantity(0, resource.DecimalSI), + Message: "midAllocatable[CPU(milli-core)]:0 = min(nodeAllocatable:20000 * thresholdRatio:1, ProdReclaimable:0) + Unallocated:0 * midUnallocatedRatio:0", }, { - Name: extension.MidMemory, - Reset: true, - Message: "degrade node Mid resource because of abnormal nodeMetric, reason: degradedByMidResource", + Name: extension.MidMemory, + Quantity: resource.NewQuantity(0, resource.BinarySI), + Message: "midAllocatable[Memory(byte)]:0 = min(nodeAllocatable:40G * thresholdRatio:1, ProdReclaimable:0) + Unallocated:20G * midUnallocatedRatio:0", }, }...), }, @@ -395,14 +395,14 @@ func Test_calculateNodeResource(t *testing.T) { Message: "batchAllocatable[Mem(GB)]:33 = nodeCapacity:120 - nodeSafetyMargin:42 - systemUsage:12 - podHPUsed:33", }, { - Name: extension.MidCPU, - Reset: true, - Message: "degrade node Mid resource because of abnormal nodeMetric, reason: degradedByMidResource", + Name: extension.MidCPU, + Quantity: resource.NewQuantity(0, resource.DecimalSI), + Message: "midAllocatable[CPU(milli-core)]:0 = min(nodeAllocatable:100000 * thresholdRatio:1, ProdReclaimable:0) + Unallocated:60000 * midUnallocatedRatio:0", }, { - Name: extension.MidMemory, - Reset: true, - Message: "degrade node Mid resource because of abnormal nodeMetric, reason: degradedByMidResource", + Name: extension.MidMemory, + Quantity: resource.NewQuantity(0, resource.BinarySI), + Message: "midAllocatable[Memory(byte)]:0 = min(nodeAllocatable:120G * thresholdRatio:1, ProdReclaimable:0) + Unallocated:60G * midUnallocatedRatio:0", }, }...), }, @@ -605,14 +605,14 @@ func Test_calculateNodeResource(t *testing.T) { Message: "batchAllocatable[Mem(GB)]:39 = nodeCapacity:120 - nodeSafetyMargin:36 - systemUsage:12 - 
podHPUsed:33", }, { - Name: extension.MidCPU, - Reset: true, - Message: "degrade node Mid resource because of abnormal nodeMetric, reason: degradedByMidResource", + Name: extension.MidCPU, + Quantity: resource.NewQuantity(0, resource.DecimalSI), + Message: "midAllocatable[CPU(milli-core)]:0 = min(nodeAllocatable:100000 * thresholdRatio:1, ProdReclaimable:0) + Unallocated:60000 * midUnallocatedRatio:0", }, { - Name: extension.MidMemory, - Reset: true, - Message: "degrade node Mid resource because of abnormal nodeMetric, reason: degradedByMidResource", + Name: extension.MidMemory, + Quantity: resource.NewQuantity(0, resource.BinarySI), + Message: "midAllocatable[Memory(byte)]:0 = min(nodeAllocatable:120G * thresholdRatio:1, ProdReclaimable:0) + Unallocated:60G * midUnallocatedRatio:0", }, }...), }, @@ -815,14 +815,14 @@ func Test_calculateNodeResource(t *testing.T) { Message: "batchAllocatable[Mem(GB)]:36 = nodeCapacity:120 - nodeSafetyMargin:24 - nodeReserved:0 - podHPRequest:60", }, { - Name: extension.MidCPU, - Reset: true, - Message: "degrade node Mid resource because of abnormal nodeMetric, reason: degradedByMidResource", + Name: extension.MidCPU, + Quantity: resource.NewQuantity(0, resource.DecimalSI), + Message: "midAllocatable[CPU(milli-core)]:0 = min(nodeAllocatable:100000 * thresholdRatio:1, ProdReclaimable:0) + Unallocated:60000 * midUnallocatedRatio:0", }, { - Name: extension.MidMemory, - Reset: true, - Message: "degrade node Mid resource because of abnormal nodeMetric, reason: degradedByMidResource", + Name: extension.MidMemory, + Quantity: resource.NewQuantity(0, resource.BinarySI), + Message: "midAllocatable[Memory(byte)]:0 = min(nodeAllocatable:120G * thresholdRatio:1, ProdReclaimable:0) + Unallocated:60G * midUnallocatedRatio:0", }, }...), }, @@ -1032,12 +1032,12 @@ func Test_calculateNodeResource(t *testing.T) { { Name: extension.MidCPU, Quantity: resource.NewQuantity(10000, resource.DecimalSI), - Message: "midAllocatable[CPU(milli-core)]:10000 = 
min(nodeAllocatable:100000 * thresholdRatio:1, ProdReclaimable:10000)", + Message: "midAllocatable[CPU(milli-core)]:10000 = min(nodeAllocatable:100000 * thresholdRatio:1, ProdReclaimable:10000) + Unallocated:60000 * midUnallocatedRatio:0", }, { Name: extension.MidMemory, Quantity: resource.NewQuantity(20000000000, resource.BinarySI), - Message: "midAllocatable[Memory(byte)]:19531250Ki = min(nodeAllocatable:120G * thresholdRatio:1, ProdReclaimable:20G)", + Message: "midAllocatable[Memory(byte)]:19531250Ki = min(nodeAllocatable:120G * thresholdRatio:1, ProdReclaimable:20G) + Unallocated:60G * midUnallocatedRatio:0", }, }...), }, diff --git a/pkg/util/sloconfig/colocation_config.go b/pkg/util/sloconfig/colocation_config.go index 924793383a..9bca5025c0 100644 --- a/pkg/util/sloconfig/colocation_config.go +++ b/pkg/util/sloconfig/colocation_config.go @@ -68,6 +68,9 @@ func DefaultColocationStrategy() configuration.ColocationStrategy { DegradeTimeMinutes: pointer.Int64(15), UpdateTimeThresholdSeconds: pointer.Int64(300), ResourceDiffThreshold: pointer.Float64(0.1), + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), } cfg.ColocationStrategyExtender = defaultColocationStrategyExtender return cfg @@ -82,7 +85,10 @@ func IsColocationStrategyValid(strategy *configuration.ColocationStrategy) bool (strategy.DegradeTimeMinutes == nil || *strategy.DegradeTimeMinutes > 0) && (strategy.UpdateTimeThresholdSeconds == nil || *strategy.UpdateTimeThresholdSeconds > 0) && (strategy.ResourceDiffThreshold == nil || *strategy.ResourceDiffThreshold > 0) && - (strategy.MetricMemoryCollectPolicy == nil || len(*strategy.MetricMemoryCollectPolicy) > 0) + (strategy.MetricMemoryCollectPolicy == nil || len(*strategy.MetricMemoryCollectPolicy) > 0) && + (strategy.MidCPUThresholdPercent == nil || (*strategy.MidCPUThresholdPercent >= 0 && *strategy.MidCPUThresholdPercent <= 100)) && + 
(strategy.MidMemoryThresholdPercent == nil || (*strategy.MidMemoryThresholdPercent >= 0 && *strategy.MidMemoryThresholdPercent <= 100)) && + (strategy.MidUnallocatedPercent == nil || (*strategy.MidUnallocatedPercent >= 0 && *strategy.MidUnallocatedPercent <= 100)) } func IsNodeColocationCfgValid(nodeCfg *configuration.NodeColocationCfg) bool { diff --git a/pkg/util/sloconfig/colocation_config_extender_test.go b/pkg/util/sloconfig/colocation_config_extender_test.go index 8d1027253a..092f5645d4 100644 --- a/pkg/util/sloconfig/colocation_config_extender_test.go +++ b/pkg/util/sloconfig/colocation_config_extender_test.go @@ -43,7 +43,7 @@ func Test_registerDefaultColocationExtension(t *testing.T) { configBytes, fmtErr := json.Marshal(defautlColocationCfg) configStr := string(configBytes) - expectStr := `{"enable":false,"metricAggregateDurationSeconds":300,"metricReportIntervalSeconds":60,"metricAggregatePolicy":{"durations":["5m0s","10m0s","30m0s"]},"metricMemoryCollectPolicy":"usageWithoutPageCache","cpuReclaimThresholdPercent":60,"cpuCalculatePolicy":"usage","memoryReclaimThresholdPercent":65,"memoryCalculatePolicy":"usage","degradeTimeMinutes":15,"updateTimeThresholdSeconds":300,"resourceDiffThreshold":0.1,"extensions":{"test-ext-key":{"testBoolVal":true}}}` + expectStr := `{"enable":false,"metricAggregateDurationSeconds":300,"metricReportIntervalSeconds":60,"metricAggregatePolicy":{"durations":["5m0s","10m0s","30m0s"]},"metricMemoryCollectPolicy":"usageWithoutPageCache","cpuReclaimThresholdPercent":60,"cpuCalculatePolicy":"usage","memoryReclaimThresholdPercent":65,"memoryCalculatePolicy":"usage","degradeTimeMinutes":15,"updateTimeThresholdSeconds":300,"resourceDiffThreshold":0.1,"midCPUThresholdPercent":100,"midMemoryThresholdPercent":100,"midUnallocatedPercent":0,"extensions":{"test-ext-key":{"testBoolVal":true}}}` assert.Equal(t, expectStr, configStr, "config json") assert.NoError(t, fmtErr, "default colocation config marshall") diff --git 
a/pkg/util/sloconfig/colocation_config_test.go b/pkg/util/sloconfig/colocation_config_test.go index 1cc0a59c18..5d01e66754 100644 --- a/pkg/util/sloconfig/colocation_config_test.go +++ b/pkg/util/sloconfig/colocation_config_test.go @@ -198,6 +198,9 @@ func Test_GetNodeColocationStrategy(t *testing.T) { UpdateTimeThresholdSeconds: pointer.Int64(300), ResourceDiffThreshold: pointer.Float64(0.1), MetricMemoryCollectPolicy: &defaultMemoryCollectPolicy, + MidCPUThresholdPercent: pointer.Int64(100), + MidMemoryThresholdPercent: pointer.Int64(100), + MidUnallocatedPercent: pointer.Int64(0), }, }, { @@ -614,6 +617,96 @@ func Test_IsColocationStrategyValid(t *testing.T) { }, want: true, }, + { + name: "midCPUThresholdPercent less than 0 strategy is invalid", + args: args{ + strategy: &configuration.ColocationStrategy{ + Enable: pointer.Bool(true), + CPUReclaimThresholdPercent: pointer.Int64(65), + MemoryReclaimThresholdPercent: pointer.Int64(65), + DegradeTimeMinutes: pointer.Int64(15), + UpdateTimeThresholdSeconds: pointer.Int64(300), + ResourceDiffThreshold: pointer.Float64(0.1), + MidCPUThresholdPercent: pointer.Int64(-1), + }, + }, + want: false, + }, + { + name: "midCPUThresholdPercent more than 100 strategy is invalid", + args: args{ + strategy: &configuration.ColocationStrategy{ + Enable: pointer.Bool(true), + CPUReclaimThresholdPercent: pointer.Int64(65), + MemoryReclaimThresholdPercent: pointer.Int64(65), + DegradeTimeMinutes: pointer.Int64(15), + UpdateTimeThresholdSeconds: pointer.Int64(300), + ResourceDiffThreshold: pointer.Float64(0.1), + MidCPUThresholdPercent: pointer.Int64(150), + }, + }, + want: false, + }, + { + name: "midMemoryThresholdPercent less than 0 strategy is invalid", + args: args{ + strategy: &configuration.ColocationStrategy{ + Enable: pointer.Bool(true), + CPUReclaimThresholdPercent: pointer.Int64(65), + MemoryReclaimThresholdPercent: pointer.Int64(65), + DegradeTimeMinutes: pointer.Int64(15), + UpdateTimeThresholdSeconds: pointer.Int64(300), 
+ ResourceDiffThreshold: pointer.Float64(0.1), + MidMemoryThresholdPercent: pointer.Int64(-20), + }, + }, + want: false, + }, + { + name: "midMemoryThresholdPercent more than 100 strategy is invalid", + args: args{ + strategy: &configuration.ColocationStrategy{ + Enable: pointer.Bool(true), + CPUReclaimThresholdPercent: pointer.Int64(65), + MemoryReclaimThresholdPercent: pointer.Int64(65), + DegradeTimeMinutes: pointer.Int64(15), + UpdateTimeThresholdSeconds: pointer.Int64(300), + ResourceDiffThreshold: pointer.Float64(0.1), + MidMemoryThresholdPercent: pointer.Int64(101), + }, + }, + want: false, + }, + { + name: "midUnallocatedPercent less than 0 strategy is invalid", + args: args{ + strategy: &configuration.ColocationStrategy{ + Enable: pointer.Bool(true), + CPUReclaimThresholdPercent: pointer.Int64(65), + MemoryReclaimThresholdPercent: pointer.Int64(65), + DegradeTimeMinutes: pointer.Int64(15), + UpdateTimeThresholdSeconds: pointer.Int64(300), + ResourceDiffThreshold: pointer.Float64(0.1), + MidUnallocatedPercent: pointer.Int64(-10), + }, + }, + want: false, + }, + { + name: "midUnallocatedPercent more than 100 strategy is invalid", + args: args{ + strategy: &configuration.ColocationStrategy{ + Enable: pointer.Bool(true), + CPUReclaimThresholdPercent: pointer.Int64(65), + MemoryReclaimThresholdPercent: pointer.Int64(65), + DegradeTimeMinutes: pointer.Int64(15), + UpdateTimeThresholdSeconds: pointer.Int64(300), + ResourceDiffThreshold: pointer.Float64(0.1), + MidUnallocatedPercent: pointer.Int64(200), + }, + }, + want: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {