Skip to content

Commit

Permalink
koord-manager: add unallocated resource into mid resource.
Browse files Browse the repository at this point in the history
Signed-off-by: wangyang60 <[email protected]>
  • Loading branch information
tan90github committed Sep 2, 2024
1 parent d9c40a5 commit 2bb3466
Show file tree
Hide file tree
Showing 9 changed files with 350 additions and 62 deletions.
9 changes: 5 additions & 4 deletions apis/configuration/slo_controller_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,13 @@ type ColocationStrategy struct {
UpdateTimeThresholdSeconds *int64 `json:"updateTimeThresholdSeconds,omitempty" validate:"omitempty,min=1"`
ResourceDiffThreshold *float64 `json:"resourceDiffThreshold,omitempty" validate:"omitempty,gt=0,max=1"`

// MidCPUThresholdPercent defines the maximum percentage of the Mid-tier cpu resource dividing the node allocatable.
// MidCPUAllocatable <= NodeCPUAllocatable * MidCPUThresholdPercent / 100.
// AllocatableCPU[Mid]' := min(Reclaimable[Mid], NodeAllocatable * MidCPUThresholdPercent) + Unallocated[Mid] * midUnallocatedRatio.
MidCPUThresholdPercent *int64 `json:"midCPUThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"`
// MidMemoryThresholdPercent defines the maximum percentage of the Mid-tier memory resource dividing the node allocatable.
// MidMemoryAllocatable <= NodeMemoryAllocatable * MidMemoryThresholdPercent / 100.
// AllocatableMemory[Mid]' := min(Reclaimable[Mid], NodeAllocatable * MidMemoryThresholdPercent) + Unallocated[Mid] * midUnallocatedRatio.
MidMemoryThresholdPercent *int64 `json:"midMemoryThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"`
// MidUnallocatedPercent defines the percentage of unallocated resources that is added to the Mid-tier allocatable resources.
// Allocatable[Mid]' := min(Reclaimable[Mid], NodeAllocatable * thresholdRatio) + Unallocated[Mid] * midUnallocatedRatio.
MidUnallocatedPercent *int64 `json:"midUnallocatedPercent,omitempty" validate:"omitempty,min=0,max=100"`

ColocationStrategyExtender `json:",inline"` // for third-party extension
}
Expand Down
5 changes: 5 additions & 0 deletions apis/configuration/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions pkg/slo-controller/config/colocation_cm_event_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
UpdateTimeThresholdSeconds: pointer.Int64(100),
ResourceDiffThreshold: pointer.Float64(0.1),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidCPUThresholdPercent: pointer.Int64(100),
MidMemoryThresholdPercent: pointer.Int64(100),
MidUnallocatedPercent: pointer.Int64(0),
},
},
available: true,
Expand Down Expand Up @@ -299,6 +302,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
UpdateTimeThresholdSeconds: pointer.Int64(300),
ResourceDiffThreshold: pointer.Float64(0.1),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidCPUThresholdPercent: pointer.Int64(100),
MidMemoryThresholdPercent: pointer.Int64(100),
MidUnallocatedPercent: pointer.Int64(0),
},
NodeConfigs: []configuration.NodeColocationCfg{
{
Expand All @@ -322,6 +328,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
UpdateTimeThresholdSeconds: pointer.Int64(300),
ResourceDiffThreshold: pointer.Float64(0.1),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidCPUThresholdPercent: pointer.Int64(100),
MidMemoryThresholdPercent: pointer.Int64(100),
MidUnallocatedPercent: pointer.Int64(0),
},
},
},
Expand Down Expand Up @@ -367,6 +376,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
UpdateTimeThresholdSeconds: pointer.Int64(300),
ResourceDiffThreshold: pointer.Float64(0.1),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidCPUThresholdPercent: pointer.Int64(100),
MidMemoryThresholdPercent: pointer.Int64(100),
MidUnallocatedPercent: pointer.Int64(0),
},
},
available: true,
Expand All @@ -390,6 +402,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
UpdateTimeThresholdSeconds: pointer.Int64(300),
ResourceDiffThreshold: pointer.Float64(0.1),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidCPUThresholdPercent: pointer.Int64(100),
MidMemoryThresholdPercent: pointer.Int64(100),
MidUnallocatedPercent: pointer.Int64(0),
},
},
available: true,
Expand Down Expand Up @@ -425,6 +440,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
UpdateTimeThresholdSeconds: pointer.Int64(300),
ResourceDiffThreshold: pointer.Float64(0.1),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidCPUThresholdPercent: pointer.Int64(100),
MidMemoryThresholdPercent: pointer.Int64(100),
MidUnallocatedPercent: pointer.Int64(0),
},
},
available: true,
Expand Down Expand Up @@ -496,6 +514,7 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
"resourceDiffThreshold": 0.1,
"midCPUThresholdPercent": 45,
"midMemoryThresholdPercent": 65,
"midUnallocatedPercent": 50,
"nodeConfigs": [{
"nodeSelector": {
"matchLabels": {
Expand Down Expand Up @@ -526,6 +545,7 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
MidCPUThresholdPercent: pointer.Int64(45),
MidMemoryThresholdPercent: pointer.Int64(65),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidUnallocatedPercent: pointer.Int64(50),
},
NodeConfigs: []configuration.NodeColocationCfg{
{
Expand All @@ -551,6 +571,7 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
MidCPUThresholdPercent: pointer.Int64(45),
MidMemoryThresholdPercent: pointer.Int64(65),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidUnallocatedPercent: pointer.Int64(50),
},
},
},
Expand Down Expand Up @@ -628,6 +649,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
UpdateTimeThresholdSeconds: pointer.Int64(300),
ResourceDiffThreshold: pointer.Float64(0.1),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidCPUThresholdPercent: pointer.Int64(100),
MidMemoryThresholdPercent: pointer.Int64(100),
MidUnallocatedPercent: pointer.Int64(0),
},
NodeConfigs: []configuration.NodeColocationCfg{
{
Expand All @@ -650,6 +674,9 @@ func Test_syncColocationConfigIfChanged(t *testing.T) {
UpdateTimeThresholdSeconds: pointer.Int64(300),
ResourceDiffThreshold: pointer.Float64(0.1),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidCPUThresholdPercent: pointer.Int64(100),
MidMemoryThresholdPercent: pointer.Int64(100),
MidUnallocatedPercent: pointer.Int64(0),
//change
CPUReclaimThresholdPercent: pointer.Int64(60),
CPUCalculatePolicy: &cpuCalcPolicyNew,
Expand Down Expand Up @@ -758,6 +785,9 @@ func Test_IsCfgAvailable(t *testing.T) {
ResourceDiffThreshold: pointer.Float64(0.1),
MetricReportIntervalSeconds: pointer.Int64(60),
MetricMemoryCollectPolicy: &defaultNodeMemoryCollectPolicy,
MidCPUThresholdPercent: pointer.Int64(100),
MidMemoryThresholdPercent: pointer.Int64(100),
MidUnallocatedPercent: pointer.Int64(0),
},
},
},
Expand Down
119 changes: 96 additions & 23 deletions pkg/slo-controller/noderesource/plugins/midresource/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
quotav1 "k8s.io/apiserver/pkg/quota/v1"
"k8s.io/klog/v2"
"k8s.io/utils/clock"

Expand All @@ -31,10 +32,17 @@ import (
"github.com/koordinator-sh/koordinator/pkg/slo-controller/metrics"
"github.com/koordinator-sh/koordinator/pkg/slo-controller/noderesource/framework"
"github.com/koordinator-sh/koordinator/pkg/util"
"github.com/koordinator-sh/koordinator/pkg/util/sloconfig"
)

const PluginName = "MidResource"

// Selector keys accepted by getPercentFromStrategy; each one names the
// ColocationStrategy percentage field that should be resolved to a ratio.
const (
	// MidCPUThreshold selects ColocationStrategy.MidCPUThresholdPercent.
	MidCPUThreshold = "midCPUThreshold"
	// MidMemoryThreshold selects ColocationStrategy.MidMemoryThresholdPercent.
	MidMemoryThreshold = "midMemoryThreshold"
	// MidUnallocatedPercent selects ColocationStrategy.MidUnallocatedPercent.
	MidUnallocatedPercent = "midUnallocatedPercent"
)

// ResourceNames defines the Mid-tier extended resource names to update.
var ResourceNames = []corev1.ResourceName{extension.MidCPU, extension.MidMemory}

Expand Down Expand Up @@ -104,39 +112,73 @@ func (p *Plugin) isDegradeNeeded(strategy *configuration.ColocationStrategy, nod
return true
}

if nodeMetric.Status.ProdReclaimableMetric == nil ||
nodeMetric.Status.ProdReclaimableMetric.Resource.ResourceList == nil {
klog.V(4).Infof("need degradation for Mid-tier, err: nodeMetric %v has no valid prod reclaimable: %v",
nodeMetric.Name, nodeMetric.Status.ProdReclaimableMetric)
return true
}

now := clk.Now()
if now.After(nodeMetric.Status.UpdateTime.Add(time.Duration(*strategy.DegradeTimeMinutes) * time.Minute)) {
klog.V(4).Infof("need degradation for Mid-tier, err: timeout nodeMetric: %v, current timestamp: %v,"+
" metric last update timestamp: %v", nodeMetric.Name, now, nodeMetric.Status.UpdateTime)
return true
}

if nodeMetric.Status.ProdReclaimableMetric == nil ||
nodeMetric.Status.ProdReclaimableMetric.Resource.ResourceList == nil {
klog.V(4).Infof("need degradation for Mid-tier, err: nodeMetric %v has no valid prod reclaimable, set it to zero: %v",
nodeMetric.Name, nodeMetric.Status.ProdReclaimableMetric)
return false
}

return false
}

// degradeCalculate builds the resource items used when Mid-tier calculation is
// degraded. It simply delegates to p.Reset, forwarding the node and the
// human-readable message unchanged.
func (p *Plugin) degradeCalculate(node *corev1.Node, message string) []framework.ResourceItem {
	degradedItems := p.Reset(node, message)
	return degradedItems
}

// getUnallocated returns the node allocatable that has not been requested by
// high-priority pods:
//
//	Unallocated[Mid] = max(NodeAllocatable - Allocated[Prod], 0)
//
// A pod counts as high priority unless its priority class is Mid, Batch or
// Free. Only pods in the Running or Pending phase contribute, and only their
// CPU and memory requests are summed.
func (p *Plugin) getUnallocated(node *corev1.Node, podList *corev1.PodList) corev1.ResourceList {
	prodAllocated := corev1.ResourceList{}
	for idx := range podList.Items {
		pod := &podList.Items[idx]

		// Low-priority tiers do not consume the high-priority allocation.
		switch extension.GetPodPriorityClassWithDefault(pod) {
		case extension.PriorityMid, extension.PriorityBatch, extension.PriorityFree:
			continue
		}

		// Pods in any other phase are not counted as holding their requests.
		if phase := pod.Status.Phase; phase != corev1.PodRunning && phase != corev1.PodPending {
			continue
		}

		prodAllocated = quotav1.Add(prodAllocated, util.GetPodRequest(pod, corev1.ResourceCPU, corev1.ResourceMemory))
	}

	return quotav1.SubtractWithNonNegativeResult(node.Status.Allocatable, prodAllocated)
}

func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *corev1.Node, podList *corev1.PodList,
resourceMetrics *framework.ResourceMetrics) []framework.ResourceItem {
// MidAllocatable := min(NodeAllocatable * thresholdRatio, ProdReclaimable)
prodReclaimable := resourceMetrics.NodeMetric.Status.ProdReclaimableMetric.Resource
allocatableMilliCPU := prodReclaimable.Cpu().MilliValue()
allocatableMemory := prodReclaimable.Memory().Value()
// Allocatable[Mid]' := min(Reclaimable[Mid], NodeAllocatable * thresholdRatio) + Unallocated[Mid] * midUnallocatedRatio
// Unallocated[Mid] = max(NodeAllocatable - Allocated[Prod], 0)

nodeAllocatable := node.Status.Allocatable
cpuThresholdRatio := 1.0
if strategy != nil && strategy.MidCPUThresholdPercent != nil {
cpuThresholdRatio = float64(*strategy.MidCPUThresholdPercent) / 100
var allocatableMilliCPU, allocatableMemory, prodReclaimableCPU int64
var prodReclaimableMemory string = "0"
prodReclaimableMetic := resourceMetrics.NodeMetric.Status.ProdReclaimableMetric

if prodReclaimableMetic == nil || prodReclaimableMetic.Resource.ResourceList == nil {
klog.V(4).Infof("no valid prod reclaimable, so use default zero value")
allocatableMilliCPU = 0
allocatableMemory = 0
} else {
prodReclaimable := resourceMetrics.NodeMetric.Status.ProdReclaimableMetric.Resource
allocatableMilliCPU = prodReclaimable.Cpu().MilliValue()
allocatableMemory = prodReclaimable.Memory().Value()
prodReclaimableCPU = allocatableMilliCPU
prodReclaimableMemory = prodReclaimable.Memory().String()
}

nodeAllocatable := node.Status.Allocatable
defaultStrategy := sloconfig.DefaultColocationStrategy()
cpuThresholdRatio := getPercentFromStrategy(strategy, &defaultStrategy, MidCPUThreshold)
if maxMilliCPU := float64(nodeAllocatable.Cpu().MilliValue()) * cpuThresholdRatio; allocatableMilliCPU > int64(maxMilliCPU) {
allocatableMilliCPU = int64(maxMilliCPU)
}
Expand All @@ -147,10 +189,7 @@ func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *cor
}
cpuInMilliCores := resource.NewQuantity(allocatableMilliCPU, resource.DecimalSI)

memThresholdRatio := 1.0
if strategy != nil && strategy.MidMemoryThresholdPercent != nil {
memThresholdRatio = float64(*strategy.MidMemoryThresholdPercent) / 100
}
memThresholdRatio := getPercentFromStrategy(strategy, &defaultStrategy, MidMemoryThreshold)
if maxMemory := float64(nodeAllocatable.Memory().Value()) * memThresholdRatio; allocatableMemory > int64(maxMemory) {
allocatableMemory = int64(maxMemory)
}
Expand All @@ -161,6 +200,17 @@ func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *cor
}
memory := resource.NewQuantity(allocatableMemory, resource.BinarySI)

// add unallocated
unallocated := p.getUnallocated(node, podList)
// The CPU quantity must be converted to its milli value
unallocatedCPU, unallocatedMemory := resource.NewQuantity(unallocated.Cpu().MilliValue(), resource.DecimalSI), unallocated.Memory()
midUnallocatedRatio := getPercentFromStrategy(strategy, &defaultStrategy, MidUnallocatedPercent)
adjustedUnallocatedCPU := resource.NewQuantity(int64(float64(unallocatedCPU.Value())*midUnallocatedRatio), resource.DecimalSI)
adjustedUnallocatedMemory := resource.NewQuantity(int64(float64(unallocatedMemory.Value())*midUnallocatedRatio), resource.BinarySI)

cpuInMilliCores.Add(*adjustedUnallocatedCPU)
memory.Add(*adjustedUnallocatedMemory)

metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.MidCPU), metrics.UnitInteger, float64(cpuInMilliCores.MilliValue())/1000)
metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.MidMemory), metrics.UnitByte, float64(memory.Value()))
klog.V(6).Infof("calculated mid allocatable for node %s, cpu(milli-core) %v, memory(byte) %v",
Expand All @@ -170,14 +220,14 @@ func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *cor
{
Name: extension.MidCPU,
Quantity: cpuInMilliCores, // in milli-cores
Message: fmt.Sprintf("midAllocatable[CPU(milli-core)]:%v = min(nodeAllocatable:%v * thresholdRatio:%v, ProdReclaimable:%v)",
cpuInMilliCores.Value(), nodeAllocatable.Cpu().MilliValue(), cpuThresholdRatio, prodReclaimable.Cpu().MilliValue()),
Message: fmt.Sprintf("midAllocatable[CPU(milli-core)]:%v = min(nodeAllocatable:%v * thresholdRatio:%v, ProdReclaimable:%v) + Unallocated:%v * midUnallocatedRatio:%v",
cpuInMilliCores.Value(), nodeAllocatable.Cpu().MilliValue(), cpuThresholdRatio, prodReclaimableCPU, unallocatedCPU.Value(), midUnallocatedRatio),
},
{
Name: extension.MidMemory,
Quantity: memory,
Message: fmt.Sprintf("midAllocatable[Memory(byte)]:%s = min(nodeAllocatable:%s * thresholdRatio:%v, ProdReclaimable:%s)",
memory.String(), nodeAllocatable.Memory().String(), memThresholdRatio, prodReclaimable.Memory().String()),
Message: fmt.Sprintf("midAllocatable[Memory(byte)]:%s = min(nodeAllocatable:%s * thresholdRatio:%v, ProdReclaimable:%s) + Unallocated:%v * midUnallocatedRatio:%v",
memory.String(), nodeAllocatable.Memory().String(), memThresholdRatio, prodReclaimableMemory, unallocatedMemory.String(), midUnallocatedRatio),
},
}
}
Expand All @@ -196,3 +246,26 @@ func prepareNodeForResource(node *corev1.Node, nr *framework.NodeResource, name
node.Status.Allocatable[name] = *q
}
}

// getPercentFromStrategy resolves one Mid-tier percentage option to a ratio
// (percent / 100).
//
// Resolution order:
//  1. the field on strategy, when strategy and the field are non-nil;
//  2. the same field on defaultStrategy, when it is non-nil;
//  3. a built-in fallback (100% for the CPU and memory thresholds, 0% for the
//     unallocated percent), so a nil or partially populated default strategy
//     cannot cause a nil-pointer panic.
//
// strategyType must be one of MidCPUThreshold, MidMemoryThreshold or
// MidUnallocatedPercent. Any other value returns -1, a sentinel that is not a
// valid ratio and must not be used in a calculation.
func getPercentFromStrategy(strategy, defaultStrategy *configuration.ColocationStrategy, strategyType string) float64 {
	// field extracts the percentage selected by strategyType from a non-nil strategy.
	var field func(s *configuration.ColocationStrategy) *int64
	var fallbackPercent int64

	switch strategyType {
	case MidCPUThreshold:
		field = func(s *configuration.ColocationStrategy) *int64 { return s.MidCPUThresholdPercent }
		fallbackPercent = 100
	case MidMemoryThreshold:
		field = func(s *configuration.ColocationStrategy) *int64 { return s.MidMemoryThresholdPercent }
		fallbackPercent = 100
	case MidUnallocatedPercent:
		field = func(s *configuration.ColocationStrategy) *int64 { return s.MidUnallocatedPercent }
		fallbackPercent = 0
	default:
		// Unknown selector: keep the historical sentinel so existing callers see no change.
		return -1
	}

	// The explicitly configured value takes precedence over the default strategy.
	for _, candidate := range []*configuration.ColocationStrategy{strategy, defaultStrategy} {
		if candidate == nil {
			continue
		}
		if percent := field(candidate); percent != nil {
			return float64(*percent) / 100
		}
	}

	return float64(fallbackPercent) / 100
}
Loading

0 comments on commit 2bb3466

Please sign in to comment.