Merge "Add kubernetes 1.30.6 patches"
commit 8944bca456
@@ -0,0 +1,200 @@
From 63e8d996af35f797806db199728554ff13bc3fd9 Mon Sep 17 00:00:00 2001
From: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
Date: Wed, 8 Jan 2025 07:28:12 -0500
Subject: [PATCH] kubeadm: create platform pods with zero CPU resources

This specifies zero CPU resources when creating the manifests
for the static platform pods, as a workaround for the lack of
separate resource tracking for platform resources.

This specifies zero CPU and Memory resources for the coredns
deployment. manifests.go is the main source file for this; it is
unclear whether the coredns.yaml files are used, but they are
updated to be consistent.

This specifies a CPU limit of 1 for the kube-apiserver pod so that it
is treated as burstable QoS. This gives a boost of cgroup CPUShares
since the burstable cgroup parent has significantly more CPUShares
than best-effort on typical systems. This improves kube-apiserver
API responsiveness.

This increases the kube-apiserver Readiness probe periodSeconds to 10
based on the WRS/SS joint recommendation for minimum probe settings.
This reduces the likelihood of kube-apiserver probe failure and
subsequent pod restart under severe load. This also reduces CPU
demand.

Signed-off-by: Daniel Safta <daniel.safta@windriver.com>
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
Signed-off-by: Jim Gauld <James.Gauld@windriver.com>
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
---
 cluster/addons/dns/coredns/coredns.yaml.base   |  4 ++--
 cluster/addons/dns/coredns/coredns.yaml.in     |  4 ++--
 cluster/addons/dns/coredns/coredns.yaml.sed    |  4 ++--
 cmd/kubeadm/app/phases/addons/dns/dns_test.go  |  8 ++++----
 cmd/kubeadm/app/phases/addons/dns/manifests.go |  4 ++--
 .../app/phases/controlplane/manifests.go       | 10 ++++++----
 cmd/kubeadm/app/util/staticpod/utils.go        | 17 ++++++++++++++++-
 7 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/cluster/addons/dns/coredns/coredns.yaml.base b/cluster/addons/dns/coredns/coredns.yaml.base
index 3d438dce445..41088f063eb 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.base
+++ b/cluster/addons/dns/coredns/coredns.yaml.base
@@ -139,8 +139,8 @@ spec:
           limits:
             memory: __DNS__MEMORY__LIMIT__
           requests:
-            cpu: 100m
-            memory: 70Mi
+            cpu: 0
+            memory: 0
         args: [ "-conf", "/etc/coredns/Corefile" ]
         volumeMounts:
         - name: config-volume
diff --git a/cluster/addons/dns/coredns/coredns.yaml.in b/cluster/addons/dns/coredns/coredns.yaml.in
index 419acb0e966..906d6d28890 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.in
+++ b/cluster/addons/dns/coredns/coredns.yaml.in
@@ -139,8 +139,8 @@ spec:
           limits:
             memory: 'dns_memory_limit'
           requests:
-            cpu: 100m
-            memory: 70Mi
+            cpu: 0
+            memory: 0
         args: [ "-conf", "/etc/coredns/Corefile" ]
         volumeMounts:
         - name: config-volume
diff --git a/cluster/addons/dns/coredns/coredns.yaml.sed b/cluster/addons/dns/coredns/coredns.yaml.sed
index a35df71454f..af0fae57dbd 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.sed
+++ b/cluster/addons/dns/coredns/coredns.yaml.sed
@@ -139,8 +139,8 @@ spec:
           limits:
             memory: $DNS_MEMORY_LIMIT
           requests:
-            cpu: 100m
-            memory: 70Mi
+            cpu: 0
+            memory: 0
         args: [ "-conf", "/etc/coredns/Corefile" ]
         volumeMounts:
         - name: config-volume
diff --git a/cmd/kubeadm/app/phases/addons/dns/dns_test.go b/cmd/kubeadm/app/phases/addons/dns/dns_test.go
index 0b6c12eb41b..4f2022d7105 100644
--- a/cmd/kubeadm/app/phases/addons/dns/dns_test.go
+++ b/cmd/kubeadm/app/phases/addons/dns/dns_test.go
@@ -705,8 +705,8 @@ spec:
           limits:
             memory: 170Mi
           requests:
-            cpu: 100m
-            memory: 70Mi
+            cpu: 0
+            memory: 0
         args: [ "-conf", "/etc/coredns/Corefile" ]
         volumeMounts:
         - name: config-volume
@@ -970,8 +970,8 @@ spec:
           limits:
             memory: 170Mi
           requests:
-            cpu: 100m
-            memory: 70Mi
+            cpu: 0
+            memory: 0
         args: [ "-conf", "/etc/coredns/Corefile" ]
         volumeMounts:
         - name: config-volume
diff --git a/cmd/kubeadm/app/phases/addons/dns/manifests.go b/cmd/kubeadm/app/phases/addons/dns/manifests.go
index 905a2e050e6..2a2212d5d37 100644
--- a/cmd/kubeadm/app/phases/addons/dns/manifests.go
+++ b/cmd/kubeadm/app/phases/addons/dns/manifests.go
@@ -104,8 +104,8 @@ spec:
           limits:
             memory: 170Mi
           requests:
-            cpu: 100m
-            memory: 70Mi
+            cpu: 0
+            memory: 0
         args: [ "-conf", "/etc/coredns/Corefile" ]
         volumeMounts:
         - name: config-volume
diff --git a/cmd/kubeadm/app/phases/controlplane/manifests.go b/cmd/kubeadm/app/phases/controlplane/manifests.go
index 11b93e083db..3458efe3a03 100644
--- a/cmd/kubeadm/app/phases/controlplane/manifests.go
+++ b/cmd/kubeadm/app/phases/controlplane/manifests.go
@@ -67,8 +67,10 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
 			LivenessProbe:  staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", endpoint.BindPort, v1.URISchemeHTTPS),
 			ReadinessProbe: staticpodutil.ReadinessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/readyz", endpoint.BindPort, v1.URISchemeHTTPS),
 			StartupProbe:   staticpodutil.StartupProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", endpoint.BindPort, v1.URISchemeHTTPS, componentHealthCheckTimeout),
-			Resources:      staticpodutil.ComponentResources("250m"),
-			Env:            kubeadmutil.MergeKubeadmEnvVars(proxyEnvs, cfg.APIServer.ExtraEnvs),
+			// WRS: Increase kube-apiserver cgroup CPUShares to improve API responsiveness;
+			// achieved by setting CPU Limits to make it burstable QoS.
+			Resources:      staticpodutil.ComponentLimitResources("0", "1"),
+			Env:            kubeadmutil.MergeKubeadmEnvVars(proxyEnvs, cfg.APIServer.ExtraEnvs),
 		}, mounts.GetVolumes(kubeadmconstants.KubeAPIServer),
 			map[string]string{kubeadmconstants.KubeAPIServerAdvertiseAddressEndpointAnnotationKey: endpoint.String()}),
 		kubeadmconstants.KubeControllerManager: staticpodutil.ComponentPod(v1.Container{
@@ -79,7 +81,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
 			VolumeMounts:  staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)),
 			LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS),
 			StartupProbe:  staticpodutil.StartupProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS, componentHealthCheckTimeout),
-			Resources:     staticpodutil.ComponentResources("200m"),
+			Resources:     staticpodutil.ComponentResources("0"),
 			Env:           kubeadmutil.MergeKubeadmEnvVars(proxyEnvs, cfg.ControllerManager.ExtraEnvs),
 		}, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil),
 		kubeadmconstants.KubeScheduler: staticpodutil.ComponentPod(v1.Container{
@@ -90,7 +92,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
 			VolumeMounts:  staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)),
 			LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS),
 			StartupProbe:  staticpodutil.StartupProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS, componentHealthCheckTimeout),
-			Resources:     staticpodutil.ComponentResources("100m"),
+			Resources:     staticpodutil.ComponentResources("0"),
 			Env:           kubeadmutil.MergeKubeadmEnvVars(proxyEnvs, cfg.Scheduler.ExtraEnvs),
 		}, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil),
 	}
diff --git a/cmd/kubeadm/app/util/staticpod/utils.go b/cmd/kubeadm/app/util/staticpod/utils.go
index fcd28eff4d0..c87d6c954eb 100644
--- a/cmd/kubeadm/app/util/staticpod/utils.go
+++ b/cmd/kubeadm/app/util/staticpod/utils.go
@@ -99,6 +99,18 @@ func ComponentResources(cpu string) v1.ResourceRequirements {
 	}
 }
 
+// ComponentLimitResources returns the v1.ResourceRequirements object needed for allocating a specified amount of the CPU with Limits
+func ComponentLimitResources(cpu string, lcpu string) v1.ResourceRequirements {
+	return v1.ResourceRequirements{
+		Requests: v1.ResourceList{
+			v1.ResourceCPU: resource.MustParse(cpu),
+		},
+		Limits: v1.ResourceList{
+			v1.ResourceCPU: resource.MustParse(lcpu),
+		},
+	}
+}
+
 // NewVolume creates a v1.Volume with a hostPath mount to the specified location
 func NewVolume(name, path string, pathType *v1.HostPathType) v1.Volume {
 	return v1.Volume{
@@ -255,7 +267,10 @@ func LivenessProbe(host, path string, port int32, scheme v1.URIScheme) *v1.Probe
 func ReadinessProbe(host, path string, port int32, scheme v1.URIScheme) *v1.Probe {
 	// sets initialDelaySeconds as '0' because we don't want to delay user infrastructure checks
 	// looking for "ready" status on kubeadm static Pods
-	return createHTTPProbe(host, path, port, scheme, 0, 15, 3, 1)
+	// WRS/SS joint recommendation: All pods probes should have following minimum probe
+	// settings unless required by the service (initialDelaySecond 0, periodSeconds 10,
+	// timeoutSeconds 5, successThreshold 1, failureThreshold 3)
+	return createHTTPProbe(host, path, port, scheme, 0, 15, 3, 10)
 }
 
 // StartupProbe creates a Probe object with a HTTPGet handler
--
2.25.1
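
[Illustrative sketch, not part of the patch series.] A minimal, self-contained Go program showing what the new ComponentLimitResources("0", "1") helper produces for the kube-apiserver container and why a lone CPU limit moves the static pod into the Burstable QoS class; the helper body mirrors the hunk above, while the package/main wrapper is assumed for illustration only.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// componentLimitResources mirrors the ComponentLimitResources helper added to
// cmd/kubeadm/app/util/staticpod/utils.go by the patch above.
func componentLimitResources(cpu, lcpu string) v1.ResourceRequirements {
	return v1.ResourceRequirements{
		Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse(cpu)},
		Limits:   v1.ResourceList{v1.ResourceCPU: resource.MustParse(lcpu)},
	}
}

func main() {
	// kube-apiserver under this patch: CPU request 0, CPU limit 1.
	// A zero request plus a non-zero CPU limit (and no matching memory
	// limits) classifies the pod as Burstable rather than BestEffort or
	// Guaranteed, so it inherits the larger cpu.shares of the burstable
	// cgroup parent, as described in the commit message.
	r := componentLimitResources("0", "1")
	fmt.Println("request:", r.Requests.Cpu().String(), "limit:", r.Limits.Cpu().String())
}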

@@ -0,0 +1,88 @@
From 6725ab07375fc9fc8035b8919b6f2b7f601168c5 Mon Sep 17 00:00:00 2001
From: Ferdinando Terada <Ferdinando.GodoyTerada@windriver.com>
Date: Mon, 23 Dec 2024 17:53:09 -0300
Subject: [PATCH] Adjust timeout for coredns readinessProbe

The timeout value for the readinessProbe of CoreDNS was increased.
This adjustment was necessary to avoid issues during stress testing,
ensuring that the component can properly handle high-load situations
and prevent premature failure in readiness checks.
---
 cluster/addons/dns/coredns/coredns.yaml.base   | 1 +
 cluster/addons/dns/coredns/coredns.yaml.in     | 1 +
 cluster/addons/dns/coredns/coredns.yaml.sed    | 1 +
 cmd/kubeadm/app/phases/addons/dns/dns_test.go  | 2 ++
 cmd/kubeadm/app/phases/addons/dns/manifests.go | 1 +
 5 files changed, 6 insertions(+)

diff --git a/cluster/addons/dns/coredns/coredns.yaml.base b/cluster/addons/dns/coredns/coredns.yaml.base
index 41088f063eb..4bcb2b2e4fe 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.base
+++ b/cluster/addons/dns/coredns/coredns.yaml.base
@@ -170,6 +170,7 @@ spec:
             path: /ready
             port: 8181
             scheme: HTTP
+          timeoutSeconds: 5
         securityContext:
           allowPrivilegeEscalation: false
           capabilities:
diff --git a/cluster/addons/dns/coredns/coredns.yaml.in b/cluster/addons/dns/coredns/coredns.yaml.in
index 906d6d28890..744f8cb730a 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.in
+++ b/cluster/addons/dns/coredns/coredns.yaml.in
@@ -170,6 +170,7 @@ spec:
             path: /ready
             port: 8181
             scheme: HTTP
+          timeoutSeconds: 5
         securityContext:
           allowPrivilegeEscalation: false
           capabilities:
diff --git a/cluster/addons/dns/coredns/coredns.yaml.sed b/cluster/addons/dns/coredns/coredns.yaml.sed
index af0fae57dbd..844f21f2abf 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.sed
+++ b/cluster/addons/dns/coredns/coredns.yaml.sed
@@ -170,6 +170,7 @@ spec:
             path: /ready
             port: 8181
             scheme: HTTP
+          timeoutSeconds: 5
         securityContext:
           allowPrivilegeEscalation: false
           capabilities:
diff --git a/cmd/kubeadm/app/phases/addons/dns/dns_test.go b/cmd/kubeadm/app/phases/addons/dns/dns_test.go
index 4f2022d7105..1f571f48bb8 100644
--- a/cmd/kubeadm/app/phases/addons/dns/dns_test.go
+++ b/cmd/kubeadm/app/phases/addons/dns/dns_test.go
@@ -736,6 +736,7 @@ spec:
             path: /ready
             port: 8181
             scheme: HTTP
+          timeoutSeconds: 5
         securityContext:
           allowPrivilegeEscalation: false
           capabilities:
@@ -1001,6 +1002,7 @@ spec:
             path: /ready
             port: 8181
             scheme: HTTP
+          timeoutSeconds: 5
         securityContext:
           allowPrivilegeEscalation: false
           capabilities:
diff --git a/cmd/kubeadm/app/phases/addons/dns/manifests.go b/cmd/kubeadm/app/phases/addons/dns/manifests.go
index 2a2212d5d37..c0be57357e4 100644
--- a/cmd/kubeadm/app/phases/addons/dns/manifests.go
+++ b/cmd/kubeadm/app/phases/addons/dns/manifests.go
@@ -135,6 +135,7 @@ spec:
             path: /ready
             port: 8181
             scheme: HTTP
+          timeoutSeconds: 5
         securityContext:
           allowPrivilegeEscalation: false
           capabilities:
--
2.25.1
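
[Illustrative sketch, not part of the patch series.] The probe shape that the first two patches converge on, written out as a k8s.io/api/core/v1 object. Only the values quoted in the commit messages and hunks (initialDelaySeconds 0, periodSeconds 10, timeoutSeconds 5, successThreshold 1, failureThreshold 3, the /ready endpoint on port 8181) come from the patches; the surrounding program is assumed.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
)

func main() {
	// Readiness probe following the WRS/SS minimum settings referenced above.
	probe := &v1.Probe{
		ProbeHandler: v1.ProbeHandler{
			HTTPGet: &v1.HTTPGetAction{
				Path:   "/ready",
				Port:   intstr.FromInt32(8181),
				Scheme: v1.URISchemeHTTP,
			},
		},
		InitialDelaySeconds: 0,  // do not delay the first check
		PeriodSeconds:       10, // probe less often to reduce CPU demand
		TimeoutSeconds:      5,  // tolerate slow responses under load
		SuccessThreshold:    1,
		FailureThreshold:    3,
	}
	fmt.Printf("%+v\n", probe)
}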

@@ -0,0 +1,33 @@
From 08425e565a3dff5d8b5b7cb2e9991280398cf21f Mon Sep 17 00:00:00 2001
From: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
Date: Fri, 17 Jan 2025 05:09:55 -0500
Subject: [PATCH] kubeadm: reduce UpgradeManifestTimeout

This modifies the kubeadm UpgradeManifestsTimeout from its default of
5 minutes to 3 minutes to reduce unnecessary delay in retries during
kubeadm-upgrade-apply failures.

The typical control-plane upgrade of static pods is 75 to 85 seconds,
so 3 minutes gives adequate buffer to complete the operation.

Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
---
 cmd/kubeadm/app/constants/constants.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmd/kubeadm/app/constants/constants.go b/cmd/kubeadm/app/constants/constants.go
index 0a3e0a15fce..02357d557da 100644
--- a/cmd/kubeadm/app/constants/constants.go
+++ b/cmd/kubeadm/app/constants/constants.go
@@ -235,7 +235,7 @@ const (
 	KubeletHealthCheckTimeout = 4 * time.Minute
 
 	// UpgradeManifestsTimeout specifies the default timeout for upgradring static Pod manifests
-	UpgradeManifestsTimeout = 5 * time.Minute
+	UpgradeManifestsTimeout = 3 * time.Minute
 
 	// PullImageRetry specifies how many times ContainerRuntime retries when pulling image failed
 	PullImageRetry = 5
--
2.25.1
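
[Illustrative sketch, not part of the patch series.] How a 3-minute bound like UpgradeManifestsTimeout is typically consumed by a polling wait; the PollUntilContextTimeout call is a generic apimachinery example, not the exact kubeadm call site, and the interval and condition are assumed.

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

const upgradeManifestsTimeout = 3 * time.Minute // value set by the patch above

func main() {
	// Poll every 5s until the (stubbed) condition succeeds or the 3-minute
	// budget runs out; with a typical static pod upgrade of 75 to 85 seconds,
	// this still leaves headroom for retries while failing faster than 5 minutes.
	err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second,
		upgradeManifestsTimeout, true,
		func(ctx context.Context) (bool, error) {
			return true, nil // stub: replace with a real readiness check
		})
	fmt.Println("wait returned:", err)
}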

@@ -0,0 +1,30 @@
From 45806d8b57d2bfdc24165264ea2f6aabadcd2e1a Mon Sep 17 00:00:00 2001
From: Boovan Rajendran <boovan.rajendran@windriver.com>
Date: Mon, 4 Sep 2023 08:05:29 -0400
Subject: [PATCH] kubelet CFS quota throttling for non integer cpulimit

Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
---
 pkg/kubelet/cm/internal_container_lifecycle_linux.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/cm/internal_container_lifecycle_linux.go b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
index 376efa9e0ef..a7e56ac6911 100644
--- a/pkg/kubelet/cm/internal_container_lifecycle_linux.go
+++ b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
@@ -40,7 +40,11 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
 	// Disable cgroup CFS throttle at the container level.
 	// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_quota_us
 	// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_period_us
-	if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed {
+	// We can only set CpuQuota to -1 if we're allocating the entire CPU.
+	// For fractional CPUs the CpuQuota is needed to enforce the limit.
+	cpuQuantity := container.Resources.Requests[v1.ResourceCPU]
+	fractionalCpuQuantity := cpuQuantity.MilliValue()%1000
+	if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed && fractionalCpuQuantity == 0 {
 		containerConfig.Linux.Resources.CpuPeriod = int64(100000)
 		containerConfig.Linux.Resources.CpuQuota = int64(-1)
 	}
--
2.25.1
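
[Illustrative sketch, not part of the patch series.] How the MilliValue()%1000 test used above separates integer CPU requests (where CFS quota can safely be disabled) from fractional ones (where the quota is still needed to enforce the limit); the helper name and driver program are assumed.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func isIntegerCPU(q resource.Quantity) bool {
	// Same test as the patch: a whole number of CPUs leaves no remainder in
	// millicores, e.g. "2" -> 2000m -> 2000%1000 == 0, while "1500m" leaves
	// 500m and must keep its CFS quota.
	return q.MilliValue()%1000 == 0
}

func main() {
	for _, s := range []string{"2", "1500m", "500m", "4"} {
		q := resource.MustParse(s)
		fmt.Printf("%6s integer=%v\n", s, isIntegerCPU(q))
	}
}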

@@ -0,0 +1,256 @@
From 521099d83512e74d11829b464d899c05b595d482 Mon Sep 17 00:00:00 2001
From: Boovan Rajendran <boovan.rajendran@windriver.com>
Date: Mon, 26 Feb 2024 06:27:48 -0500
Subject: [PATCH] kubelet cpumanager disable CFS quota throttling

This disables CFS CPU quota to avoid performance degradation due to
Linux kernel CFS quota implementation. Note that 4.18 kernel attempts
to solve the CFS throttling problem, but there are reports that it is
not completely effective.

This disables CFS quota throttling for Guaranteed pods for both
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
Disabling has a dramatic latency improvement for HTTP response times.

This patch is refactored in 1.22.5 due to new internal_container_lifecycle
framework. We leverage the same mechanism to set Linux resources as:
cpu manager: specify the container CPU set during the creation

Co-authored-by: Jim Gauld <james.gauld@windriver.com>
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
---
 pkg/kubelet/cm/cpumanager/cpu_manager.go      |  7 +++
 pkg/kubelet/cm/cpumanager/fake_cpu_manager.go | 10 ++++-
 pkg/kubelet/cm/helpers_linux.go               | 10 +++++
 pkg/kubelet/cm/helpers_linux_test.go          | 43 ++++++++++---------
 .../cm/internal_container_lifecycle_linux.go  |  9 ++++
 5 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index 8b5049d7d74..2d1b6eefebe 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -73,6 +73,9 @@ type Manager interface {
 	// State returns a read-only interface to the internal CPU manager state.
 	State() state.Reader
 
+	// GetCPUPolicy returns the assigned CPU manager policy
+	GetCPUPolicy() string
+
 	// GetTopologyHints implements the topologymanager.HintProvider Interface
 	// and is consulted to achieve NUMA aware resource alignment among this
 	// and other resource controllers.
@@ -243,6 +246,10 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
 	return nil
 }
 
+func (m *manager) GetCPUPolicy() string {
+	return m.policy.Name()
+}
+
 func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error {
 	// The pod is during the admission phase. We need to save the pod to avoid it
 	// being cleaned before the admission ended
diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
index 4a03f3dd23f..b36fb0da3b4 100644
--- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
@@ -28,7 +28,8 @@ import (
 )
 
 type fakeManager struct {
-	state state.State
+	policy Policy
+	state  state.State
 }
 
 func (m *fakeManager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
@@ -70,6 +71,10 @@ func (m *fakeManager) State() state.Reader {
 	return m.state
 }
 
+func (m *fakeManager) GetCPUPolicy() string {
+	return m.policy.Name()
+}
+
 func (m *fakeManager) GetExclusiveCPUs(podUID, containerName string) cpuset.CPUSet {
 	klog.InfoS("GetExclusiveCPUs", "podUID", podUID, "containerName", containerName)
 	return cpuset.CPUSet{}
@@ -88,6 +93,7 @@ func (m *fakeManager) GetCPUAffinity(podUID, containerName string) cpuset.CPUSet
 // NewFakeManager creates empty/fake cpu manager
 func NewFakeManager() Manager {
 	return &fakeManager{
-		state: state.NewMemoryState(),
+		policy: &nonePolicy{},
+		state:  state.NewMemoryState(),
 	}
 }
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
index 8a144e7a73c..008b955ee98 100644
--- a/pkg/kubelet/cm/helpers_linux.go
+++ b/pkg/kubelet/cm/helpers_linux.go
@@ -170,6 +170,16 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64,
 	// build the result
 	result := &ResourceConfig{}
 	if qosClass == v1.PodQOSGuaranteed {
+		// Disable CFS CPU quota to avoid performance degradation due to
+		// Linux kernel CFS throttle implementation.
+		// NOTE: 4.18 kernel attempts to solve CFS throttling problem,
+		// but there are reports that it is not completely effective.
+		// This will configure cgroup CFS parameters at pod level:
+		// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_quota_us
+		// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_period_us
+		cpuQuota = int64(-1)
+		cpuPeriod = uint64(100000)
+
 		result.CPUShares = &cpuShares
 		result.CPUQuota = &cpuQuota
 		result.CPUPeriod = &cpuPeriod
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
index fba41fd49be..60609394659 100644
--- a/pkg/kubelet/cm/helpers_linux_test.go
+++ b/pkg/kubelet/cm/helpers_linux_test.go
@@ -64,8 +64,9 @@ func TestResourceConfigForPod(t *testing.T) {
 	burstablePartialShares := MilliCPUToShares(200)
 	burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
 	guaranteedShares := MilliCPUToShares(100)
-	guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
-	guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
+	guaranteedQuotaPeriod := uint64(100000)
+	guaranteedQuota := int64(-1)
+	guaranteedTunedQuota := int64(-1)
 	memoryQuantity = resource.MustParse("100Mi")
 	cpuNoLimit := int64(-1)
 	guaranteedMemory := memoryQuantity.Value()
@@ -204,8 +205,8 @@ func TestResourceConfigForPod(t *testing.T) {
 			},
 			},
 			enforceCPULimits: true,
-			quotaPeriod:      defaultQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-no-cpu-enforcement": {
 			pod: &v1.Pod{
@@ -218,8 +219,8 @@ func TestResourceConfigForPod(t *testing.T) {
 			},
 			},
 			enforceCPULimits: false,
-			quotaPeriod:      defaultQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-with-tuned-quota": {
 			pod: &v1.Pod{
@@ -232,8 +233,8 @@ func TestResourceConfigForPod(t *testing.T) {
 			},
 			},
 			enforceCPULimits: true,
-			quotaPeriod:      tunedQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-no-cpu-enforcement-with-tuned-quota": {
 			pod: &v1.Pod{
@@ -246,8 +247,8 @@ func TestResourceConfigForPod(t *testing.T) {
 			},
 			},
 			enforceCPULimits: false,
-			quotaPeriod:      tunedQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"burstable-partial-limits-with-init-containers": {
 			pod: &v1.Pod{
@@ -309,8 +310,10 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 	burstablePartialShares := MilliCPUToShares(200)
 	burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
 	guaranteedShares := MilliCPUToShares(100)
-	guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
-	guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
+	guaranteedQuotaPeriod := uint64(100000)
+	guaranteedQuota := int64(-1)
+	guaranteedTunedQuota := int64(-1)
+
 	memoryQuantity = resource.MustParse("100Mi")
 	cpuNoLimit := int64(-1)
 	guaranteedMemory := memoryQuantity.Value()
@@ -449,8 +452,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 			},
 			},
 			enforceCPULimits: true,
-			quotaPeriod:      defaultQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-no-cpu-enforcement": {
 			pod: &v1.Pod{
@@ -463,8 +466,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 			},
 			},
 			enforceCPULimits: false,
-			quotaPeriod:      defaultQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-with-tuned-quota": {
 			pod: &v1.Pod{
@@ -477,8 +480,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 			},
 			},
 			enforceCPULimits: true,
-			quotaPeriod:      tunedQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-no-cpu-enforcement-with-tuned-quota": {
 			pod: &v1.Pod{
@@ -491,8 +494,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 			},
 			},
 			enforceCPULimits: false,
-			quotaPeriod:      tunedQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 	}
 
diff --git a/pkg/kubelet/cm/internal_container_lifecycle_linux.go b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
index 0c3bb2e4999..376efa9e0ef 100644
--- a/pkg/kubelet/cm/internal_container_lifecycle_linux.go
+++ b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
@@ -26,6 +26,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/util/sets"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
+	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
 )
 
 func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, container *v1.Container, containerConfig *runtimeapi.ContainerConfig) error {
@@ -36,6 +37,14 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
 		}
 	}
 
+	// Disable cgroup CFS throttle at the container level.
+	// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_quota_us
+	// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_period_us
+	if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed {
+		containerConfig.Linux.Resources.CpuPeriod = int64(100000)
+		containerConfig.Linux.Resources.CpuQuota = int64(-1)
+	}
+
 	if i.memoryManager != nil {
 		numaNodes := i.memoryManager.GetMemoryNUMANodes(pod, container)
 		if numaNodes.Len() > 0 {
--
2.25.1
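
[Illustrative sketch, not part of the patch series.] The pod-level cgroup values that ResourceConfigForPod ends up emitting for a Guaranteed pod once this patch is applied: shares derived from the request as before, but quota pinned to -1 (unlimited) with a 100000us period, mirroring the expectations rewritten in helpers_linux_test.go. The struct shape and helper below are simplified stand-ins, not the kubelet's own types.

package main

import "fmt"

// resourceConfig is a simplified stand-in for the kubelet's ResourceConfig.
type resourceConfig struct {
	CPUShares *uint64
	CPUQuota  *int64
	CPUPeriod *uint64
}

func milliCPUToShares(milliCPU int64) uint64 {
	// 1024 shares per CPU with a floor of 2, the standard kubelet conversion.
	const sharesPerCPU, minShares = 1024, 2
	shares := milliCPU * sharesPerCPU / 1000
	if shares < minShares {
		return minShares
	}
	return uint64(shares)
}

func main() {
	// A Guaranteed pod requesting 100m CPU under this patch:
	shares := milliCPUToShares(100) // 102, unchanged by the patch
	quota := int64(-1)              // CFS throttling disabled at pod level
	period := uint64(100000)        // 100ms period, as forced by the patch
	cfg := resourceConfig{CPUShares: &shares, CPUQuota: &quota, CPUPeriod: &period}
	fmt.Printf("shares=%d quota=%d period=%d\n", *cfg.CPUShares, *cfg.CPUQuota, *cfg.CPUPeriod)
}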

@@ -0,0 +1,789 @@
From af61145f1ccd068da8937e6b20107f25023cdcb0 Mon Sep 17 00:00:00 2001
From: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
Date: Tue, 11 Feb 2025 05:27:58 -0500
Subject: [PATCH] kubelet cpumanager introduce concept of isolated CPUs

This introduces the concept of "isolated CPUs", which are CPUs that
have been isolated at the kernel level via the "isolcpus" kernel boot
parameter.

When starting the kubelet process, two separate sets of reserved CPUs
may be specified. With this change CPUs reserved via
'--system-reserved=cpu' will be used for infrastructure pods while the
isolated CPUs should be reserved via '--kube-reserved=cpu' to cause
kubelet to skip over them for "normal" CPU resource tracking. The
kubelet code will double-check that the specified isolated CPUs match
what the kernel exposes in "/sys/devices/system/cpu/isolated".

A plugin (outside the scope of this commit) will expose the isolated
CPUs to kubelet via the device plugin API.

If a pod specifies some number of "isolcpus" resources, the device
manager will allocate them. In this code we check whether such
resources have been allocated, and if so we set the container cpuset to
the isolated CPUs. This does mean that it really only makes sense to
specify "isolcpus" resources for best-effort or burstable pods, not for
guaranteed ones since that would throw off the accounting code. In
order to ensure the accounting still works as designed, if "isolcpus"
are specified for guaranteed pods, the affinity will be set to the
non-isolated CPUs.

This patch was refactored in 1.21.3 due to upstream API change
node: podresources: make GetDevices() consistent
(commit ad68f9588c72d6477b5a290c548a9031063ac659).

The routine podIsolCPUs() was refactored in 1.21.3 since the API
p.deviceManager.GetDevices() is returning multiple devices with
a device per cpu. The resultant cpuset needs to be the aggregate.

The routine NewStaticPolicy was refactored in 1.22.5, adding a new argument
in its signature: cpuPolicyOptions map[string]string. This change implies
shifting the new arguments (deviceManager, excludeReserved) one position
to the right.

Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
Co-authored-by: Chris Friesen <chris.friesen@windriver.com>
|
||||||
|
Signed-off-by: Gleb Aronsky <gleb.aronsky@windriver.com>
|
||||||
|
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
||||||
|
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
|
||||||
|
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||||
|
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
|
||||||
|
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||||
|
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
|
||||||
|
---
|
||||||
|
pkg/kubelet/cm/container_manager_linux.go | 1 +
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager.go | 35 +++++-
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 31 +++--
|
||||||
|
pkg/kubelet/cm/cpumanager/policy_static.go | 90 +++++++++++++-
|
||||||
|
.../cm/cpumanager/policy_static_test.go | 57 +++++++--
|
||||||
|
pkg/kubelet/cm/devicemanager/manager_stub.go | 110 ++++++++++++++++++
|
||||||
|
6 files changed, 298 insertions(+), 26 deletions(-)
|
||||||
|
create mode 100644 pkg/kubelet/cm/devicemanager/manager_stub.go
|
||||||
|
|
||||||
|
diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
|
||||||
|
index 4ed1aa7b591..7eb76b4c20f 100644
|
||||||
|
--- a/pkg/kubelet/cm/container_manager_linux.go
|
||||||
|
+++ b/pkg/kubelet/cm/container_manager_linux.go
|
||||||
|
@@ -324,6 +324,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
|
||||||
|
cm.GetNodeAllocatableReservation(),
|
||||||
|
nodeConfig.KubeletRootDir,
|
||||||
|
cm.topologyManager,
|
||||||
|
+ cm.deviceManager,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
klog.ErrorS(err, "Failed to initialize cpu manager")
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
index e0a359932b7..acd51fdef8d 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
@@ -20,6 +20,8 @@ import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
+ "os"
|
||||||
|
+ "strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
@@ -32,6 +34,7 @@ import (
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||||
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||||
|
@@ -51,6 +54,25 @@ type policyName string
|
||||||
|
// cpuManagerStateFileName is the file name where cpu manager stores its state
|
||||||
|
const cpuManagerStateFileName = "cpu_manager_state"
|
||||||
|
|
||||||
|
+// get the system-level isolated CPUs
|
||||||
|
+func getIsolcpus() cpuset.CPUSet {
|
||||||
|
+ dat, err := os.ReadFile("/sys/devices/system/cpu/isolated")
|
||||||
|
+ if err != nil {
|
||||||
|
+ klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||||
|
+ return cpuset.New()
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // The isolated cpus string ends in a newline
|
||||||
|
+ cpustring := strings.TrimSuffix(string(dat), "\n")
|
||||||
|
+ cset, err := cpuset.Parse(cpustring)
|
||||||
|
+ if err != nil {
|
||||||
|
+ klog.Errorf("[cpumanager] unable to parse sysfs isolcpus string to cpuset")
|
||||||
|
+ return cpuset.New()
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return cset
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
// Manager interface provides methods for Kubelet to manage pod cpus.
|
||||||
|
type Manager interface {
|
||||||
|
// Start is called during Kubelet initialization.
|
||||||
|
@@ -154,7 +176,8 @@ func (s *sourcesReadyStub) AddSource(source string) {}
|
||||||
|
func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||||
|
|
||||||
|
// NewManager creates new cpu manager based on provided policy
|
||||||
|
-func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
|
||||||
|
+func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store, deviceManager devicemanager.Manager) (Manager, error) {
|
||||||
|
+
|
||||||
|
var topo *topology.CPUTopology
|
||||||
|
var policy Policy
|
||||||
|
var err error
|
||||||
|
@@ -195,7 +218,15 @@ func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconc
|
||||||
|
// NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||||
|
// This variable is primarily to make testing easier.
|
||||||
|
excludeReserved := true
|
||||||
|
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions, excludeReserved)
|
||||||
|
+
|
||||||
|
+ // isolCPUs is the set of kernel-isolated CPUs. They should be a subset of specificCPUs or
|
||||||
|
+ // of the CPUs that NewStaticPolicy() will pick if numReservedCPUs is set. It's only in the
|
||||||
|
+ // argument list here for ease of testing, it's really internal to the policy.
|
||||||
|
+ isolCPUs := getIsolcpus()
|
||||||
|
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, isolCPUs, affinity, cpuPolicyOptions, deviceManager, excludeReserved)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return nil, fmt.Errorf("new static policy error: %v", err)
|
||||||
|
+ }
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("new static policy error: %w", err)
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
index e5f5d07a2ad..8abf173ed5c 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
@@ -36,6 +36,7 @@ import (
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
|
"k8s.io/utils/cpuset"
|
||||||
|
)
|
||||||
|
@@ -263,6 +264,7 @@ func makeMultiContainerPodWithOptions(initCPUs, appCPUs []*containerOptions) *v1
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCPUManagerAdd(t *testing.T) {
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testExcl := false
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
@@ -278,9 +280,11 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||||
|
},
|
||||||
|
0,
|
||||||
|
cpuset.New(),
|
||||||
|
+ cpuset.New(),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
nil,
|
||||||
|
- testExcl)
|
||||||
|
+ testDM,
|
||||||
|
+ testExcl)
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
updateErr error
|
||||||
|
@@ -530,8 +534,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
testExcl := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||||
|
|
||||||
|
mockState := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -686,7 +691,9 @@ func TestCPUManagerGenerate(t *testing.T) {
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(sDir)
|
||||||
|
|
||||||
|
- mgr, err := NewManager(testCase.cpuPolicyName, nil, 5*time.Second, machineInfo, cpuset.New(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
|
||||||
|
+ testDM, err := devicemanager.NewManagerStub()
|
||||||
|
+ mgr, err := NewManager(testCase.cpuPolicyName, nil, 5*time.Second, machineInfo, cpuset.New(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
|
||||||
|
+
|
||||||
|
if testCase.expectedError != nil {
|
||||||
|
if !strings.Contains(err.Error(), testCase.expectedError.Error()) {
|
||||||
|
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error())
|
||||||
|
@@ -757,6 +764,7 @@ func TestCPUManagerRemove(t *testing.T) {
|
||||||
|
|
||||||
|
func TestReconcileState(t *testing.T) {
|
||||||
|
testExcl := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
NumCPUs: 8,
|
||||||
|
@@ -775,9 +783,11 @@ func TestReconcileState(t *testing.T) {
|
||||||
|
},
|
||||||
|
0,
|
||||||
|
cpuset.New(),
|
||||||
|
+ cpuset.New(),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
nil,
|
||||||
|
- testExcl)
|
||||||
|
+ testDM,
|
||||||
|
+ testExcl)
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
@@ -1282,6 +1292,7 @@ func TestReconcileState(t *testing.T) {
|
||||||
|
// the following tests are with --reserved-cpus configured
|
||||||
|
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||||
|
testExcl := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
NumCPUs: 4,
|
||||||
|
@@ -1296,9 +1307,11 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||||
|
},
|
||||||
|
1,
|
||||||
|
cpuset.New(0),
|
||||||
|
+ cpuset.New(),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
nil,
|
||||||
|
- testExcl)
|
||||||
|
+ testDM,
|
||||||
|
+ testExcl)
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
updateErr error
|
||||||
|
@@ -1410,7 +1423,8 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) {
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(sDir)
|
||||||
|
|
||||||
|
- _, err = NewManager(testCase.cpuPolicyName, testCase.cpuPolicyOptions, 5*time.Second, machineInfo, cpuset.New(), nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
|
||||||
|
+ testDM, err := devicemanager.NewManagerStub()
|
||||||
|
+ _, err = NewManager(testCase.cpuPolicyName, testCase.cpuPolicyOptions, 5*time.Second, machineInfo, cpuset.New(), nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
|
||||||
|
if err == nil {
|
||||||
|
t.Errorf("Expected error, but NewManager succeeded")
|
||||||
|
}
|
||||||
|
@@ -1424,6 +1438,7 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) {
|
||||||
|
|
||||||
|
func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
||||||
|
testExcl := false
|
||||||
|
+ testDm, _ := devicemanager.NewManagerStub()
|
||||||
|
nonePolicy, _ := NewNonePolicy(nil)
|
||||||
|
staticPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
@@ -1439,9 +1454,11 @@ func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
||||||
|
},
|
||||||
|
1,
|
||||||
|
cpuset.New(0),
|
||||||
|
+ cpuset.New(),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
nil,
|
||||||
|
- testExcl)
|
||||||
|
+ testDm,
|
||||||
|
+ testExcl)
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
index fff571491f0..22f2962de83 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
@@ -19,6 +19,7 @@ package cpumanager
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
+ "strconv"
|
||||||
|
|
||||||
|
v1 "k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
@@ -33,6 +34,7 @@ import (
|
||||||
|
"k8s.io/kubernetes/pkg/features"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||||
|
@@ -128,6 +130,10 @@ type staticPolicy struct {
|
||||||
|
topology *topology.CPUTopology
|
||||||
|
// set of CPUs that is not available for exclusive assignment
|
||||||
|
reservedCPUs cpuset.CPUSet
|
||||||
|
+ // subset of reserved CPUs with isolcpus attribute
|
||||||
|
+ isolcpus cpuset.CPUSet
|
||||||
|
+ // parent containerManager, used to get device list
|
||||||
|
+ deviceManager devicemanager.Manager
|
||||||
|
// If true, default CPUSet should exclude reserved CPUs
|
||||||
|
excludeReserved bool
|
||||||
|
// Superset of reservedCPUs. It includes not just the reservedCPUs themselves,
|
||||||
|
@@ -150,7 +156,8 @@ var _ Policy = &staticPolicy{}
|
||||||
|
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||||
|
// assignments for exclusively pinned guaranteed containers after the main
|
||||||
|
// container process starts.
|
||||||
|
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, excludeReserved bool) (Policy, error) {
|
||||||
|
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, isolCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, deviceManager devicemanager.Manager, excludeReserved bool) (Policy, error) {
|
||||||
|
+
|
||||||
|
opts, err := NewStaticPolicyOptions(cpuPolicyOptions)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
@@ -165,6 +172,8 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||||
|
policy := &staticPolicy{
|
||||||
|
topology: topology,
|
||||||
|
affinity: affinity,
|
||||||
|
+ isolcpus: isolCPUs,
|
||||||
|
+ deviceManager: deviceManager,
|
||||||
|
excludeReserved: excludeReserved,
|
||||||
|
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||||
|
options: opts,
|
||||||
|
@@ -201,6 +210,12 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||||
|
policy.reservedCPUs = reserved
|
||||||
|
policy.reservedPhysicalCPUs = reservedPhysicalCPUs
|
||||||
|
|
||||||
|
+ if !isolCPUs.IsSubsetOf(reserved) {
|
||||||
|
+ klog.Errorf("[cpumanager] isolCPUs %v is not a subset of reserved %v", isolCPUs, reserved)
|
||||||
|
+ reserved = reserved.Union(isolCPUs)
|
||||||
|
+ klog.Warningf("[cpumanager] mismatch isolCPUs %v, force reserved %v", isolCPUs, reserved)
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return policy, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -234,8 +249,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||||
|
} else {
|
||||||
|
s.SetDefaultCPUSet(allCPUs)
|
||||||
|
}
|
||||||
|
- klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||||
|
- allCPUs, p.reservedCPUs, s.GetDefaultCPUSet())
|
||||||
|
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, isolcpus:%v, default:%v\n",
|
||||||
|
+ allCPUs, p.reservedCPUs, p.isolcpus, s.GetDefaultCPUSet())
|
||||||
|
+
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -340,16 +356,46 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
- cpuset := p.reservedCPUs
|
||||||
|
+ cpuset := p.reservedCPUs.Clone().Difference(p.isolcpus)
|
||||||
|
if cpuset.IsEmpty() {
|
||||||
|
// If this happens then someone messed up.
|
||||||
|
- return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs)
|
||||||
|
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v, isolcpus:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs, p.isolcpus)
|
||||||
|
+
|
||||||
|
}
|
||||||
|
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||||
|
klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
+ // This corrects kubelet cpumanager static cpuset tracking for isolcpus,
|
||||||
|
+ // since version 1.26.1 . This ensures that pods specified with
|
||||||
|
+ // isolcpus + guaranteed QoS + integer cpu requests, are affined to
|
||||||
|
+ // exclusive cpuset and tracked as non-isolated cpus.
|
||||||
|
+ cpuQuantity := container.Resources.Requests[v1.ResourceCPU]
|
||||||
|
+ fractionalCpuQuantity := cpuQuantity.MilliValue() % 1000
|
||||||
|
+ if isolcpus := p.podIsolCPUs(pod, container); isolcpus.Size() > 0 &&
|
||||||
|
+ v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed && fractionalCpuQuantity == 0 {
|
||||||
|
+ // container has requested isolated CPUs
|
||||||
|
+ if set, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||||
|
+ if set.Equals(isolcpus) {
|
||||||
|
+ klog.Infof("[cpumanager] isolcpus container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||||
|
+ return nil
|
||||||
|
+ } else {
|
||||||
|
+ klog.Infof("[cpumanager] isolcpus container state has cpus %v, should be %v (namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||||
|
+ isolcpus, set, pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ // Note that we do not do anything about init containers here.
|
||||||
|
+ // It looks like devices are allocated per-pod based on effective requests/limits
|
||||||
|
+ // and extra devices from initContainers are not freed up when the regular containers start.
|
||||||
|
+ // TODO: confirm this is still true for 1.20
|
||||||
|
+ s.SetCPUSet(string(pod.UID), container.Name, isolcpus)
|
||||||
|
+ klog.Infof("[cpumanager] isolcpus: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, isolcpus)
|
||||||
|
+ return nil
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
numCPUs := p.guaranteedCPUs(pod, container)
|
||||||
|
if numCPUs == 0 {
|
||||||
|
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||||
|
@@ -415,7 +461,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||||
|
}
|
||||||
|
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||||
|
p.updateCPUsToReuse(pod, container, cpuset)
|
||||||
|
-
|
||||||
|
+ klog.Infof("[cpumanager] guaranteed: AddContainer "+
|
||||||
|
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); numCPUS=%d, cpuset=%v",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, numCPUs, cpuset)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -794,6 +842,36 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
+// get the isolated CPUs (if any) from the devices associated with a specific container
+func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
+	// NOTE: This is required for TestStaticPolicyAdd() since makePod() does
+	// not create UID. We also need a way to properly stub devicemanager.
+	if len(string(pod.UID)) == 0 {
+		return cpuset.New()
+	}
+	resContDevices := p.deviceManager.GetDevices(string(pod.UID), container.Name)
+	cpuSet := cpuset.New()
+	for resourceName, resourceDevs := range resContDevices {
+		// this resource name needs to match the isolcpus device plugin
+		if resourceName == "windriver.com/isolcpus" {
+			for devID, _ := range resourceDevs {
+				cpuStrList := []string{devID}
+				if len(cpuStrList) > 0 {
+					// loop over the list of strings, convert each one to int, add to cpuset
+					for _, cpuStr := range cpuStrList {
+						cpu, err := strconv.Atoi(cpuStr)
+						if err != nil {
+							panic(err)
+						}
+						cpuSet = cpuSet.Union(cpuset.New(cpu))
+					}
+				}
+			}
+		}
+	}
+	return cpuSet
+}
+
 // isHintSocketAligned function return true if numa nodes in hint are socket aligned.
 func (p *staticPolicy) isHintSocketAligned(hint topologymanager.TopologyHint, minAffinitySize int) bool {
 	numaNodesBitMask := hint.NUMANodeAffinity.GetBits()
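
A minimal standalone sketch of the device-ID handling above, assuming (as
podIsolCPUs does) that each device ID advertised by the
'windriver.com/isolcpus' plugin is a bare CPU number. devIDsToCPUSet is a
hypothetical helper used only for illustration, not part of the patch; it
relies on the same k8s.io/utils/cpuset package.

package main

import (
	"fmt"
	"strconv"

	"k8s.io/utils/cpuset"
)

// devIDsToCPUSet folds device IDs (assumed to be CPU numbers such as "3")
// into a CPUSet, mirroring the inner loop of podIsolCPUs.
func devIDsToCPUSet(devIDs []string) (cpuset.CPUSet, error) {
	cpus := []int{}
	for _, id := range devIDs {
		cpu, err := strconv.Atoi(id)
		if err != nil {
			return cpuset.New(), fmt.Errorf("device ID %q is not a CPU number: %w", id, err)
		}
		cpus = append(cpus, cpu)
	}
	return cpuset.New(cpus...), nil
}

func main() {
	isol, err := devIDsToCPUSet([]string{"3", "4", "11"})
	if err != nil {
		panic(err)
	}
	fmt.Println(isol.String()) // prints "3-4,11"
}
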
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
index 83614619550..ee70a833d33 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
@@ -30,6 +30,7 @@ import (
|
||||||
|
pkgfeatures "k8s.io/kubernetes/pkg/features"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||||
|
"k8s.io/utils/cpuset"
|
||||||
|
@@ -73,8 +74,9 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyName(t *testing.T) {
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testExcl := false
|
||||||
|
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||||
|
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||||
|
|
||||||
|
policyName := policy.Name()
|
||||||
|
if policyName != "static" {
|
||||||
|
@@ -84,6 +86,7 @@ func TestStaticPolicyName(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyStart(t *testing.T) {
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testCases := []staticPolicyTest{
|
||||||
|
{
|
||||||
|
description: "non-corrupted state",
|
||||||
|
@@ -159,7 +162,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||||
|
}
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
t.Run(testCase.description, func(t *testing.T) {
|
||||||
|
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||||
|
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testCase.excludeReserved)
|
||||||
|
|
||||||
|
policy := p.(*staticPolicy)
|
||||||
|
st := &mockState{
|
||||||
|
@@ -207,7 +210,6 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||||
|
largeTopoCPUSet := cpuset.New(largeTopoCPUids...)
|
||||||
|
largeTopoSock0CPUSet := cpuset.New(largeTopoSock0CPUids...)
|
||||||
|
largeTopoSock1CPUSet := cpuset.New(largeTopoSock1CPUids...)
|
||||||
|
-
|
||||||
|
// these are the cases which must behave the same regardless the policy options.
|
||||||
|
// So we will permutate the options to ensure this holds true.
|
||||||
|
|
||||||
|
@@ -630,7 +632,8 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
|
||||||
|
cpus = testCase.reservedCPUs.Clone()
|
||||||
|
}
|
||||||
|
testExcl := false
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, tm, testCase.options, testExcl)
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, cpus, tm, testCase.options, testDM, testExcl)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -677,6 +680,8 @@ func runStaticPolicyTestCaseWithFeatureGate(t *testing.T, testCase staticPolicyT
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyReuseCPUs(t *testing.T) {
|
||||||
|
+ excludeReserved := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testCases := []struct {
|
||||||
|
staticPolicyTest
|
||||||
|
expCSetAfterAlloc cpuset.CPUSet
|
||||||
|
@@ -701,7 +706,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -733,6 +738,8 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyDoNotReuseCPUs(t *testing.T) {
|
||||||
|
+ excludeReserved := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testCases := []struct {
|
||||||
|
staticPolicyTest
|
||||||
|
expCSetAfterAlloc cpuset.CPUSet
|
||||||
|
@@ -754,7 +761,7 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -779,6 +786,7 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) {
|
||||||
|
|
||||||
|
func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
excludeReserved := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testCases := []staticPolicyTest{
|
||||||
|
{
|
||||||
|
description: "SingleSocketHT, DeAllocOneContainer",
|
||||||
|
@@ -837,7 +845,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -860,6 +868,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
|
||||||
|
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||||
|
excludeReserved := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
topo *topology.CPUTopology
|
||||||
|
@@ -928,7 +937,8 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range testCases {
|
||||||
|
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||||
|
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved)
|
||||||
|
+
|
||||||
|
policy := p.(*staticPolicy)
|
||||||
|
st := &mockState{
|
||||||
|
assignments: tc.stAssignments,
|
||||||
|
@@ -961,6 +971,7 @@ type staticPolicyTestWithResvList struct {
|
||||||
|
topo *topology.CPUTopology
|
||||||
|
numReservedCPUs int
|
||||||
|
reserved cpuset.CPUSet
|
||||||
|
+ isolcpus cpuset.CPUSet
|
||||||
|
stAssignments state.ContainerCPUAssignments
|
||||||
|
stDefaultCPUSet cpuset.CPUSet
|
||||||
|
pod *v1.Pod
|
||||||
|
@@ -972,6 +983,8 @@ type staticPolicyTestWithResvList struct {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
+ testExcl := false
|
||||||
|
testCases := []staticPolicyTestWithResvList{
|
||||||
|
{
|
||||||
|
description: "empty cpuset",
|
||||||
|
@@ -1001,10 +1014,9 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||||
|
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
- testExcl := false
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
t.Run(testCase.description, func(t *testing.T) {
|
||||||
|
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||||
|
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||||
|
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||||
|
@@ -1053,6 +1065,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
topo: topoSingleSocketHT,
|
||||||
|
numReservedCPUs: 1,
|
||||||
|
reserved: cpuset.New(0),
|
||||||
|
+ isolcpus: cpuset.New(),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7),
|
||||||
|
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||||
|
@@ -1066,6 +1079,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
topo: topoSingleSocketHT,
|
||||||
|
numReservedCPUs: 2,
|
||||||
|
reserved: cpuset.New(0, 1),
|
||||||
|
+ isolcpus: cpuset.New(),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7),
|
||||||
|
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||||
|
@@ -1079,6 +1093,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
topo: topoSingleSocketHT,
|
||||||
|
numReservedCPUs: 2,
|
||||||
|
reserved: cpuset.New(0, 1),
|
||||||
|
+ isolcpus: cpuset.New(),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{
|
||||||
|
"fakePod": map[string]cpuset.CPUSet{
|
||||||
|
"fakeContainer100": cpuset.New(2, 3, 6, 7),
|
||||||
|
@@ -1096,6 +1111,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
topo: topoSingleSocketHT,
|
||||||
|
numReservedCPUs: 2,
|
||||||
|
reserved: cpuset.New(0, 1),
|
||||||
|
+ isolcpus: cpuset.New(),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{
|
||||||
|
"fakePod": map[string]cpuset.CPUSet{
|
||||||
|
"fakeContainer100": cpuset.New(2, 3, 6, 7),
|
||||||
|
@@ -1108,11 +1124,30 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
expCPUAlloc: true,
|
||||||
|
expCSet: cpuset.New(0, 1),
|
||||||
|
},
|
||||||
|
+ {
|
||||||
|
+ description: "InfraPod, SingleSocketHT, Isolcpus, ExpectAllocReserved",
|
||||||
|
+ topo: topoSingleSocketHT,
|
||||||
|
+ numReservedCPUs: 2,
|
||||||
|
+ reserved: cpuset.New(0, 1),
|
||||||
|
+ isolcpus: cpuset.New(1),
|
||||||
|
+ stAssignments: state.ContainerCPUAssignments{
|
||||||
|
+ "fakePod": map[string]cpuset.CPUSet{
|
||||||
|
+ "fakeContainer100": cpuset.New(2, 3, 6, 7),
|
||||||
|
+ },
|
||||||
|
+ },
|
||||||
|
+ stDefaultCPUSet: cpuset.New(4, 5),
|
||||||
|
+ pod: infraPod,
|
||||||
|
+ isKubeInfraPodfunc: fakeIsKubeInfraTrue,
|
||||||
|
+ expErr: nil,
|
||||||
|
+ expCPUAlloc: true,
|
||||||
|
+ expCSet: cpuset.New(0),
|
||||||
|
+ },
|
||||||
|
}
|
||||||
|
|
||||||
|
testExcl := true
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, testCase.isolcpus, topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
diff --git a/pkg/kubelet/cm/devicemanager/manager_stub.go b/pkg/kubelet/cm/devicemanager/manager_stub.go
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..f0e725c6c9f
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/pkg/kubelet/cm/devicemanager/manager_stub.go
|
||||||
|
@@ -0,0 +1,110 @@
|
||||||
|
+/*
|
||||||
|
+Copyright 2017 The Kubernetes Authors.
|
||||||
|
+
|
||||||
|
+Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
+you may not use this file except in compliance with the License.
|
||||||
|
+You may obtain a copy of the License at
|
||||||
|
+
|
||||||
|
+ http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
+
|
||||||
|
+Unless required by applicable law or agreed to in writing, software
|
||||||
|
+distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
+See the License for the specific language governing permissions and
|
||||||
|
+limitations under the License.
|
||||||
|
+*/
|
||||||
|
+
|
||||||
|
+package devicemanager
|
||||||
|
+
|
||||||
|
+import (
|
||||||
|
+ v1 "k8s.io/api/core/v1"
|
||||||
|
+ "k8s.io/apimachinery/pkg/util/sets"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/config"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
|
||||||
|
+ schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||||
|
+)
|
||||||
|
+
|
||||||
|
+// ManagerStub provides a simple stub implementation for the Device Manager.
|
||||||
|
+type ManagerStub struct {
|
||||||
|
+ // containerMap provides a mapping from (pod, container) -> containerID
|
||||||
|
+ // for all containers in a pod. Used to detect pods running across a restart
|
||||||
|
+ containerMap containermap.ContainerMap
|
||||||
|
+
|
||||||
|
+ // containerRunningSet identifies which container among those present in `containerMap`
|
||||||
|
+ // was reported running by the container runtime when `containerMap` was computed.
|
||||||
|
+ // Used to detect pods running across a restart
|
||||||
|
+ containerRunningSet sets.Set[string]
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// NewManagerStub creates a ManagerStub.
|
||||||
|
+func NewManagerStub() (*ManagerStub, error) {
|
||||||
|
+ return &ManagerStub{}, nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// Start simply returns nil.
|
||||||
|
+func (h *ManagerStub) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, initialContainers containermap.ContainerMap, initialContainerRunningSet sets.Set[string]) error {
|
||||||
|
+ return nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// Stop simply returns nil.
|
||||||
|
+func (h *ManagerStub) Stop() error {
|
||||||
|
+ return nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// Allocate simply returns nil.
|
||||||
|
+func (h *ManagerStub) Allocate(pod *v1.Pod, container *v1.Container) error {
|
||||||
|
+ return nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// UpdatePluginResources simply returns nil.
|
||||||
|
+func (h *ManagerStub) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
|
||||||
|
+ return nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// GetDeviceRunContainerOptions simply returns nil.
|
||||||
|
+func (h *ManagerStub) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) (*DeviceRunContainerOptions, error) {
|
||||||
|
+ return nil, nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// GetCapacity simply returns nil capacity and empty removed resource list.
|
||||||
|
+func (h *ManagerStub) GetCapacity() (v1.ResourceList, v1.ResourceList, []string) {
|
||||||
|
+ return nil, nil, []string{}
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// GetWatcherHandler returns plugin watcher interface
|
||||||
|
+func (h *ManagerStub) GetWatcherHandler() cache.PluginHandler {
|
||||||
|
+ return nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// GetTopologyHints returns an empty TopologyHint map
|
||||||
|
+func (h *ManagerStub) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
||||||
|
+ return map[string][]topologymanager.TopologyHint{}
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// GetPodTopologyHints returns an empty TopologyHint map
|
||||||
|
+func (h *ManagerStub) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
|
||||||
|
+ return map[string][]topologymanager.TopologyHint{}
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// GetDevices returns nil
|
||||||
|
+func (h *ManagerStub) GetDevices(_, _ string) ResourceDeviceInstances {
|
||||||
|
+ return nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// GetAllocatableDevices returns nothing
|
||||||
|
+func (h *ManagerStub) GetAllocatableDevices() ResourceDeviceInstances {
|
||||||
|
+ return nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// ShouldResetExtendedResourceCapacity returns false
|
||||||
|
+func (h *ManagerStub) ShouldResetExtendedResourceCapacity() bool {
|
||||||
|
+ return false
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// UpdateAllocatedDevices returns nothing
|
||||||
|
+func (h *ManagerStub) UpdateAllocatedDevices() {
|
||||||
|
+ return
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.25.1
@ -0,0 +1,366 @@
|
|||||||
|
From bd2514a9b62c61f2e98f199de98dca76348a8891 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||||
|
Date: Wed, 6 Mar 2024 03:50:23 -0500
|
||||||
|
Subject: [PATCH] kubelet cpumanager keep normal containers off reserved CPUs
|
||||||
|
|
||||||
|
When starting the kubelet process, two separate sets of reserved CPUs
may be specified. With this change, CPUs reserved via
'--system-reserved=cpu' or '--kube-reserved=cpu' will be ignored by
Kubernetes itself. This explicitly excludes the reserved CPUs from the
DefaultCPUSet so that pods cannot run there.

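A minimal sketch of the effect on the default cpuset, using the same
k8s.io/utils/cpuset package the static policy relies on. The CPU numbers and
flags are illustrative only; the sketch is not part of the change itself.

package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

func main() {
	// Example topology: 8 CPUs, with CPUs 0-1 reserved via
	// --system-reserved=cpu / --kube-reserved=cpu (and --reserved-cpus=0,1).
	allCPUs := cpuset.New(0, 1, 2, 3, 4, 5, 6, 7)
	reserved := cpuset.New(0, 1)

	// With excludeReserved set, the reserved CPUs are removed from the
	// default (shared) cpuset, so ordinary containers can never run there.
	defaultSet := allCPUs.Difference(reserved)
	fmt.Println(defaultSet.String()) // prints "2-7"
}
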
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
|
||||||
|
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
||||||
|
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||||
|
---
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 ++-
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 19 ++++++---
|
||||||
|
pkg/kubelet/cm/cpumanager/policy_static.go | 30 +++++++++++---
|
||||||
|
.../cm/cpumanager/policy_static_test.go | 40 ++++++++++++++-----
|
||||||
|
4 files changed, 72 insertions(+), 23 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
index 2d1b6eefebe..e0a359932b7 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
@@ -192,7 +192,11 @@ func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconc
|
||||||
|
// exclusively allocated.
|
||||||
|
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
|
||||||
|
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
|
||||||
|
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions)
|
||||||
|
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||||
|
+ // This variable is primarily to make testing easier.
|
||||||
|
+ excludeReserved := true
|
||||||
|
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions, excludeReserved)
|
||||||
|
+
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("new static policy error: %w", err)
|
||||||
|
}
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
index f6563bbca23..e5f5d07a2ad 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
@@ -263,6 +263,7 @@ func makeMultiContainerPodWithOptions(initCPUs, appCPUs []*containerOptions) *v1
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCPUManagerAdd(t *testing.T) {
|
||||||
|
+ testExcl := false
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
NumCPUs: 4,
|
||||||
|
@@ -278,7 +279,8 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||||
|
0,
|
||||||
|
cpuset.New(),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
- nil)
|
||||||
|
+ nil,
|
||||||
|
+ testExcl)
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
updateErr error
|
||||||
|
@@ -527,8 +529,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
+ testExcl := false
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||||
|
|
||||||
|
mockState := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -753,6 +756,7 @@ func TestCPUManagerRemove(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReconcileState(t *testing.T) {
|
||||||
|
+ testExcl := false
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
NumCPUs: 8,
|
||||||
|
@@ -772,7 +776,8 @@ func TestReconcileState(t *testing.T) {
|
||||||
|
0,
|
||||||
|
cpuset.New(),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
- nil)
|
||||||
|
+ nil,
|
||||||
|
+ testExcl)
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
@@ -1276,6 +1281,7 @@ func TestReconcileState(t *testing.T) {
|
||||||
|
// above test cases are without kubelet --reserved-cpus cmd option
|
||||||
|
// the following tests are with --reserved-cpus configured
|
||||||
|
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||||
|
+ testExcl := false
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
NumCPUs: 4,
|
||||||
|
@@ -1291,7 +1297,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||||
|
1,
|
||||||
|
cpuset.New(0),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
- nil)
|
||||||
|
+ nil,
|
||||||
|
+ testExcl)
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
updateErr error
|
||||||
|
@@ -1416,6 +1423,7 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
||||||
|
+ testExcl := false
|
||||||
|
nonePolicy, _ := NewNonePolicy(nil)
|
||||||
|
staticPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
@@ -1432,7 +1440,8 @@ func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
||||||
|
1,
|
||||||
|
cpuset.New(0),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
- nil)
|
||||||
|
+ nil,
|
||||||
|
+ testExcl)
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
index d22a6a64d5e..9690eedab58 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
@@ -106,6 +106,8 @@ type staticPolicy struct {
|
||||||
|
topology *topology.CPUTopology
|
||||||
|
// set of CPUs that is not available for exclusive assignment
|
||||||
|
reservedCPUs cpuset.CPUSet
|
||||||
|
+ // If true, default CPUSet should exclude reserved CPUs
|
||||||
|
+ excludeReserved bool
|
||||||
|
// Superset of reservedCPUs. It includes not just the reservedCPUs themselves,
|
||||||
|
// but also any siblings of those reservedCPUs on the same physical die.
|
||||||
|
// NOTE: If the reserved set includes full physical CPUs from the beginning
|
||||||
|
@@ -126,7 +128,7 @@ var _ Policy = &staticPolicy{}
|
||||||
|
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||||
|
// assignments for exclusively pinned guaranteed containers after the main
|
||||||
|
// container process starts.
|
||||||
|
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string) (Policy, error) {
|
||||||
|
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, excludeReserved bool) (Policy, error) {
|
||||||
|
opts, err := NewStaticPolicyOptions(cpuPolicyOptions)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
@@ -141,6 +143,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||||
|
policy := &staticPolicy{
|
||||||
|
topology: topology,
|
||||||
|
affinity: affinity,
|
||||||
|
+ excludeReserved: excludeReserved,
|
||||||
|
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||||
|
options: opts,
|
||||||
|
}
|
||||||
|
@@ -202,7 +205,15 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||||
|
}
|
||||||
|
// state is empty initialize
|
||||||
|
allCPUs := p.topology.CPUDetails.CPUs()
|
||||||
|
- s.SetDefaultCPUSet(allCPUs)
|
||||||
|
+ if p.excludeReserved {
|
||||||
|
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
|
||||||
|
+ // unless explicitly affined.
|
||||||
|
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reservedCPUs))
|
||||||
|
+ } else {
|
||||||
|
+ s.SetDefaultCPUSet(allCPUs)
|
||||||
|
+ }
|
||||||
|
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||||
|
+ allCPUs, p.reservedCPUs, s.GetDefaultCPUSet())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -210,11 +221,12 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||||
|
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
||||||
|
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
||||||
|
// - user tampered with file
|
||||||
|
- if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
||||||
|
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||||
|
- p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||||
|
+ if !p.excludeReserved {
|
||||||
|
+ if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
||||||
|
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||||
|
+ p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
-
|
||||||
|
// 2. Check if state for static policy is consistent
|
||||||
|
for pod := range tmpAssignments {
|
||||||
|
for container, cset := range tmpAssignments[pod] {
|
||||||
|
@@ -241,6 +253,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...)
|
||||||
|
+ if p.excludeReserved {
|
||||||
|
+ totalKnownCPUs = totalKnownCPUs.Union(p.reservedCPUs)
|
||||||
|
+ }
|
||||||
|
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
|
||||||
|
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
||||||
|
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
|
||||||
|
@@ -381,6 +396,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
||||||
|
cpusInUse := getAssignedCPUsOfSiblings(s, podUID, containerName)
|
||||||
|
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
|
||||||
|
s.Delete(podUID, containerName)
|
||||||
|
+ if p.excludeReserved {
|
||||||
|
+ toRelease = toRelease.Difference(p.reservedCPUs)
|
||||||
|
+ }
|
||||||
|
// Mutate the shared pool, adding released cpus.
|
||||||
|
toRelease = toRelease.Difference(cpusInUse)
|
||||||
|
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
index 8cc3fc2be74..b060201e156 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
@@ -36,6 +36,7 @@ type staticPolicyTest struct {
|
||||||
|
description string
|
||||||
|
topo *topology.CPUTopology
|
||||||
|
numReservedCPUs int
|
||||||
|
+ excludeReserved bool
|
||||||
|
reservedCPUs *cpuset.CPUSet
|
||||||
|
podUID string
|
||||||
|
options map[string]string
|
||||||
|
@@ -69,7 +70,8 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyName(t *testing.T) {
|
||||||
|
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||||
|
+ testExcl := false
|
||||||
|
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||||
|
|
||||||
|
policyName := policy.Name()
|
||||||
|
if policyName != "static" {
|
||||||
|
@@ -99,6 +101,15 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||||
|
stDefaultCPUSet: cpuset.New(),
|
||||||
|
expCSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||||
|
},
|
||||||
|
+ {
|
||||||
|
+ description: "empty cpuset exclude reserved",
|
||||||
|
+ topo: topoDualSocketHT,
|
||||||
|
+ numReservedCPUs: 2,
|
||||||
|
+ excludeReserved: true,
|
||||||
|
+ stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
+ stDefaultCPUSet: cpuset.New(),
|
||||||
|
+ expCSet: cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
|
||||||
|
+ },
|
||||||
|
{
|
||||||
|
description: "reserved cores 0 & 6 are not present in available cpuset",
|
||||||
|
topo: topoDualSocketHT,
|
||||||
|
@@ -145,7 +156,8 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||||
|
}
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
t.Run(testCase.description, func(t *testing.T) {
|
||||||
|
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||||
|
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||||
|
+
|
||||||
|
policy := p.(*staticPolicy)
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -614,7 +626,8 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
|
||||||
|
if testCase.reservedCPUs != nil {
|
||||||
|
cpus = testCase.reservedCPUs.Clone()
|
||||||
|
}
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, tm, testCase.options)
|
||||||
|
+ testExcl := false
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, tm, testCase.options, testExcl)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -685,7 +698,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -738,7 +751,7 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -762,6 +775,7 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
+ excludeReserved := false
|
||||||
|
testCases := []staticPolicyTest{
|
||||||
|
{
|
||||||
|
description: "SingleSocketHT, DeAllocOneContainer",
|
||||||
|
@@ -820,7 +834,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -842,6 +856,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||||
|
+ excludeReserved := false
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
topo *topology.CPUTopology
|
||||||
|
@@ -910,7 +925,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range testCases {
|
||||||
|
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||||
|
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||||
|
policy := p.(*staticPolicy)
|
||||||
|
st := &mockState{
|
||||||
|
assignments: tc.stAssignments,
|
||||||
|
@@ -982,9 +997,11 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||||
|
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
+ testExcl := false
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
t.Run(testCase.description, func(t *testing.T) {
|
||||||
|
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
|
||||||
|
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||||
|
+
|
||||||
|
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||||
|
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||||
|
testCase.description, testCase.expNewErr, err)
|
||||||
|
@@ -1024,7 +1041,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
numReservedCPUs: 1,
|
||||||
|
reserved: cpuset.New(0),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
- stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
|
||||||
|
+ stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7),
|
||||||
|
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||||
|
expErr: fmt.Errorf("not enough cpus available to satisfy request: requested=8, available=7"),
|
||||||
|
expCPUAlloc: false,
|
||||||
|
@@ -1036,7 +1053,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
numReservedCPUs: 2,
|
||||||
|
reserved: cpuset.New(0, 1),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
- stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
|
||||||
|
+ stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7),
|
||||||
|
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||||
|
expErr: nil,
|
||||||
|
expCPUAlloc: true,
|
||||||
|
@@ -1060,8 +1077,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
+ testExcl := true
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
--
|
||||||
|
2.25.1
@ -0,0 +1,556 @@
|
|||||||
|
From 4a28be032c61624f9b16d69ae3a7f699b6a62f51 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||||
|
Date: Tue, 5 Sep 2023 06:27:39 -0400
|
||||||
|
Subject: [PATCH] Namespace-Label-Based Pod Identification and CPU Management
|
||||||
|
for Infra Pods
|
||||||
|
|
||||||
|
This change assigns system infrastructure pods to the "reserved"
cpuset to isolate them from the shared pool of CPUs. Kubernetes
infrastructure pods are identified based on the 'kube-system'
namespace. Platform pods are identified based on the 'kube-system'
namespace, or on the label 'app.starlingx.io/component=platform'.

The platform and infrastructure pods are given an isolated CPU
affinity cpuset when the CPU manager is configured with the 'static'
policy.

This implementation assumes that the "reserved" cpuset is large
enough to handle all infrastructure and platform pods' CPU
allocations, and it prevents the platform pods from running on
application/isolated CPUs regardless of what QoS class they're in.

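A minimal, self-contained sketch of the identification rule described above.
isPlatformPod is a hypothetical stand-in for the patch's isKubeInfra, and the
namespace-label lookup through the API server is omitted here.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// isPlatformPod applies the two local checks: the kube-system namespace and
// the app.starlingx.io/component=platform pod label.
func isPlatformPod(pod *v1.Pod) bool {
	if pod.Namespace == "kube-system" {
		return true
	}
	return pod.Labels["app.starlingx.io/component"] == "platform"
}

func main() {
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
		Name:      "example",
		Namespace: "default",
		Labels:    map[string]string{"app.starlingx.io/component": "platform"},
	}}
	fmt.Println(isPlatformPod(pod)) // prints "true"
}
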
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
Signed-off-by: Gleb Aronsky <gleb.aronsky@windriver.com>
|
||||||
|
Signed-off-by: Thiago Miranda <ThiagoOliveira.Miranda@windriver.com>
|
||||||
|
Signed-off-by: Kaustubh Dhokte <kaustubh.dhokte@windriver.com>
|
||||||
|
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
||||||
|
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
|
||||||
|
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||||
|
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
|
||||||
|
---
|
||||||
|
pkg/kubelet/cm/cpumanager/policy_static.go | 121 ++++++++-
|
||||||
|
.../cm/cpumanager/policy_static_test.go | 233 +++++++++++++++---
|
||||||
|
.../cm/cpumanager/topology_hints_test.go | 4 +
|
||||||
|
3 files changed, 316 insertions(+), 42 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
index 9690eedab58..fff571491f0 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
@@ -17,11 +17,17 @@ limitations under the License.
|
||||||
|
package cpumanager
|
||||||
|
|
||||||
|
import (
|
||||||
|
+ "context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
v1 "k8s.io/api/core/v1"
|
||||||
|
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||||
|
+ k8sclient "k8s.io/client-go/kubernetes"
|
||||||
|
+ restclient "k8s.io/client-go/rest"
|
||||||
|
+ "k8s.io/client-go/tools/clientcmd"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
|
+ "k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||||
|
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||||
|
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||||
|
"k8s.io/kubernetes/pkg/features"
|
||||||
|
@@ -44,6 +50,22 @@ const (
|
||||||
|
ErrorSMTAlignment = "SMTAlignmentError"
|
||||||
|
)
|
||||||
|
|
||||||
|
+// Declared as variables so that they can easily be
+// overridden during testing
+type getPodNamespace func(string) (*v1.Namespace, error)
|
||||||
|
+type buildFromConfigFlag func(masterUrl string, kubeconfigPath string) (*restclient.Config, error)
|
||||||
|
+type isKubeInfraFunc func(pod *v1.Pod) bool
|
||||||
|
+
|
||||||
|
+var varGetNamespaceObject getPodNamespace
|
||||||
|
+var varBuildConfigFromFlags buildFromConfigFlag
|
||||||
|
+var varIsKubeInfra isKubeInfraFunc
|
||||||
|
+
|
||||||
|
+func init() {
|
||||||
|
+ varIsKubeInfra = isKubeInfra
|
||||||
|
+ varGetNamespaceObject = getPodNamespaceObject
|
||||||
|
+ varBuildConfigFromFlags = clientcmd.BuildConfigFromFlags
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
// SMTAlignmentError represents an error due to SMT alignment
|
||||||
|
type SMTAlignmentError struct {
|
||||||
|
RequestedCPUs int
|
||||||
|
@@ -141,11 +163,11 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||||
|
klog.InfoS("Static policy created with configuration", "options", opts)
|
||||||
|
|
||||||
|
policy := &staticPolicy{
|
||||||
|
- topology: topology,
|
||||||
|
- affinity: affinity,
|
||||||
|
+ topology: topology,
|
||||||
|
+ affinity: affinity,
|
||||||
|
excludeReserved: excludeReserved,
|
||||||
|
- cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||||
|
- options: opts,
|
||||||
|
+ cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||||
|
+ options: opts,
|
||||||
|
}
|
||||||
|
|
||||||
|
allCPUs := topology.CPUDetails.CPUs()
|
||||||
|
@@ -223,8 +245,8 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||||
|
// - user tampered with file
|
||||||
|
if !p.excludeReserved {
|
||||||
|
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
||||||
|
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||||
|
- p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||||
|
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||||
|
+ p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 2. Check if state for static policy is consistent
|
||||||
|
@@ -309,6 +331,25 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
|
||||||
|
+ // Process infra pods before guaranteed pods
|
||||||
|
+ if varIsKubeInfra(pod) {
|
||||||
|
+ // Container belongs in reserved pool.
|
||||||
|
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
|
||||||
|
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||||
|
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)", pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||||
|
+ return nil
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ cpuset := p.reservedCPUs
|
||||||
|
+ if cpuset.IsEmpty() {
|
||||||
|
+ // If this happens then someone messed up.
|
||||||
|
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs)
|
||||||
|
+ }
|
||||||
|
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||||
|
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||||
|
+ return nil
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
numCPUs := p.guaranteedCPUs(pod, container)
|
||||||
|
if numCPUs == 0 {
|
||||||
|
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||||
|
@@ -460,6 +501,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
|
||||||
|
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
|
||||||
|
+ if varIsKubeInfra(pod) {
|
||||||
|
+ return 0
|
||||||
|
+ }
|
||||||
|
// Safe downcast to do for all systems with < 2.1 billion CPUs.
|
||||||
|
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
|
||||||
|
// https://golang.org/ref/spec#Numeric_types
|
||||||
|
@@ -685,6 +730,70 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
|
||||||
|
return hints
|
||||||
|
}
|
||||||
|
|
||||||
|
+func getPodNamespaceObject(podNamespaceName string) (*v1.Namespace, error) {
|
||||||
|
+
|
||||||
|
+ kubeConfigPath := constants.GetKubeletKubeConfigPath()
|
||||||
|
+ cfg, err := varBuildConfigFromFlags("", kubeConfigPath)
|
||||||
|
+ if err != nil {
|
||||||
|
+ klog.Error("Failed to build client config from ", kubeConfigPath, err.Error())
|
||||||
|
+ return nil, err
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ clientset, err := k8sclient.NewForConfig(cfg)
|
||||||
|
+ if err != nil {
|
||||||
|
+ klog.Error("Failed to get clientset for KUBECONFIG ", kubeConfigPath, err.Error())
|
||||||
|
+ return nil, err
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ namespaceObj, err := clientset.CoreV1().Namespaces().Get(context.TODO(), podNamespaceName, metav1.GetOptions{})
|
||||||
|
+ if err != nil {
|
||||||
|
+ klog.Error("Error getting namespace object:", err.Error())
|
||||||
|
+ return nil, err
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return namespaceObj, nil
|
||||||
|
+
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// check if a given pod is labelled as platform pod or
|
||||||
|
+// is in a namespace labelled as a platform namespace
|
||||||
|
+func isKubeInfra(pod *v1.Pod) bool {
|
||||||
|
+
|
||||||
|
+ podName := pod.GetName()
|
||||||
|
+ podNamespaceName := pod.GetNamespace()
|
||||||
|
+
|
||||||
|
+ if podNamespaceName == "kube-system" {
|
||||||
|
+ klog.Infof("Pod %s has %s namespace. Treating as platform pod.", podName, podNamespaceName)
|
||||||
|
+ return true
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ klog.InfoS("Checking pod ", podName, " for label 'app.starlingx.io/component=platform'.")
|
||||||
|
+ podLabels := pod.GetLabels()
|
||||||
|
+ val, ok := podLabels["app.starlingx.io/component"]
|
||||||
|
+ if ok && val == "platform" {
|
||||||
|
+ klog.InfoS("Pod ", podName, " has 'app.starlingx.io/component=platform' label. Treating as platform pod.")
|
||||||
|
+ return true
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ klog.V(4).InfoS("Pod ", pod.GetName(), " does not have 'app.starlingx.io/component=platform' label. Checking its namespace information...")
|
||||||
|
+
|
||||||
|
+ namespaceObj, err := varGetNamespaceObject(podNamespaceName)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return false
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ namespaceLabels := namespaceObj.GetLabels()
|
||||||
|
+ val, ok = namespaceLabels["app.starlingx.io/component"]
|
||||||
|
+ if ok && val == "platform" {
|
||||||
|
+ klog.InfoS("For pod: ", podName, ", its Namespace ", podNamespaceName, " has 'app.starlingx.io/component=platform' label. Treating as platform pod.")
|
||||||
|
+ return true
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ klog.InfoS("Neither pod ", podName, " nor its namespace ", podNamespaceName, " has 'app.starlingx.io/component=platform' label. Not treating as platform pod.")
|
||||||
|
+ return false
|
||||||
|
+
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
// isHintSocketAligned function return true if numa nodes in hint are socket aligned.
|
||||||
|
func (p *staticPolicy) isHintSocketAligned(hint topologymanager.TopologyHint, minAffinitySize int) bool {
|
||||||
|
numaNodesBitMask := hint.NUMANodeAffinity.GetBits()
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
index b060201e156..83614619550 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
@@ -17,12 +17,15 @@ limitations under the License.
|
||||||
|
package cpumanager
|
||||||
|
|
||||||
|
import (
|
||||||
|
+ "errors"
|
||||||
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
v1 "k8s.io/api/core/v1"
|
||||||
|
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||||
|
+ restclient "k8s.io/client-go/rest"
|
||||||
|
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||||
|
pkgfeatures "k8s.io/kubernetes/pkg/features"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||||
|
@@ -109,7 +112,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||||
|
stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
stDefaultCPUSet: cpuset.New(),
|
||||||
|
expCSet: cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
|
||||||
|
- },
|
||||||
|
+ },
|
||||||
|
{
|
||||||
|
description: "reserved cores 0 & 6 are not present in available cpuset",
|
||||||
|
topo: topoDualSocketHT,
|
||||||
|
@@ -954,17 +957,18 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||||
|
// above test cases are without kubelet --reserved-cpus cmd option
|
||||||
|
// the following tests are with --reserved-cpus configured
|
||||||
|
type staticPolicyTestWithResvList struct {
|
||||||
|
- description string
|
||||||
|
- topo *topology.CPUTopology
|
||||||
|
- numReservedCPUs int
|
||||||
|
- reserved cpuset.CPUSet
|
||||||
|
- stAssignments state.ContainerCPUAssignments
|
||||||
|
- stDefaultCPUSet cpuset.CPUSet
|
||||||
|
- pod *v1.Pod
|
||||||
|
- expErr error
|
||||||
|
- expNewErr error
|
||||||
|
- expCPUAlloc bool
|
||||||
|
- expCSet cpuset.CPUSet
|
||||||
|
+ description string
|
||||||
|
+ topo *topology.CPUTopology
|
||||||
|
+ numReservedCPUs int
|
||||||
|
+ reserved cpuset.CPUSet
|
||||||
|
+ stAssignments state.ContainerCPUAssignments
|
||||||
|
+ stDefaultCPUSet cpuset.CPUSet
|
||||||
|
+ pod *v1.Pod
|
||||||
|
+ isKubeInfraPodfunc isKubeInfraFunc
|
||||||
|
+ expErr error
|
||||||
|
+ expNewErr error
|
||||||
|
+ expCPUAlloc bool
|
||||||
|
+ expCSet cpuset.CPUSet
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||||
|
@@ -1032,35 +1036,63 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
+func fakeIsKubeInfraTrue(pod *v1.Pod) bool {
|
||||||
|
+ return true
|
||||||
|
+}
|
||||||
|
|
||||||
|
+func fakeIsKubeInfraFalse(pod *v1.Pod) bool {
|
||||||
|
+ return false
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
|
||||||
|
+ infraPod.Namespace = "kube-system"
|
||||||
|
testCases := []staticPolicyTestWithResvList{
|
||||||
|
{
|
||||||
|
- description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||||
|
- topo: topoSingleSocketHT,
|
||||||
|
- numReservedCPUs: 1,
|
||||||
|
- reserved: cpuset.New(0),
|
||||||
|
- stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
- stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7),
|
||||||
|
- pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||||
|
- expErr: fmt.Errorf("not enough cpus available to satisfy request: requested=8, available=7"),
|
||||||
|
- expCPUAlloc: false,
|
||||||
|
- expCSet: cpuset.New(),
|
||||||
|
+ description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||||
|
+ topo: topoSingleSocketHT,
|
||||||
|
+ numReservedCPUs: 1,
|
||||||
|
+ reserved: cpuset.New(0),
|
||||||
|
+ stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
+ stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7),
|
||||||
|
+ pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||||
|
+ isKubeInfraPodfunc: fakeIsKubeInfraFalse,
|
||||||
|
+ expErr: fmt.Errorf("not enough cpus available to satisfy request: requested=8, available=7"),
|
||||||
|
+ expCPUAlloc: false,
|
||||||
|
+ expCSet: cpuset.New(),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- description: "GuPodSingleCore, SingleSocketHT, ExpectAllocOneCPU",
|
||||||
|
+ description: "GuPodSingleCore, SingleSocketHT, ExpectAllocOneCPU",
|
||||||
|
+ topo: topoSingleSocketHT,
|
||||||
|
+ numReservedCPUs: 2,
|
||||||
|
+ reserved: cpuset.New(0, 1),
|
||||||
|
+ stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
+ stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7),
|
||||||
|
+ pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||||
|
+ isKubeInfraPodfunc: fakeIsKubeInfraFalse,
|
||||||
|
+ expErr: nil,
|
||||||
|
+ expCPUAlloc: true,
|
||||||
|
+ expCSet: cpuset.New(4), // expect sibling of partial core
|
||||||
|
+ },
|
||||||
|
+ {
|
||||||
|
+ description: "GuPodMultipleCores, SingleSocketHT, ExpectAllocOneCore",
|
||||||
|
topo: topoSingleSocketHT,
|
||||||
|
numReservedCPUs: 2,
|
reserved: cpuset.New(0, 1),
- stAssignments: state.ContainerCPUAssignments{},
- stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7),
- pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
- expErr: nil,
- expCPUAlloc: true,
- expCSet: cpuset.New(4), // expect sibling of partial core
+ stAssignments: state.ContainerCPUAssignments{
+ "fakePod": map[string]cpuset.CPUSet{
+ "fakeContainer100": cpuset.New(2, 3, 6, 7),
+ },
+ },
+ stDefaultCPUSet: cpuset.New(0, 1, 4, 5),
+ pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"),
+ isKubeInfraPodfunc: fakeIsKubeInfraFalse,
+ expErr: nil,
+ expCPUAlloc: true,
+ expCSet: cpuset.New(4, 5),
},
{
- description: "GuPodMultipleCores, SingleSocketHT, ExpectAllocOneCore",
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
topo: topoSingleSocketHT,
numReservedCPUs: 2,
reserved: cpuset.New(0, 1),
@@ -1069,11 +1101,12 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
"fakeContainer100": cpuset.New(2, 3, 6, 7),
},
},
- stDefaultCPUSet: cpuset.New(0, 1, 4, 5),
- pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"),
- expErr: nil,
- expCPUAlloc: true,
- expCSet: cpuset.New(4, 5),
+ stDefaultCPUSet: cpuset.New(4, 5),
+ pod: infraPod,
+ isKubeInfraPodfunc: fakeIsKubeInfraTrue,
+ expErr: nil,
+ expCPUAlloc: true,
+ expCSet: cpuset.New(0, 1),
},
}

@@ -1086,6 +1119,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
defaultCPUSet: testCase.stDefaultCPUSet,
}

+ varIsKubeInfra = testCase.isKubeInfraPodfunc
container := &testCase.pod.Spec.Containers[0]
err := policy.Allocate(st, testCase.pod, container)
if !reflect.DeepEqual(err, testCase.expErr) {
@@ -1206,6 +1240,133 @@ func TestStaticPolicyOptions(t *testing.T) {
}
}

+func makePodWithLabels(podLabels map[string]string) *v1.Pod {
+ return &v1.Pod{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pod",
+ Namespace: "test-namespace",
+ Labels: podLabels,
+ },
+ }
+}
+
+func fakeBuildConfigFromFlags(masterUrl string, kubeconfigPath string) (*restclient.Config, error) {
+
+ return &restclient.Config{}, nil
+}
+
+func fakeBuildConfigFromFlagsError(masterUrl string, kubeconfigPath string) (*restclient.Config, error) {
+
+ errString := fmt.Sprintf("%s file not found", kubeconfigPath)
+ return nil, errors.New(errString)
+
+}
+
+func getFakeInfraPodNamespace(_ string) (*v1.Namespace, error) {
+
+ return &v1.Namespace{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-namespace",
+ Labels: map[string]string{
+ "app.starlingx.io/component": "platform",
+ },
+ }}, nil
+}
+
+func getFakeNonInfraPodNamespace(_ string) (*v1.Namespace, error) {
+
+ return &v1.Namespace{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-namespace",
+ Labels: map[string]string{
+ "fake": "label",
+ }}}, nil
+
+}
+
+type kubeInfraPodTestCase struct {
+ description string
+ pod *v1.Pod
+ namespaceFunc getPodNamespace
+ expectedValue bool
+}
+
+func TestKubeInfraPod(t *testing.T) {
+ testCases := []kubeInfraPodTestCase{
+ {
+ description: "Pod with platform label and namespace with platform label",
+ pod: makePodWithLabels(map[string]string{
+ "app.starlingx.io/component": "platform",
+ }),
+ namespaceFunc: getFakeInfraPodNamespace,
+ expectedValue: true,
+ },
+ {
+ description: "Pod with platform label and namespace without platform label",
+ pod: makePodWithLabels(map[string]string{
+ "app.starlingx.io/component": "platform",
+ }),
+ namespaceFunc: getFakeNonInfraPodNamespace,
+ expectedValue: true,
+ },
+ {
+ description: "Pod without platform label and namespace with platform label",
+ pod: makePodWithLabels(map[string]string{
+ "test": "label",
+ }),
+ namespaceFunc: getFakeInfraPodNamespace,
+ expectedValue: true,
+ },
+ {
+ description: "Pod without platform label and namespace without platform label",
+ pod: makePodWithLabels(map[string]string{
+ "test": "namespace",
+ }),
+ namespaceFunc: getFakeNonInfraPodNamespace,
+ expectedValue: false,
+ },
+ }
+
+ for _, testCase := range testCases {
+ t.Run(testCase.description, func(t *testing.T) {
+
+ varGetNamespaceObject = testCase.namespaceFunc
+ varBuildConfigFromFlags = fakeBuildConfigFromFlags
+ gotValue := isKubeInfra(testCase.pod)
+
+ if gotValue != testCase.expectedValue {
+ t.Errorf("StaticPolicy isKubeInfraPod() error %v. expected value %v actual value %v",
+ testCase.description, testCase.expectedValue, gotValue)
+ } else {
+ fmt.Printf("StaticPolicy isKubeInfraPod() test successful. : %v ", testCase.description)
+ }
+
+ })
+ }
+
+ test := kubeInfraPodTestCase{
+ description: "Failure reading kubeconfig file",
+ pod: makePodWithLabels(map[string]string{
+ "test": "namespace",
+ }),
+ namespaceFunc: getFakeNonInfraPodNamespace,
+ expectedValue: false,
+ }
+
+ varGetNamespaceObject = getPodNamespaceObject
+ varBuildConfigFromFlags = fakeBuildConfigFromFlagsError
+
+ gotValue := isKubeInfra(test.pod)
+
+ if gotValue != test.expectedValue {
+ t.Errorf("StaticPolicy isKubeInfraPod() error %v. expected value %v actual value %v",
+ test.description, test.expectedValue, gotValue)
+ } else {
+ fmt.Printf("StaticPolicy isKubeInfraPod() test successful. : %v ", test.description)
+ }
+
+}
+
func newCPUSetPtr(cpus ...int) *cpuset.CPUSet {
ret := cpuset.New(cpus...)
return &ret
diff --git a/pkg/kubelet/cm/cpumanager/topology_hints_test.go b/pkg/kubelet/cm/cpumanager/topology_hints_test.go
index 53738b613c2..ad9c0f17602 100644
--- a/pkg/kubelet/cm/cpumanager/topology_hints_test.go
+++ b/pkg/kubelet/cm/cpumanager/topology_hints_test.go
@@ -197,6 +197,7 @@ func TestPodGuaranteedCPUs(t *testing.T) {
expectedCPU: 210,
},
}
+ varIsKubeInfra = fakeIsKubeInfraFalse
for _, tc := range tcases {
t.Run(tc.name, func(t *testing.T) {
requestedCPU := p.podGuaranteedCPUs(tc.pod)
@@ -241,6 +242,7 @@ func TestGetTopologyHints(t *testing.T) {
sourcesReady: &sourcesReadyStub{},
}

+ varIsKubeInfra = fakeIsKubeInfraFalse
hints := m.GetTopologyHints(&tc.pod, &tc.container)[string(v1.ResourceCPU)]
if len(tc.expectedHints) == 0 && len(hints) == 0 {
continue
@@ -294,6 +296,7 @@ func TestGetPodTopologyHints(t *testing.T) {
sourcesReady: &sourcesReadyStub{},
}

+ varIsKubeInfra = fakeIsKubeInfraFalse
podHints := m.GetPodTopologyHints(&tc.pod)[string(v1.ResourceCPU)]
if len(tc.expectedHints) == 0 && len(podHints) == 0 {
continue
@@ -477,6 +480,7 @@ func TestGetPodTopologyHintsWithPolicyOptions(t *testing.T) {
sourcesReady: &sourcesReadyStub{},
}

+ varIsKubeInfra = fakeIsKubeInfraFalse
podHints := m.GetPodTopologyHints(&testCase.pod)[string(v1.ResourceCPU)]
sort.SliceStable(podHints, func(i, j int) bool {
return podHints[i].LessThan(podHints[j])
--
2.25.1
@ -0,0 +1,50 @@
From 75eff86c66aa8fac96b7d102d339c1a50b0ad205 Mon Sep 17 00:00:00 2001
From: Jim Gauld <James.Gauld@windriver.com>
Date: Fri, 11 Feb 2022 11:06:35 -0500
Subject: [PATCH] kubelet: sort isolcpus allocation when SMT enabled

The existing device manager code returns CPUs as devices in unsorted
order. This numerically sorts isolcpus allocations when SMT/HT is
enabled on the host. This logs SMT pairs, singletons, and algorithm
order details to make the algorithm understandable.

Signed-off-by: Jim Gauld <James.Gauld@windriver.com>
---
pkg/kubelet/cm/devicemanager/manager.go | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go
index 8a488a9292f..6a6e1195bcf 100644
--- a/pkg/kubelet/cm/devicemanager/manager.go
+++ b/pkg/kubelet/cm/devicemanager/manager.go
@@ -568,7 +568,16 @@ func order_devices_by_sibling(devices sets.Set[string], needed int) ([]string, e
return cpu_lst[0]
}
}
+ //Make post-analysis of selection algorithm obvious by numerical sorting
+ //the available isolated cpu_id.
+ cpu_ids := make([]int, 0, int(devices.Len()))
for cpu_id := range devices {
+ cpu_id_, _ := strconv.Atoi(cpu_id)
+ cpu_ids = append(cpu_ids, cpu_id_)
+ }
+ sort.Ints(cpu_ids)
+ for _, _cpu_id := range cpu_ids {
+ cpu_id := strconv.Itoa(_cpu_id)
// If we've already found cpu_id as a sibling, skip it.
if _, ok := _iterated_cpu[cpu_id]; ok {
continue
@@ -610,7 +619,9 @@ func order_devices_by_sibling(devices sets.Set[string], needed int) ([]string, e
}
}
}
- //klog.Infof("needed=%d ordered_cpu_list=%v", needed, dev_lst)
+ //This algorithm will get some attention. Show minimal details.
+ klog.Infof("order_devices_by_sibling: needed=%d, smtpairs=%v, singletons=%v, order=%v",
+ needed, sibling_lst, single_lst, dev_lst)
return dev_lst, nil
}
func smt_enabled() bool {
--
2.25.1
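The numeric ordering described in the commit message above can be shown in isolation: the device manager tracks CPU IDs as strings, so they are converted to integers, sorted, and converted back. A minimal standalone sketch, where sortCPUIDs and the sample values are illustrative only and not part of the patch:

package main

import (
    "fmt"
    "sort"
    "strconv"
)

// sortCPUIDs returns CPU IDs (held as strings by the device manager) in
// ascending numeric order, so "10" sorts after "2" rather than before it.
func sortCPUIDs(ids []string) []string {
    nums := make([]int, 0, len(ids))
    for _, id := range ids {
        n, err := strconv.Atoi(id)
        if err != nil {
            continue // skip anything that is not a plain integer
        }
        nums = append(nums, n)
    }
    sort.Ints(nums)
    sorted := make([]string, 0, len(nums))
    for _, n := range nums {
        sorted = append(sorted, strconv.Itoa(n))
    }
    return sorted
}

func main() {
    fmt.Println(sortCPUIDs([]string{"11", "3", "10", "2"})) // prints [2 3 10 11]
}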
@ -0,0 +1,150 @@
From 67797d74a1f57e0983ca9457ac8954406bf8b183 Mon Sep 17 00:00:00 2001
From: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
Date: Wed, 8 Jan 2025 08:27:30 -0500
Subject: [PATCH] kubernetes: make isolcpus allocation SMT-aware

Enhance isolcpus support in Kubernetes to allocate isolated SMT
siblings to the same container when SMT/HT is enabled on the host.

As it stands, the device manager code in Kubernetes is not SMT-aware
(since normally it doesn't deal with CPUs). However, StarlingX
exposes isolated CPUs as devices and if possible we want to allocate
all SMT siblings from a CPU core to the same container in order to
minimize cross-container interference due to resource contention
within the CPU core.

The solution is basically to take the list of isolated CPUs and
re-order it so that the SMT siblings are next to each other. That
way the existing resource selection code will allocate the siblings
together. As an optimization, if it is known that an odd number
of isolated CPUs is desired, a singleton SMT sibling will be
inserted into the list to avoid breaking up sibling pairs.

Signed-off-by: Tao Wang <tao.wang@windriver.com>
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
---
pkg/kubelet/cm/devicemanager/manager.go | 83 ++++++++++++++++++++++++-
1 file changed, 82 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go
index f1d04e97179..8a488a9292f 100644
--- a/pkg/kubelet/cm/devicemanager/manager.go
+++ b/pkg/kubelet/cm/devicemanager/manager.go
@@ -23,6 +23,8 @@ import (
"path/filepath"
"runtime"
"sort"
+ "strconv"
+ "strings"
"sync"
"time"

@@ -46,6 +48,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
"k8s.io/kubernetes/pkg/kubelet/types"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
+ "k8s.io/utils/cpuset"
)

const nodeWithoutTopology = -1
@@ -550,6 +553,75 @@ func (m *ManagerImpl) UpdateAllocatedDevices() {
m.allocatedDevices = m.podDevices.devices()
}

+// Given a list of isolated CPUs in 'devices', and the number of desired CPUs in 'needed',
+// return an ordered list of isolated CPUs such that the first 'needed' CPUs in the list
+// contain as many hyperthread sibling pairs as possible.
+func order_devices_by_sibling(devices sets.Set[string], needed int) ([]string, error) {
+ var dev_lst []string
+ var single_lst []string
+ sibling_lst := make([]string, 0, int(devices.Len()))
+ _iterated_cpu := make(map[string]string)
+ get_sibling := func(cpu string, cpu_lst []string) string {
+ if cpu_lst[0] == cpu {
+ return cpu_lst[1]
+ } else {
+ return cpu_lst[0]
+ }
+ }
+ for cpu_id := range devices {
+ // If we've already found cpu_id as a sibling, skip it.
+ if _, ok := _iterated_cpu[cpu_id]; ok {
+ continue
+ }
+ devPath := fmt.Sprintf("/sys/devices/system/cpu/cpu%s/topology/thread_siblings_list", cpu_id)
+ dat, err := os.ReadFile(devPath)
+ if err != nil {
+ return dev_lst, fmt.Errorf("Can't read cpu[%s] thread_siblings_list", cpu_id)
+ }
+ cpustring := strings.TrimSuffix(string(dat), "\n")
+ cpu_pair_set, err := cpuset.Parse(cpustring)
+ if err != nil {
+ return dev_lst, fmt.Errorf("Unable to parse thread_siblings_list[%s] string to cpuset", cpustring)
+ }
+ var cpu_pair_lst []string
+ for _, v := range cpu_pair_set.List() {
+ cpu_pair_lst = append(cpu_pair_lst, strconv.Itoa(v))
+ }
+ sibling_cpu_id := get_sibling(cpu_id, cpu_pair_lst)
+ if _, ok := devices[sibling_cpu_id]; ok {
+ sibling_lst = append(sibling_lst, cpu_id, sibling_cpu_id)
+ _iterated_cpu[sibling_cpu_id] = ""
+ } else {
+ single_lst = append(single_lst, cpu_id)
+ }
+ _iterated_cpu[cpu_id] = ""
+ }
+ if needed%2 == 0 {
+ dev_lst = append(sibling_lst, single_lst...)
+ } else {
+ if len(single_lst) > 1 {
+ _tmp_list := append(sibling_lst, single_lst[1:]...)
+ dev_lst = append(single_lst[0:1], _tmp_list...)
+ } else {
+ if len(single_lst) == 0 {
+ dev_lst = sibling_lst
+ } else {
+ dev_lst = append(single_lst, sibling_lst...)
+ }
+ }
+ }
+ //klog.Infof("needed=%d ordered_cpu_list=%v", needed, dev_lst)
+ return dev_lst, nil
+}
+func smt_enabled() bool {
+ dat, _ := os.ReadFile("/sys/devices/system/cpu/smt/active")
+ state := strings.TrimSuffix(string(dat), "\n")
+ if state == "0" {
+ return false
+ }
+ return true
+}
+
// Returns list of device Ids we need to allocate with Allocate rpc call.
// Returns empty list in case we don't need to issue the Allocate rpc call.
func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, required int, reusableDevices sets.Set[string]) (sets.Set[string], error) {
@@ -630,7 +702,16 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
if m.allocatedDevices[resource] == nil {
m.allocatedDevices[resource] = sets.New[string]()
}
- for device := range devices.Difference(allocated) {
+ availableDevices := sets.List[string](devices.Difference(allocated))
+ // If we're dealing with isolcpus and SMT is enabled, reorder to group SMT siblings together.
+ if resource == "windriver.com/isolcpus" && len(devices) > 0 && smt_enabled() {
+ var err error
+ availableDevices, err = order_devices_by_sibling(devices.Difference(allocated), needed)
+ if err != nil {
+ klog.Errorf("error in order_devices_by_sibling: %v", err)
+ }
+ }
+ for _, device := range availableDevices {
m.allocatedDevices[resource].Insert(device)
allocated.Insert(device)
needed--
--
2.25.1
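The re-ordering strategy described in the commit message above can also be shown on its own: once sibling pairs and leftover singletons are known, pairs are listed first, and for an odd request one singleton is moved to the front so that taking the first 'needed' entries never splits a pair. A minimal standalone sketch, where orderForAllocation and the sample values are illustrative only and not the manager.go implementation:

package main

import "fmt"

// orderForAllocation arranges isolated CPU IDs so SMT sibling pairs stay
// adjacent. pairs holds CPUs whose sibling is also available (already grouped
// two by two); singles holds CPUs whose sibling is not available. For an odd
// request, one singleton is placed first so taking the first 'needed' CPUs
// does not split a sibling pair.
func orderForAllocation(pairs, singles []string, needed int) []string {
    if needed%2 == 0 || len(singles) == 0 {
        return append(append([]string{}, pairs...), singles...)
    }
    out := []string{singles[0]}
    out = append(out, pairs...)
    return append(out, singles[1:]...)
}

func main() {
    pairs := []string{"2", "10", "3", "11"} // sibling pairs (2,10) and (3,11)
    singles := []string{"5"}
    fmt.Println(orderForAllocation(pairs, singles, 3)) // [5 2 10 3 11]
    fmt.Println(orderForAllocation(pairs, singles, 4)) // [2 10 3 11 5]
}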
@ -0,0 +1,10 @@
kubeadm-create-platform-pods-with-zero-CPU-resources.patch
kubernetes-make-isolcpus-allocation-SMT-aware.patch
kubelet-sort-isolcpus-allocation-when-SMT-enabled.patch
kubelet-cpumanager-disable-CFS-quota-throttling.patch
kubelet-cpumanager-keep-normal-containers-off-reserv.patch
kubelet-cpumanager-platform-pods-on-reserved-cpus.patch
kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch
kubelet-CFS-quota-throttling-for-non-integer-cpulimit.patch
kubeadm-reduce-UpgradeManifestTimeout.patch
kubeadm-readiness-probe-timeout-core-dns.patch