Support anti-affinity across arbitrary topology keys

Closes #10

Signed-off-by: Sean Eagan <seaneagan1@gmail.com>
Change-Id: Ie2dd2ac6c986c6802a0ddf8efee03854a76ea13e
Sean Eagan 2021-03-12 14:14:29 -06:00
parent 190c28ce44
commit 2659215b84
9 changed files with 81 additions and 141 deletions

View File

@@ -41,7 +41,7 @@ spec:
description: 'NodeSet are the the list of Nodes objects workers, or
ControlPlane that define expectations for the Tenant Clusters Includes
artifacts to associate with each defined namespace Such as : - Roles
for the Nodes - Flavor for theh Nodes image - Scheduling expectations
for the Nodes - Flavor for the Nodes image - Anti-affinity expectations
- Scale of the group of Nodes'
properties:
count:
@@ -99,13 +99,12 @@ spec:
"value". The requirements are ANDed.
type: object
type: object
spreadTopology:
description: PlaceHolder until we define the real expected Implementation
Scheduling define constraints that allow the SIP Scheduler to
identify the required BMH's to allow CAPI to build a cluster
enum:
- PerRack
- PerHost
topologyKey:
description: TopologyKey is similar to the same named field in
the kubernetes Pod anti-affinity API. If two BMHs are labeled
with this key and have identical values for that label, they
are considered to be in the same topology domain, and thus only
one will be scheduled.
type: string
type: object
description: Nodes defines the set of nodes to schedule for each BMH
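
The topologyKey semantics introduced above reduce to a simple label comparison between two BareMetalHosts. The following is a minimal, hedged sketch of that predicate; the function name and map-based host representation are illustrative only and not part of the SIP codebase.

package main

import "fmt"

// sameTopologyDomain reports whether two BMH label sets fall into the same
// topology domain for a given key: both must carry the label and both must
// have the identical value for it.
func sameTopologyDomain(a, b map[string]string, topologyKey string) bool {
	av, aok := a[topologyKey]
	bv, bok := b[topologyKey]
	return aok && bok && av == bv
}

func main() {
	bmh1 := map[string]string{"vino.airshipit.org/rack": "r006", "vino.airshipit.org/host": "rdm9r006o001"}
	bmh2 := map[string]string{"vino.airshipit.org/rack": "r006", "vino.airshipit.org/host": "rdm9r006o002"}

	// Same rack: with topologyKey vino.airshipit.org/rack only one of the two
	// hosts may be scheduled for a given role.
	fmt.Println(sameTopologyDomain(bmh1, bmh2, "vino.airshipit.org/rack")) // true
	// Distinct host labels: with topologyKey vino.airshipit.org/host both are eligible.
	fmt.Println(sameTopologyDomain(bmh1, bmh2, "vino.airshipit.org/host")) // false
}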

View File

@@ -10,14 +10,14 @@ spec:
ControlPlane:
labelSelector:
vino.airshipit.org/flavor: control-plane
spreadTopology: PerRack
topologyKey: vino.airshipit.org/rack
count:
active: 1
standby: 1
Worker:
labelSelector:
vino.airshipit.org/flavor: worker
spreadTopology: PerHost
topologyKey: vino.airshipit.org/host
count:
active: 1
standby: 1 # Slew for upgrades

View File

@@ -4,8 +4,8 @@ metadata:
name: rdm9r006o002
labels:
vino.airshipit.org/flavor: control-plane
sip.airshipit.org/rack: r006
sip.airshipit.org/server: rdm9r006o002
vino.airshipit.org/rack: r006
vino.airshipit.org/host: rdm9r006o002
spec:
online: true
bmc:
@@ -25,8 +25,8 @@ metadata:
name: rdm9r006o001
labels:
vino.airshipit.org/flavor: control-plane
sip.airshipit.org/rack: r006
sip.airshipit.org/server: rdm9r006o001
vino.airshipit.org/rack: r006
vino.airshipit.org/host: rdm9r006o001
spec:
online: true
bmc:
@@ -46,8 +46,8 @@ metadata:
name: rdm9r007o001
labels:
vino.airshipit.org/flavor: control-plane
sip.airshipit.org/rack: r007
sip.airshipit.org/server: rdm9r007o001
vino.airshipit.org/rack: r007
vino.airshipit.org/host: rdm9r007o001
spec:
online: true
bmc:
@@ -67,8 +67,8 @@ metadata:
name: rdm9r007o002
labels:
vino.airshipit.org/flavor: worker
sip.airshipit.org/rack: r007
sip.airshipit.org/server: rdm9r007o002
vino.airshipit.org/rack: r007
vino.airshipit.org/host: rdm9r007o002
spec:
online: true
bmc:
@@ -88,8 +88,8 @@ metadata:
name: rdm9r008o002
labels:
vino.airshipit.org/flavor: worker
sip.airshipit.org/rack: r008
sip.airshipit.org/server: rdm9r008o002
vino.airshipit.org/rack: r008
vino.airshipit.org/host: rdm9r008o002
spec:
online: true
bmc:
@@ -109,8 +109,8 @@ metadata:
name: rdm9r009o002
labels:
vino.airshipit.org/flavor: worker
sip.airshipit.org/rack: r009
sip.airshipit.org/server: rdm9r009o002
vino.airshipit.org/rack: r009
vino.airshipit.org/host: rdm9r009o002
spec:
online: true
bmc:

View File

@@ -171,8 +171,8 @@ for the Tenant Clusters
Includes artifacts to associate with each defined namespace
Such as :
- Roles for the Nodes
- Flavor for theh Nodes image
- Scheduling expectations
- Flavor for the Nodes image
- Anti-affinity expectations
- Scale of the group of Nodes</p>
<div class="md-typeset__scrollwrap">
<div class="md-typeset__table">
@@ -199,18 +199,15 @@ Kubernetes meta/v1.LabelSelector
</tr>
<tr>
<td>
<code>spreadTopology</code><br>
<code>topologyKey</code><br>
<em>
<a href="#airship.airshipit.org/v1.SpreadTopology">
SpreadTopology
</a>
string
</em>
</td>
<td>
<p>PlaceHolder until we define the real expected
Implementation
Scheduling define constraints that allow the SIP Scheduler
to identify the required BMH&rsquo;s to allow CAPI to build a cluster</p>
<p>TopologyKey is similar to the same named field in the kubernetes Pod anti-affinity API.
If two BMHs are labeled with this key and have identical values for that
label, they are considered to be in the same topology domain, and thus only one will be scheduled.</p>
</td>
</tr>
<tr>
@@ -526,12 +523,6 @@ SIPClusterServices
</table>
</div>
</div>
<h3 id="airship.airshipit.org/v1.SpreadTopology">SpreadTopology
(<code>string</code> alias)</h3>
<p>
(<em>Appears on:</em>
<a href="#airship.airshipit.org/v1.NodeSet">NodeSet</a>)
</p>
<div class="admonition note">
<p class="last">This page was automatically generated with <code>gen-crd-api-reference-docs</code></p>
</div>

View File

@@ -20,9 +20,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
// +kubebuilder:object:root=true
// SIPClusterList contains a list of SIPCluster
@@ -46,9 +43,6 @@ type SIPCluster struct {
// SIPClusterSpec defines the desired state of a SIPCluster
type SIPClusterSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
// Important: Run "make manifests to regenerate code after modifying this file
// Nodes defines the set of nodes to schedule for each BMH role.
Nodes map[BMHRole]NodeSet `json:"nodes,omitempty"`
@@ -131,34 +125,20 @@ const (
// Includes artifacts to associate with each defined namespace
// Such as :
// - Roles for the Nodes
// - Flavor for theh Nodes image
// - Scheduling expectations
// - Flavor for the Nodes image
// - Anti-affinity expectations
// - Scale of the group of Nodes
//
type NodeSet struct {
// LabelSelector is the BMH label selector to use.
LabelSelector metav1.LabelSelector `json:"labelSelector,omitempty"`
// PlaceHolder until we define the real expected
// Implementation
// Scheduling define constraints that allow the SIP Scheduler
// to identify the required BMH's to allow CAPI to build a cluster
Scheduling SpreadTopology `json:"spreadTopology,omitempty"`
// TopologyKey is similar to the same named field in the kubernetes Pod anti-affinity API.
// If two BMHs are labeled with this key and have identical values for that
// label, they are considered to be in the same topology domain, and thus only one will be scheduled.
TopologyKey string `json:"topologyKey,omitempty"`
// Count defines the scale expectations for the Nodes
Count *NodeCount `json:"count,omitempty"`
}
// +kubebuilder:validation:Enum=PerRack;PerHost
type SpreadTopology string
const (
// RackAntiAffinity means the scheduling should target separate racks.
RackAntiAffinity SpreadTopology = "PerRack"
// HostAntiAffinity means the scheduling should target separate hosts.
HostAntiAffinity SpreadTopology = "PerHost"
)
type SIPClusterService struct {
Image string `json:"image"`
NodeLabels map[string]string `json:"nodeLabels,omitempty"`
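
For orientation, the ControlPlane entry from the sample SIPCluster manifest earlier in this change maps onto the updated NodeSet type roughly as sketched below; the airshipv1 import path is assumed here rather than taken from this diff.

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	airshipv1 "sipcluster/pkg/api/v1" // assumed import path for the API types above
)

func main() {
	// One control-plane BMH per rack, selected by the ViNO flavor label,
	// with one active and one standby node (matching the sample manifest).
	controlPlane := airshipv1.NodeSet{
		LabelSelector: metav1.LabelSelector{
			MatchLabels: map[string]string{"vino.airshipit.org/flavor": "control-plane"},
		},
		TopologyKey: "vino.airshipit.org/rack",
		Count:       &airshipv1.NodeCount{Active: 1, Standby: 1},
	}
	fmt.Println("control-plane topology key:", controlPlane.TopologyKey)
}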

View File

@@ -62,11 +62,6 @@ const (
const (
BaseAirshipSelector = "sip.airshipit.org"
// This is a placeholder . Need to synchronize with ViNO the constants below
// Probable pll this or eqivakent values from a ViNO pkg
RackLabel = BaseAirshipSelector + "/rack"
ServerLabel = BaseAirshipSelector + "/server"
// This label is applied to all BMHs scheduled to a given SIPCluster.
SipClusterLabelName = "cluster"
SipClusterLabel = BaseAirshipSelector + "/" + SipClusterLabelName
@@ -216,23 +211,15 @@ func (ml *MachineList) identifyNodes(sip airshipv1.SIPCluster,
// If using the SIP Sheduled label, we now have a list of BMH;'s
// that are not scheduled
// Next I need to apply the constraints
// This willl be a poor mans simple scheduler
// Only deals with AntiAffinity at :
// - Racks : Dont select two machines in the same rack
// - Server : Dont select two machines in the same server
ml.Log.Info("Trying to identify BaremetalHosts that match scheduling parameters",
"initial BMH count", len(bmhList.Items))
for nodeRole, nodeCfg := range sip.Spec.Nodes {
logger := ml.Log.WithValues("role", nodeRole) //nolint:govet
ml.ReadyForScheduleCount[nodeRole] = 0
logger.Info("Getting host constraints")
scheduleSetMap, err := ml.initScheduleMaps(nodeRole, nodeCfg.Scheduling)
if err != nil {
return err
}
scheduleSetMap := ml.initScheduleMaps(nodeRole, nodeCfg.TopologyKey)
logger.Info("Matching hosts against constraints")
err = ml.scheduleIt(nodeRole, nodeCfg, bmhList, scheduleSetMap, c, GetClusterLabel(sip))
err := ml.scheduleIt(nodeRole, nodeCfg, bmhList, scheduleSetMap, c, GetClusterLabel(sip))
if err != nil {
return err
}
@@ -241,25 +228,15 @@ func (ml *MachineList) identifyNodes(sip airshipv1.SIPCluster,
}
func (ml *MachineList) initScheduleMaps(role airshipv1.BMHRole,
constraint airshipv1.SpreadTopology) (*ScheduleSet, error) {
logger := ml.Log.WithValues("role", role, "spread topology", constraint)
var labelName string
switch constraint {
case airshipv1.RackAntiAffinity:
labelName = RackLabel
case airshipv1.HostAntiAffinity:
labelName = ServerLabel
default:
logger.Info("constraint not supported")
return nil, ErrorUknownSpreadTopology{Topology: constraint}
}
topologyKey string) *ScheduleSet {
logger := ml.Log.WithValues("role", role, "topologyKey", topologyKey)
logger.Info("Marking constraint as active")
logger.Info("Marking schedule set as active")
return &ScheduleSet{
active: true,
set: make(map[string]bool),
labelName: labelName,
}, nil
active: true,
set: make(map[string]bool),
topologyKey: topologyKey,
}
}
func (ml *MachineList) countScheduledAndTobeScheduled(nodeRole airshipv1.BMHRole,
@@ -328,28 +305,28 @@ func (ml *MachineList) scheduleIt(nodeRole airshipv1.BMHRole, nodeCfg airshipv1.
if !ml.hasMachine(bmh) {
logger.Info("BaremetalHost not yet marked as ready to be scheduled")
constraint := nodeCfg.Scheduling
topologyKey := nodeCfg.TopologyKey
// Do I care about this constraint
logger := logger.WithValues("constraint", constraint) //nolint:govet
logger := logger.WithValues("topologyKey", topologyKey) //nolint:govet
if scheduleSet.Active() {
logger.Info("constraint is active")
// Check if bmh has the label
bmhConstraintCondition, match, err := scheduleSet.GetLabels(labels.Set(bmh.Labels), &nodeCfg.LabelSelector)
topologyDomain, match, err := scheduleSet.GetLabels(labels.Set(bmh.Labels), &nodeCfg.LabelSelector)
if err != nil {
return err
}
logger.Info("Checked BMH constraint condition and label selector",
"constraint condition", bmhConstraintCondition,
logger.Info("Checked BMH topology key and label selector",
"topology domain", topologyDomain,
"label selector match", match)
validBmh = match
// If it does match the flavor
if bmhConstraintCondition != "" && match {
// If it does match the label selector
if topologyDomain != "" && match {
// If its in the list already for the constraint , theen this bmh is disqualified. Skip it
if scheduleSet.Exists(bmhConstraintCondition) {
logger.Info("Constraint slot is alrady taken some BMH from this constraint is already allocated, skipping it")
if scheduleSet.Exists(topologyDomain) {
logger.Info("Topology domain has already been scheduled to, skipping it")
continue
} else {
scheduleSet.Add(bmhConstraintCondition)
scheduleSet.Add(topologyDomain)
}
}
}
@@ -691,8 +668,8 @@ type ScheduleSet struct {
active bool
// Holds list of elements in the Set
set map[string]bool
// Holds the label name that identifies the constraint
labelName string
// Holds the topology key that identifies the constraint
topologyKey string
}
func (ss *ScheduleSet) Active() bool {
@@ -717,7 +694,7 @@ func (ss *ScheduleSet) GetLabels(labels labels.Labels, labelSelector *metav1.Lab
if err == nil {
match = selector.Matches(labels)
}
return labels.Get(ss.labelName), match, err
return labels.Get(ss.topologyKey), match, err
}
// ApplyLabels adds the appropriate labels to the BMHs that are ready to be scheduled
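
The net effect of the ScheduleSet changes above is a small per-role dedup over topology domains: read the BMH's value for the configured topology key, skip the host if that domain already holds a scheduled BMH, and otherwise claim the domain. Below is a minimal standalone sketch of that flow, under the assumption that hosts missing the label are not constrained; the function and variable names are illustrative, not the package's API.

package main

import "fmt"

// pickOnePerDomain selects at most one host per distinct value of topologyKey,
// mirroring the Exists/Add flow that scheduleIt runs against a ScheduleSet.
func pickOnePerDomain(hosts []map[string]string, topologyKey string) []map[string]string {
	seen := map[string]bool{} // topology domains that already hold a scheduled host
	var picked []map[string]string
	for _, labels := range hosts {
		domain := labels[topologyKey]
		if domain != "" && seen[domain] {
			continue // this domain is already taken; skip the host
		}
		if domain != "" {
			seen[domain] = true
		}
		picked = append(picked, labels) // unlabeled hosts are not constrained here
	}
	return picked
}

func main() {
	hosts := []map[string]string{
		{"vino.airshipit.org/rack": "r006", "vino.airshipit.org/host": "rdm9r006o001"},
		{"vino.airshipit.org/rack": "r006", "vino.airshipit.org/host": "rdm9r006o002"},
		{"vino.airshipit.org/rack": "r007", "vino.airshipit.org/host": "rdm9r007o001"},
	}
	// Spreading per rack keeps one host from r006 and one from r007.
	fmt.Println(len(pickOnePerDomain(hosts, "vino.airshipit.org/rack"))) // 2
	// Spreading per host keeps all three.
	fmt.Println(len(pickOnePerDomain(hosts, "vino.airshipit.org/host"))) // 3
}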

View File

@@ -38,15 +38,6 @@ func (e ErrorHostIPNotFound) Error() string {
"Infrastructure Service %s", e.HostName, e.IPInterface, e.Message)
}
// ErrorUknownSpreadTopology is returned when wrong AuthType is provided
type ErrorUknownSpreadTopology struct {
Topology airshipv1.SpreadTopology
}
func (e ErrorUknownSpreadTopology) Error() string {
return fmt.Sprintf("Uknown spread topology '%s'", e.Topology)
}
// ErrorNetworkDataNotFound is returned when NetworkData metadata is missing from BMH
type ErrorNetworkDataNotFound struct {
BMH metal3.BareMetalHost

View File

@@ -190,24 +190,24 @@ var _ = Describe("SIPCluster controller", func() {
// Create BMH test objects
var nodes []*metal3.BareMetalHost
baremetalServer := "r06o001"
hostLabel := "r06o001"
bmh, networkData := testutil.CreateBMH(0, testNamespace, airshipv1.RoleControlPlane, 6)
bmh.Labels[bmhpkg.ServerLabel] = baremetalServer
bmh.Labels[testutil.HostLabel] = hostLabel
nodes = append(nodes, bmh)
Expect(k8sClient.Create(context.Background(), bmh)).Should(Succeed())
Expect(k8sClient.Create(context.Background(), networkData)).Should(Succeed())
bmh, networkData = testutil.CreateBMH(1, testNamespace, airshipv1.RoleWorker, 6)
bmh.Labels[bmhpkg.ServerLabel] = baremetalServer
bmh.Labels[testutil.HostLabel] = hostLabel
nodes = append(nodes, bmh)
Expect(k8sClient.Create(context.Background(), bmh)).Should(Succeed())
Expect(k8sClient.Create(context.Background(), networkData)).Should(Succeed())
bmh, networkData = testutil.CreateBMH(2, testNamespace, airshipv1.RoleWorker, 6)
bmh.Labels[bmhpkg.ServerLabel] = baremetalServer
bmh.Labels[testutil.HostLabel] = hostLabel
nodes = append(nodes, bmh)
Expect(k8sClient.Create(context.Background(), bmh)).Should(Succeed())
@@ -250,24 +250,24 @@ var _ = Describe("SIPCluster controller", func() {
// Create BMH test objects
var nodes []*metal3.BareMetalHost
baremetalServer := "r06o001"
hostLabel := "r06o001"
bmh, networkData := testutil.CreateBMH(0, testNamespace, airshipv1.RoleControlPlane, 6)
bmh.Labels[bmhpkg.ServerLabel] = baremetalServer
bmh.Labels[testutil.HostLabel] = hostLabel
nodes = append(nodes, bmh)
Expect(k8sClient.Create(context.Background(), bmh)).Should(Succeed())
Expect(k8sClient.Create(context.Background(), networkData)).Should(Succeed())
bmh, networkData = testutil.CreateBMH(1, testNamespace, airshipv1.RoleControlPlane, 6)
bmh.Labels[bmhpkg.ServerLabel] = baremetalServer
bmh.Labels[testutil.HostLabel] = hostLabel
nodes = append(nodes, bmh)
Expect(k8sClient.Create(context.Background(), bmh)).Should(Succeed())
Expect(k8sClient.Create(context.Background(), networkData)).Should(Succeed())
bmh, networkData = testutil.CreateBMH(2, testNamespace, airshipv1.RoleWorker, 6)
bmh.Labels[bmhpkg.ServerLabel] = baremetalServer
bmh.Labels[testutil.HostLabel] = hostLabel
nodes = append(nodes, bmh)
Expect(k8sClient.Create(context.Background(), bmh)).Should(Succeed())
@@ -337,11 +337,11 @@ var _ = Describe("SIPCluster controller", func() {
sipCluster, nodeSSHPrivateKeys := testutil.CreateSIPCluster(clusterName, testNamespace, 1, 2)
controlPlaneSpec := sipCluster.Spec.Nodes[airshipv1.RoleControlPlane]
controlPlaneSpec.Scheduling = airshipv1.RackAntiAffinity
controlPlaneSpec.TopologyKey = testutil.RackLabel
sipCluster.Spec.Nodes[airshipv1.RoleControlPlane] = controlPlaneSpec
workerSpec := sipCluster.Spec.Nodes[airshipv1.RoleWorker]
workerSpec.Scheduling = airshipv1.RackAntiAffinity
workerSpec.TopologyKey = testutil.RackLabel
sipCluster.Spec.Nodes[airshipv1.RoleWorker] = workerSpec
Expect(k8sClient.Create(context.Background(), nodeSSHPrivateKeys)).Should(Succeed())
@@ -402,11 +402,11 @@ var _ = Describe("SIPCluster controller", func() {
sipCluster, nodeSSHPrivateKeys := testutil.CreateSIPCluster(clusterName, testNamespace, 2, 1)
controlPlaneSpec := sipCluster.Spec.Nodes[airshipv1.RoleControlPlane]
controlPlaneSpec.Scheduling = airshipv1.RackAntiAffinity
controlPlaneSpec.TopologyKey = testutil.RackLabel
sipCluster.Spec.Nodes[airshipv1.RoleControlPlane] = controlPlaneSpec
workerSpec := sipCluster.Spec.Nodes[airshipv1.RoleWorker]
workerSpec.Scheduling = airshipv1.RackAntiAffinity
workerSpec.TopologyKey = testutil.RackLabel
sipCluster.Spec.Nodes[airshipv1.RoleWorker] = workerSpec
Expect(k8sClient.Create(context.Background(), nodeSSHPrivateKeys)).Should(Succeed())

View File

@@ -26,13 +26,14 @@ func UnscheduledSelector() labels.Selector {
return sel.Add(*r)
}
// NOTE(aw442m): These constants have been redefined from the bmh package in order to avoid an import cycle.
const (
sipRackLabel = "sip.airshipit.org/rack"
sipClusterLabel = "sip.airshipit.org/cluster"
sipServerLabel = "sip.airshipit.org/server"
// NOTE(aw442m): These constants have been redefined from the bmh package in order to avoid an import cycle.
sipClusterLabelName = "cluster"
sipClusterLabel = "sip.airshipit.org" + "/" + sipClusterLabelName
bmhLabel = "example.org/bmh-label"
HostLabel = "vino.airshipit.org/host"
RackLabel = "vino.airshipit.org/rack"
bmhLabel = "example.org/bmh-label"
sshPrivateKeyBase64 = "DUMMY_DATA"
@@ -185,16 +186,17 @@ const (
// CreateBMH initializes a BaremetalHost with specific parameters for use in test cases.
func CreateBMH(node int, namespace string, role airshipv1.BMHRole, rack int) (*metal3.BareMetalHost, *corev1.Secret) {
rackLabel := fmt.Sprintf("r%d", rack)
rackLabelValue := fmt.Sprintf("r%d", rack)
hostLabelValue := fmt.Sprintf("stl2%so%d", rackLabelValue, node)
networkDataName := fmt.Sprintf("node%d-network-data", node)
return &metal3.BareMetalHost{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("node0%d", node),
Namespace: namespace,
Labels: map[string]string{
bmhLabel: bmhRoleToLabelValue[role],
sipRackLabel: rackLabel,
sipServerLabel: fmt.Sprintf("stl2%so%d", rackLabel, node),
bmhLabel: bmhRoleToLabelValue[role],
RackLabel: rackLabelValue,
HostLabel: hostLabelValue,
},
},
Spec: metal3.BareMetalHostSpec{
@@ -239,7 +241,7 @@ func CreateSIPCluster(name string, namespace string, controlPlanes int, workers
bmhLabel: bmhRoleToLabelValue[airshipv1.RoleControlPlane],
},
},
Scheduling: airshipv1.HostAntiAffinity,
TopologyKey: HostLabel,
Count: &airshipv1.NodeCount{
Active: controlPlanes,
Standby: 0,
@@ -251,7 +253,7 @@ func CreateSIPCluster(name string, namespace string, controlPlanes int, workers
bmhLabel: bmhRoleToLabelValue[airshipv1.RoleWorker],
},
},
Scheduling: airshipv1.HostAntiAffinity,
TopologyKey: HostLabel,
Count: &airshipv1.NodeCount{
Active: workers,
Standby: 0,