Skip to content

Commit

Permalink
feat: allow coexistence of podoffloadingpolicy and runtimeclass
Browse files Browse the repository at this point in the history
  • Loading branch information
claudiolor committed Dec 16, 2024
1 parent 7e265cb commit 36b4b26
Show file tree
Hide file tree
Showing 12 changed files with 206 additions and 189 deletions.
6 changes: 3 additions & 3 deletions cmd/webhook/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ func main() {
"Enforce offerer-side that offloaded pods do not exceed offered resources (based on container limits)")
refreshInterval := pflag.Duration("resource-validator-refresh-interval",
5*time.Minute, "The interval at which the resource validator cache is refreshed")
addVirtualNodeTolerationOnOffloadedPods := pflag.Bool("add-virtual-node-toleration-on-offloaded-pods", false,
"Automatically add the virtual node toleration on offloaded pods")
liqoRuntimeClassName := pflag.String("liqo-runtime-class", "liqo",
"Define the Liqo runtime class forcing the pods to be scheduled on virtual nodes")

flagsutils.InitKlogFlags(pflag.CommandLine)
restcfg.InitFlags(pflag.CommandLine)
Expand Down Expand Up @@ -192,7 +192,7 @@ func main() {
mgr.GetWebhookServer().Register("/validate/shadowpods", &webhook.Admission{Handler: spv})
mgr.GetWebhookServer().Register("/mutate/shadowpods", shadowpodswh.NewMutator(mgr.GetClient()))
mgr.GetWebhookServer().Register("/validate/namespace-offloading", nsoffwh.New())
mgr.GetWebhookServer().Register("/mutate/pod", podwh.New(mgr.GetClient(), *addVirtualNodeTolerationOnOffloadedPods))
mgr.GetWebhookServer().Register("/mutate/pod", podwh.New(mgr.GetClient(), *liqoRuntimeClassName))
mgr.GetWebhookServer().Register("/mutate/virtualnodes", virtualnodewh.New(
mgr.GetClient(), clusterID, *podcidr, *liqoNamespace, vkOptsDefaultTemplateRef))
mgr.GetWebhookServer().Register("/validate/resourceslices", resourceslicewh.NewValidator(mgr.GetClient()))
Expand Down
1 change: 0 additions & 1 deletion deployments/liqo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@
| offloading.reflection.skip.annotations | list | `["cloud.google.com/neg","cloud.google.com/neg-status","kubernetes.digitalocean.com/load-balancer-id","ingress.kubernetes.io/backends","ingress.kubernetes.io/forwarding-rule","ingress.kubernetes.io/target-proxy","ingress.kubernetes.io/url-map","metallb.universe.tf/address-pool","metallb.universe.tf/ip-allocated-from-pool","metallb.universe.tf/loadBalancerIPs","loadbalancer.openstack.org/load-balancer-id"]` | List of annotations that must not be reflected on remote clusters. |
| offloading.reflection.skip.labels | list | `[]` | List of labels that must not be reflected on remote clusters. |
| offloading.runtimeClass.annotations | object | `{}` | Annotations for the runtime class. |
| offloading.runtimeClass.enabled | bool | `false` | |
| offloading.runtimeClass.handler | string | `"liqo"` | Handler for the runtime class. |
| offloading.runtimeClass.labels | object | `{}` | Labels for the runtime class. |
| offloading.runtimeClass.name | string | `"liqo"` | Name of the runtime class to use for offloading. |
Expand Down
4 changes: 1 addition & 3 deletions deployments/liqo/templates/liqo-webhook-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,12 @@ spec:
- --cluster-id=$(CLUSTER_ID)
- --liqo-namespace=$(POD_NAMESPACE)
- --secret-name={{ include "liqo.prefixedName" $webhookConfig }}-certs
- --liqo-runtime-class={{ .Values.offloading.runtimeClass.name }}
- --podcidr={{ .Values.ipam.podCIDR }}
- --vk-options-default-template={{ .Release.Namespace }}/{{ printf "%s-default" $kubeletConfig.name }}
{{- if .Values.controllerManager.config.enableResourceEnforcement }}
- --enable-resource-enforcement
{{- end }}
{{- if not .Values.offloading.runtimeClass.enabled }}
- --add-virtual-node-toleration-on-offloaded-pods
{{- end }}
{{- if .Values.common.extraArgs }}
{{- toYaml .Values.common.extraArgs | nindent 10 }}
{{- end }}
Expand Down
4 changes: 0 additions & 4 deletions deployments/liqo/templates/runtime-class.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
{{- $runtimeConfig := (merge (dict "name" "runtimeclass" "module" "runtimeclass") .) -}}

{{- if .Values.offloading.runtimeClass.enabled }}

apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
Expand All @@ -25,5 +23,3 @@ scheduling:
tolerations:
{{- toYaml .Values.offloading.runtimeClass.tolerations.tolerations | nindent 4 }}
{{- end }}

{{- end }}
1 change: 0 additions & 1 deletion deployments/liqo/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ offloading:
# by setting the "disableNetworkCheck" field in the resource Spec.
disableNetworkCheck: false
runtimeClass:
enabled: false
# -- Name of the runtime class to use for offloading.
name: liqo
# -- Annotations for the runtime class.
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/offloading-with-policies.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Offloading with Policies

This tutorial aims to guide you through a tour to learn how to use the core Liqo features.
You will learn how to tune namespace offloading, and specify the target clusters through the [cluster selector](UsageOffloadingClusterSelector) concept.
You will learn how to tune namespace offloading, and specify the target clusters through the [cluster selector](../usage/namespace-offloading.md#cluster-selector) concept.

More specifically, you will configure a scenario composed of a *single entry point cluster* leveraged for the deployment of the applications (i.e., the *Venice* cluster, located in *north* Italy) and two *worker clusters* characterized by different geographical regions (i.e., the *Florence* and *Naples* clusters, respectively located in *center* and *south* Italy).
Then, you will offload a given namespace (and the applications contained therein) to a subset of the worker clusters (i.e., only to the *Naples* cluster), while allowing pods to be also scheduled on the local cluster (i.e., the *Venice* one).
Expand Down
39 changes: 24 additions & 15 deletions docs/usage/namespace-offloading.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,27 @@ In other words, an empty *cluster selector* matches all virtual clusters.
The remote clusters are backed by a Liqo Virtual Node, which allows the vanilla Kubernetes scheduler to address the remote cluster as target for pod scheduling.
However, by default the Liqo virtual nodes have a [Taint](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) applied to them, which prevents pods from being scheduled on them, unless a *namespace offloading* is enabled in the namespace where the pod is running.

You have two different ways to determine whether a pod can be scheduled on a virtual node (so on a remote cluster) and they are mutually exclusive per Liqo installation:
You have two different ways to determine whether a pod can be scheduled on a virtual node (so on a remote cluster):

* Defining a **pod offloading strategy** for the offloaded namespaces (default), which tells where the pods created on that namespace should be scheduled (whether in the local cluster, the remote clusters, or both letting the vanilla K8s scheduler decide).
* Setting the Liqo **RuntimeClass** in the pod: in this case, the namespace offloading strategy is ignored, and the pod is scheduled on the virtual nodes.

Note that these two methods can be used in conjunction to define how pods should be scheduled in the offloaded namespace.
For example, a user might want to schedule all the pods on physical nodes, and only a subset of them on virtual nodes.
To do so, it is possible to define `Local` as *pod offloading strategy* of the namespace, so that all the pods are scheduled locally and only the ones having the Liqo runtime class will be executed on a virtual node.

### Pod offloading strategy

The *pod offloading strategy* defines high-level constraints about pod scheduling, and can be configured through the `--pod-offloading-strategy` flag.
The *pod offloading strategy* defines high-level constraints about pod scheduling, and can be configured through the `--pod-offloading-strategy` flag of the `liqoctl offload namespace` command. For example:

```bash
liqoctl offload namespace NAMESPACE_NAME --pod-offloading-strategy Local
```

The accepted values are:

* **LocalAndRemote** (default): pods deployed in the local namespace can be scheduled **both onto local nodes and onto virtual nodes**, hence possibly offloaded to remote clusters. This will leave the Kubernetes scheduler to decide about the best placement, based on the available resources and the pod requirements. You can still influence the scheduler decision on which pods should be scheduled onto virtual nodes using the [standard Kubernetes mechanisms to assign Pods to Nodes](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/).
* **Local**: pods deployed in the local namespace are enforced to be scheduled onto **local nodes only**, hence never offloaded to remote clusters.
* **LocalAndRemote** (default): pods deployed in the local namespace can be scheduled **both onto local nodes and onto virtual nodes**, hence possibly offloaded to remote clusters. This will leave the Kubernetes scheduler to decide about the best placement, based on the available resources and the pod requirements. You can still influence the scheduler decision on which pods should be scheduled onto virtual nodes using the [standard Kubernetes mechanisms to assign Pods to Nodes](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/) or via the [Liqo runtime class](#runtimeclass).
* **Local**: pods deployed in the local namespace are enforced to be scheduled onto **local nodes only**, hence never offloaded to remote clusters (unless the pod uses the [Liqo runtime class](#runtimeclass)).
* **Remote**: pods deployed in the local namespace are enforced to be scheduled onto **remote nodes only**, hence always offloaded to remote clusters.

It is worth mentioning that, independently from the selected pod offloading strategy, the services that expose them are propagated to the entire namespace (both locally and in the remote cluster), hence enabling the above pods to be consumed from anywhere in the Liqo domain, as shown in the [service offloading example](../examples/service-offloading.md).
Expand All @@ -105,22 +114,22 @@ Due to current limitations of Liqo, the pods violating the *pod offloading strat

### RuntimeClass

At Liqo install or upgrade time, you can specify a flag to enable the creation of a [RuntimeClass](https://kubernetes.io/docs/concepts/containers/runtime-class/) to be used to specify the pods that should be offloaded to the virtual nodes.
By default, Liqo creates a [RuntimeClass](https://kubernetes.io/docs/concepts/containers/runtime-class/) with name `liqo`, which can be used to **force pods to be scheduled on virtual nodes (so on the provider clusters) independently of the [pod offloading strategy](#pod-offloading-strategy)** configured on the offloaded namespace.

```bash
liqoctl install [...] --set offloading.runtimeClass.enabled=true
```
For example, if the *pod offloading strategy* is `Local`, all the pods will be scheduled on the local cluster unless the Liqo runtime class is specified in the manifest of the pod.

or
To use the Liqo runtime class, you will need to specify `runtimeClassName: liqo` in the Pod spec:

```bash
helm install liqo liqo/liqo [...] --set offloading.runtimeClass.enabled=true
```yaml
apiVersion: v1
kind: Pod
metadata:
name: mypod
spec:
runtimeClassName: liqo
# ...
```

The RuntimeClass is created with the name `liqo`, and it is configured to add a Toleration to the virtual node taint for pods selecting it and to set a node selector to the virtual node's label.

(UsageOffloadingClusterSelector)=

## Unoffloading a namespace

The offloading of a namespace can be disabled through the dedicated *liqoctl* command, causing in turn the deletion of all resources reflected to remote clusters (including the namespaces themselves), and triggering the rescheduling of all offloaded pods locally:
Expand Down
61 changes: 34 additions & 27 deletions pkg/webhooks/pod/mutations.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,24 @@ func getVirtualNodeToleration() corev1.Toleration {
// or RemotePodOffloadingStrategyType. In case of PodOffloadingStrategyType not recognized, returns an error.
func createTolerationFromNamespaceOffloading(strategy offloadingv1beta1.PodOffloadingStrategyType) (corev1.Toleration, error) {
var toleration corev1.Toleration
switch {
case strategy == offloadingv1beta1.LocalAndRemotePodOffloadingStrategyType, strategy == offloadingv1beta1.RemotePodOffloadingStrategyType:
switch strategy {
case offloadingv1beta1.LocalAndRemotePodOffloadingStrategyType, offloadingv1beta1.RemotePodOffloadingStrategyType:
// The virtual-node toleration must be added.
toleration = getVirtualNodeToleration()
case offloadingv1beta1.LocalPodOffloadingStrategyType:
return toleration, nil
default:
err := fmt.Errorf("PodOffloadingStrategyType '%s' not recognized", strategy)
klog.Error(err)
return corev1.Toleration{}, err
err := fmt.Errorf("unknown PodOffloadingStrategyType %q", strategy)
return toleration, err
}
return toleration, nil
}

// createNodeSelectorFromNamespaceOffloading creates the right NodeSelector according to the PodOffloadingStrategy chosen.
func createNodeSelectorFromNamespaceOffloading(nsoff *offloadingv1beta1.NamespaceOffloading) (*corev1.NodeSelector, error) {
nodeSelector := nsoff.Spec.ClusterSelector
switch {
case nsoff.Spec.PodOffloadingStrategy == offloadingv1beta1.RemotePodOffloadingStrategyType:
switch nsoff.Spec.PodOffloadingStrategy {
case offloadingv1beta1.RemotePodOffloadingStrategyType:
// To ensure that the pod is not scheduled on local nodes, it is necessary to add to every NodeSelectorTerm a
// new NodeSelectorRequirement. This NodeSelectorRequirement requires explicitly the label
// "liqo.io/type=virtual-node" to exclude local nodes from the scheduler choice.
Expand All @@ -71,7 +72,7 @@ func createNodeSelectorFromNamespaceOffloading(nsoff *offloadingv1beta1.Namespac
})
}

case nsoff.Spec.PodOffloadingStrategy == offloadingv1beta1.LocalAndRemotePodOffloadingStrategyType:
case offloadingv1beta1.LocalAndRemotePodOffloadingStrategyType:
// In case the selector is empty, it is not necessary to modify anything, as it already allows pods to be scheduled on all nodes.
if len(nodeSelector.NodeSelectorTerms) == 0 {
return nil, nil
Expand All @@ -86,10 +87,10 @@ func createNodeSelectorFromNamespaceOffloading(nsoff *offloadingv1beta1.Namespac
}},
}
nodeSelector.NodeSelectorTerms = append(nodeSelector.NodeSelectorTerms, newNodeSelectorTerm)

case offloadingv1beta1.LocalPodOffloadingStrategyType:
return nil, nil
default:
err := fmt.Errorf("PodOffloadingStrategyType '%s' not recognized", nsoff.Spec.PodOffloadingStrategy)
klog.Error(err)
err := fmt.Errorf("unknown PodOffloadingStrategyType %q", nsoff.Spec.PodOffloadingStrategy)
return nil, err
}
return &nodeSelector, nil
Expand Down Expand Up @@ -130,7 +131,8 @@ func fillPodWithTheNewNodeSelector(imposedNodeSelector *corev1.NodeSelector, pod
// chosen in the CR. Two possible modifications:
// - The VirtualNodeToleration is added to the Pod Toleration if necessary.
// - The old Pod NodeSelector is substituted with a new one according to the PodOffloadingStrategyType.
func mutatePod(namespaceOffloading *offloadingv1beta1.NamespaceOffloading, pod *corev1.Pod, addVirtualNodeToleration bool) error {
// No changes are applied to the Pod when the Liqo runtime class is specified.
func mutatePod(namespaceOffloading *offloadingv1beta1.NamespaceOffloading, pod *corev1.Pod, liqoRuntimeClassName string) error {
// The NamespaceOffloading CR contains information about the PodOffloadingStrategy and
// the NodeSelector inserted by the user (ClusterSelector field).
klog.V(5).Infof("Chosen strategy: %s", namespaceOffloading.Spec.PodOffloadingStrategy)
Expand All @@ -140,31 +142,36 @@ func mutatePod(namespaceOffloading *offloadingv1beta1.NamespaceOffloading, pod *
return nil
}

if addVirtualNodeToleration {
// Mutate Pod affinity and tolerations only if the Pod has NOT the Liqo runtime class.
hasLiqoRuntimeClass := pod.Spec.RuntimeClassName != nil && *pod.Spec.RuntimeClassName == liqoRuntimeClassName
if !hasLiqoRuntimeClass {
// Create the right Toleration according to the PodOffloadingStrategy case.
toleration, err := createTolerationFromNamespaceOffloading(namespaceOffloading.Spec.PodOffloadingStrategy)
if err != nil {
klog.Errorf("The NamespaceOffloading in namespace '%s' has unknown strategy '%s'",
namespaceOffloading.Namespace, namespaceOffloading.Spec.PodOffloadingStrategy)
return err
wErr := fmt.Errorf("unable to define tolerations for pod %q in namespace %q: %w",
pod.Name, namespaceOffloading.Namespace, err)
klog.Error(wErr)
return wErr
}
klog.V(5).Infof("Generated Toleration: %s", toleration.String())

// It is necessary to add the just created toleration.
pod.Spec.Tolerations = append(pod.Spec.Tolerations, toleration)
}

// Create the right NodeSelector according to the PodOffloadingStrategy case.
imposedNodeSelector, err := createNodeSelectorFromNamespaceOffloading(namespaceOffloading)
if err != nil {
klog.Errorf("The NamespaceOffloading in namespace '%s' has unknown strategy '%s'",
namespaceOffloading.Namespace, namespaceOffloading.Spec.PodOffloadingStrategy)
return err
// Create the right NodeSelector according to the PodOffloadingStrategy case.
imposedNodeSelector, err := createNodeSelectorFromNamespaceOffloading(namespaceOffloading)
if err != nil {
wErr := fmt.Errorf("unable to define node selectors for pod %q in namespace %q: %w",
pod.Name, namespaceOffloading.Namespace, err)
klog.Error(wErr)
return wErr
}
klog.V(5).Infof("ImposedNodeSelector: %s", imposedNodeSelector)

// Enforce the new NodeSelector policy imposed by the NamespaceOffloading creator.
fillPodWithTheNewNodeSelector(imposedNodeSelector, pod)
klog.V(5).Infof("Pod NodeSelector: %s", imposedNodeSelector)
}
klog.V(5).Infof("ImposedNodeSelector: %s", imposedNodeSelector)

// Enforce the new NodeSelector policy imposed by the NamespaceOffloading creator.
fillPodWithTheNewNodeSelector(imposedNodeSelector, pod)
klog.V(5).Infof("Pod NodeSelector: %s", imposedNodeSelector)
return nil
}
8 changes: 4 additions & 4 deletions pkg/webhooks/pod/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,16 @@ type podwh struct {
client client.Client
decoder admission.Decoder

addVirtualNodeToleration bool
runtimeClassName string
}

// New returns a new PodWebhook instance.
func New(cl client.Client, addVirtualNodeToleration bool) *webhook.Admission {
func New(cl client.Client, liqoRuntimeClassName string) *webhook.Admission {
return &webhook.Admission{Handler: &podwh{
client: cl,
decoder: admission.NewDecoder(runtime.NewScheme()),

addVirtualNodeToleration: addVirtualNodeToleration,
runtimeClassName: liqoRuntimeClassName,
}}
}

Expand Down Expand Up @@ -91,7 +91,7 @@ func (w *podwh) Handle(ctx context.Context, req admission.Request) admission.Res
return admission.Errored(http.StatusInternalServerError, errors.New("failed retrieving NamespaceOffloading"))
}

if err = mutatePod(nsoff, pod, w.addVirtualNodeToleration); err != nil {
if err = mutatePod(nsoff, pod, w.runtimeClassName); err != nil {
return admission.Errored(http.StatusInternalServerError, errors.New("failed constructing pod mutation"))
}

Expand Down
Loading

0 comments on commit 36b4b26

Please sign in to comment.