From 858a708dc424b9a564f428710ae62d2c81e31938 Mon Sep 17 00:00:00 2001 From: killianmuldoon Date: Tue, 9 Jul 2024 16:10:58 +0100 Subject: [PATCH] docs: Update wording of install instructions and fix yamls Signed-off-by: killianmuldoon --- docs/getting-started-kubernetes.rst | 454 +++++++++++----------------- 1 file changed, 180 insertions(+), 274 deletions(-) diff --git a/docs/getting-started-kubernetes.rst b/docs/getting-started-kubernetes.rst index a31e8d1..75f24e6 100644 --- a/docs/getting-started-kubernetes.rst +++ b/docs/getting-started-kubernetes.rst @@ -156,21 +156,24 @@ Below are deployment examples, which the ``values.yaml`` file provided to the He Network Operator Deployment with RDMA Shared Device Plugin ---------------------------------------------------------- -First install the Network Operator with NFD enabled +First install the Network Operator with NFD enabled: -``values.yaml`` configuration file for such a deployment: +``values.yaml``: .. code-block:: yaml nfd: enabled: true - sriovNetworkOperator: - enabled: false -Once the Network Operator is installed deploy a NicClusterPolicy with a default version of the OFED driver single RDMA resource mapped to ens1f0 netdev. +Once the Network Operator is installed create a NicClusterPolicy with +* DOCA driver +* RDMA Shared device plugin configured to a netdev with name ens1f0. + + Note: You may need to change the interface names in the NicClusterPolicy to those used by your target nodes. .. 
code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: @@ -226,22 +229,6 @@ Note: You may need to change the interface names in the NicClusterPolicy to thos } ] } - secondaryNetwork: - cniPlugins: - image: plugins - repository: ghcr.io/k8snetworkplumbingwg - version: v1.3.0 - imagePullSecrets: [] - multus: - image: multus-cni - repository: ghcr.io/k8snetworkplumbingwg - version: v3.9.3 - imagePullSecrets: [] - ipamPlugin: - image: whereabouts - repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 - imagePullSecrets: [] -------------------------------------------------------------------------------- Network Operator Deployment with Multiple Resources in RDMA Shared Device Plugin @@ -249,7 +236,7 @@ Network Operator Deployment with Multiple Resources in RDMA Shared Device Plugin -First install the Network Operator with NFD enabled +First install the Network Operator with NFD enabled: ``values.yaml``: @@ -257,13 +244,15 @@ First install the Network Operator with NFD enabled nfd: enabled: true - sriovNetworkOperator: - enabled: false -Once the Network Operator is installed deploy a NicClusterPolicy with the default version of OFED and an RDMA device plugin with two RDMA resources. The first is mapped to ens1f0 and ens1f1, and the second is mapped to ens2f0 and ens2f1. +Once the Network Operator is installed create a NicClusterPolicy with: +* DOCA driver +* RDMA Shared Device plugin with two RDMA resources - the first mapped to ens1f0 and ens1f1 and the second mapped to ens2f0 and ens2f1. + Note: You may need to change the interface names in the NicClusterPolicy to those used by your target nodes. .. 
code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: @@ -330,22 +319,6 @@ Note: You may need to change the interface names in the NicClusterPolicy to thos } ] } - secondaryNetwork: - cniPlugins: - image: plugins - repository: ghcr.io/k8snetworkplumbingwg - version: v1.3.0 - imagePullSecrets: [] - multus: - image: multus-cni - repository: ghcr.io/k8snetworkplumbingwg - version: v3.9.3 - imagePullSecrets: [] - ipamPlugin: - image: whereabouts - repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 - imagePullSecrets: [] ---------------------------------------------------- Network Operator Deployment with a Secondary Network @@ -353,54 +326,26 @@ Network Operator Deployment with a Secondary Network First install the Network Operator with NFD enabled: - - ``values.yaml``: .. code-block:: yaml nfd: enabled: true - sriovNetworkOperator: - enabled: false -Once the Network Operator is installed deploy a NicClusterPolicy with the following enabled: -* RDMA shared device plugin +Once the Network Operator is installed create a NicClusterPolicy with the following enabled: * Secondary network -* Mutlus CNI +* Multus CNI * Container-networking-plugins CNI plugins -* Whereabouts IPAM CNI Plugin +* IPAM Plugin .. code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: name: nic-cluster-policy spec: - rdmaSharedDevicePlugin: - # [map[ifNames:[ens1f0] name:rdma_shared_device_a]] - image: k8s-rdma-shared-dev-plugin - repository: ghcr.io/mellanox - version: 1.4.0 - imagePullSecrets: [] - # The config below directly propagates to k8s-rdma-shared-device-plugin configuration. - # Replace 'devices' with your (RDMA capable) netdevice name. 
- config: | - { - "configList": [ - { - "resourceName": "rdma_shared_device_a", - "rdmaHcaMax": 63, - "selectors": { - "vendors": [], - "deviceIDs": [], - "drivers": [], - "ifNames": ["ens1f0"], - "linkTypes": [] - } - } - ] - } secondaryNetwork: cniPlugins: image: plugins @@ -415,62 +360,34 @@ Once the Network Operator is installed deploy a NicClusterPolicy with the follow ipamPlugin: image: whereabouts repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 + version: |whereabouts-version| imagePullSecrets: [] -------------------------------------------- Network Operator Deployment with NVIDIA-IPAM -------------------------------------------- -Network Operator deployment with: - +First install the Network Operator with NFD enabled: ``values.yaml``: .. code-block:: yaml nfd: enabled: true - sriovNetworkOperator: - enabled: false Once the Network Operator is installed deploy a NicClusterPolicy with the following enabled: - * RDMA shared device plugin * Secondary network * Multus CNI - * Container-networking-plugins - * CNI plugins - * NVIDIA-IPAM CNI Plugin + * Container Networking plugins + * NVIDIA-IPAM plugin .. code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: name: nic-cluster-policy spec: - rdmaSharedDevicePlugin: - # [map[ifNames:[ens1f0] name:rdma_shared_device_a]] - image: k8s-rdma-shared-dev-plugin - repository: ghcr.io/mellanox - version: 1.4.0 - imagePullSecrets: [] - # The config below directly propagates to k8s-rdma-shared-device-plugin configuration. - # Replace 'devices' with your (RDMA capable) netdevice name. 
- config: | - { - "configList": [ - { - "resourceName": "rdma_shared_device_a", - "rdmaHcaMax": 63, - "selectors": { - "vendors": [], - "deviceIDs": [], - "drivers": [], - "ifNames": ["ens1f0"], - "linkTypes": [] - } - } - ] - } secondaryNetwork: cniPlugins: image: plugins @@ -527,14 +444,6 @@ Example of a MacvlanNetwork that uses NVIDIA-IPAM: Network Operator Deployment with a Host Device Network ------------------------------------------------------ -Network Operator deployment with: - -* SR-IOV device plugin, single SR-IOV resource pool -* Secondary network -* Multus CNI -* Container-networking-plugins CNI plugins -* Whereabouts IPAM CNI plugin - In this mode, the Network Operator could be deployed on virtualized deployments as well. It supports both Ethernet and InfiniBand modes. From the Network Operator perspective, there is no difference between the deployment procedures. To work on a VM (virtual machine), the PCI passthrough must be configured for SR-IOV devices. The Network Operator works both with VF (Virtual Function) and PF (Physical Function) inside the VMs. .. warning:: If the Host Device Network is used without the MLNX_OFED driver, the following packages should be installed: @@ -542,16 +451,23 @@ In this mode, the Network Operator could be deployed on virtualized deployments * the linux-generic package on Ubuntu hosts * the kernel-modules-extra package on the RedHat-based hosts +First install the Network Operator with NFD enabled: ``values.yaml``: .. code-block:: yaml nfd: enabled: true - sriovNetworkOperator: - enabled: false + +Once the Network Operator is installed deploy a NicClusterPolicy with: + * SR-IOV device plugin configured with a single SR-IOV resource pool + * Secondary network + * Multus CNI + * Container Networking plugins + * IPAM plugin .. 
code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: @@ -595,7 +511,7 @@ In this mode, the Network Operator could be deployed on virtualized deployments ipamPlugin: image: whereabouts repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 + version: |whereabouts-version| imagePullSecrets: [] @@ -604,6 +520,7 @@ Following the deployment, the network operator should be configured, and K8s net The ``host-device-net.yaml`` configuration file for such a deployment: .. code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: HostDeviceNetwork metadata: @@ -682,6 +599,7 @@ Network Operator Deployment with a Host Device Network and Macvlan Network In this combined deployment, different NVIDIA NICs are used for RDMA Shared Device Plugin and SR-IOV Network Device Plugin in order to work with a Host Device Network or a Macvlan Network on different NICs. It is impossible to combine different networking types on the same NICs. The same principle should be applied for other networking combinations. +First install the Network Operator with NFD enabled: ``values.yaml``: .. code-block:: yaml @@ -689,7 +607,17 @@ In this combined deployment, different NVIDIA NICs are used for RDMA Shared Devi nfd: enabled: true +Once the Network Operator is installed deploy a NicClusterPolicy with: + * RDMA shared device plugin + * SR-IOV device plugin, single SR-IOV resource pool + * Secondary network + * Multus CNI + * Container-networking-plugins CNI plugins + * RDMA Shared device plugin + * Whereabouts IPAM CNI plugin + .. 
code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: @@ -757,7 +685,7 @@ In this combined deployment, different NVIDIA NICs are used for RDMA Shared Devi ipamPlugin: image: whereabouts repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 + version: |whereabouts-version| imagePullSecrets: [] For pods and network configuration examples please refer to the corresponding sections: Network Operator Deployment with the RDMA Shared Device Plugin and Network Operator Deployment with a Host Device Network. @@ -766,100 +694,102 @@ For pods and network configuration examples please refer to the corresponding se Network Operator Deployment with an IP over InfiniBand (IPoIB) Network ---------------------------------------------------------------------- -Network Operator deployment with: - -* RDMA shared device plugin -* Secondary network -* Multus CNI -* IPoIB CNI -* Whereabouts IPAM CNI plugin In this mode, the Network Operator could be deployed on virtualized deployments as well. It supports both Ethernet and InfiniBand modes. From the Network Operator perspective, there is no difference between the deployment procedures. To work on a VM (virtual machine), the PCI passthrough must be configured for SR-IOV devices. The Network Operator works both with VF (Virtual Function) and PF (Physical Function) inside the VMs. +First install the Network Operator with NFD enabled: ``values.yaml``: .. 
code-block:: yaml nfd: enabled: true - sriovNetworkOperator: - enabled: false - -apiVersion: mellanox.com/v1alpha1 -kind: NicClusterPolicy -metadata: - name: nic-cluster-policy -spec: - ofedDriver: - image: doca-driver - repository: nvcr.io/nvidia/mellanox - version: 24.04-0.6.6.0-0 - forcePrecompiled: false - imagePullSecrets: [] - terminationGracePeriodSeconds: 300 - startupProbe: - initialDelaySeconds: 10 - periodSeconds: 20 - livenessProbe: - initialDelaySeconds: 30 - periodSeconds: 30 - readinessProbe: - initialDelaySeconds: 10 - periodSeconds: 30 - upgradePolicy: - autoUpgrade: true - maxParallelUpgrades: 1 - safeLoad: false - drain: - enable: true - force: true - podSelector: "" - timeoutSeconds: 300 - deleteEmptyDir: true - rdmaSharedDevicePlugin: - # [map[ifNames:[ibs1f0] name:rdma_shared_device_a]] - image: k8s-rdma-shared-dev-plugin - repository: ghcr.io/mellanox - version: 1.4.0 - imagePullSecrets: [] - # The config below directly propagates to k8s-rdma-shared-device-plugin configuration. - # Replace 'devices' with your (RDMA capable) netdevice name. - config: | - { - "configList": [ + +Once the Network Operator is installed create a NicClusterPolicy with: +* DOCA driver +* RDMA shared device plugin +* Secondary network +* Multus CNI +* IPoIB CNI +* Whereabouts IPAM CNI plugin + +.. 
code-block:: yaml + + apiVersion: mellanox.com/v1alpha1 + kind: NicClusterPolicy + metadata: + name: nic-cluster-policy + spec: + ofedDriver: + image: doca-driver + repository: nvcr.io/nvidia/mellanox + version: 24.04-0.6.6.0-0 + forcePrecompiled: false + imagePullSecrets: [] + terminationGracePeriodSeconds: 300 + startupProbe: + initialDelaySeconds: 10 + periodSeconds: 20 + livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 30 + readinessProbe: + initialDelaySeconds: 10 + periodSeconds: 30 + upgradePolicy: + autoUpgrade: true + maxParallelUpgrades: 1 + safeLoad: false + drain: + enable: true + force: true + podSelector: "" + timeoutSeconds: 300 + deleteEmptyDir: true + rdmaSharedDevicePlugin: + # [map[ifNames:[ibs1f0] name:rdma_shared_device_a]] + image: k8s-rdma-shared-dev-plugin + repository: ghcr.io/mellanox + version: 1.4.0 + imagePullSecrets: [] + # The config below directly propagates to k8s-rdma-shared-device-plugin configuration. + # Replace 'devices' with your (RDMA capable) netdevice name. 
+ config: | { - "resourceName": "rdma_shared_device_a", - "rdmaHcaMax": 63, - "selectors": { - "vendors": [], - "deviceIDs": [], - "drivers": [], - "ifNames": ["ibs1f0"], - "linkTypes": [] - } + "configList": [ + { + "resourceName": "rdma_shared_device_a", + "rdmaHcaMax": 63, + "selectors": { + "vendors": [], + "deviceIDs": [], + "drivers": [], + "ifNames": ["ibs1f0"], + "linkTypes": [] + } + } + ] } - ] - } - secondaryNetwork: - cniPlugins: - image: plugins - repository: ghcr.io/k8snetworkplumbingwg - version: v1.3.0 - imagePullSecrets: [] - multus: - image: multus-cni - repository: ghcr.io/k8snetworkplumbingwg - version: v3.9.3 - imagePullSecrets: [] - ipoib: - image: ipoib-cni - repository: ghcr.io/mellanox - version: 428715a57c0b633e48ec7620f6e3af6863149ccf - ipamPlugin: - image: whereabouts - repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 - imagePullSecrets: [] + secondaryNetwork: + cniPlugins: + image: plugins + repository: ghcr.io/k8snetworkplumbingwg + version: v1.3.0 + imagePullSecrets: [] + multus: + image: multus-cni + repository: ghcr.io/k8snetworkplumbingwg + version: v3.9.3 + imagePullSecrets: [] + ipoib: + image: ipoib-cni + repository: ghcr.io/mellanox + version: 428715a57c0b633e48ec7620f6e3af6863149ccf + ipamPlugin: + image: whereabouts + repository: ghcr.io/k8snetworkplumbingwg + version: |whereabouts-version| + imagePullSecrets: [] Following the deployment, the network operator should be configured, and K8s networking deployed to use it in the pod configuration. @@ -950,16 +880,25 @@ GPUDirect requires the following: * GPU Operator v1.9.0 or newer * NVIDIA GPU and driver supporting GPUDirect e.g Quadro RTX 6000/8000 or NVIDIA T4/NVIDIA V100/NVIDIA A100 -``values.yaml`` example: +First install the Network Operator with NFD enabled: +``values.yaml``: .. 
code-block:: yaml nfd: enabled: true - sriovNetworkOperator: - enabled: false + +Once the Network Operator is installed create a NicClusterPolicy with: +* DOCA driver +* SR-IOV Device Plugin +* Secondary network +* Multus CNI +* Container Networking plugins +* IPAM plugin + .. code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: @@ -991,30 +930,6 @@ GPUDirect requires the following: podSelector: "" timeoutSeconds: 300 deleteEmptyDir: true - rdmaSharedDevicePlugin: - # [map[name:rdma_shared_device_a rdmaHcaMax:63 vendors:[15b3]]] - image: k8s-rdma-shared-dev-plugin - repository: ghcr.io/mellanox - version: 1.4.0 - imagePullSecrets: [] - # The config below directly propagates to k8s-rdma-shared-device-plugin configuration. - # Replace 'devices' with your (RDMA capable) netdevice name. - config: | - { - "configList": [ - { - "resourceName": "rdma_shared_device_a", - "rdmaHcaMax": 63, - "selectors": { - "vendors": ["15b3"], - "deviceIDs": [], - "drivers": [], - "ifNames": [], - "linkTypes": [] - } - } - ] - } sriovDevicePlugin: image: sriov-network-device-plugin repository: ghcr.io/k8snetworkplumbingwg @@ -1053,7 +968,7 @@ GPUDirect requires the following: ipamPlugin: image: whereabouts repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 + version: |whereabouts-version| imagePullSecrets: [] ``host-device-net.yaml:`` @@ -1142,7 +1057,8 @@ Network Operator Deployment in SR-IOV Legacy Mode .. warning:: The SR-IOV Network Operator will be deployed with the default configuration. You can override these settings using a CLI argument, or the ‘sriov-network-operator’ section in the values.yaml file. For more information, refer to the `Project Documentation`_. .. warning:: This deployment mode supports SR-IOV in legacy mode. -``values.yaml`` configuration for such a deployment: +First install the Network Operator with NFD and SR-IOV Network Operator enabled: +``values.yaml``: .. 
code-block:: yaml @@ -1151,7 +1067,15 @@ Network Operator Deployment in SR-IOV Legacy Mode sriovNetworkOperator: enabled: true +Once the Network Operator is installed create a NicClusterPolicy with: +* DOCA driver +* Secondary network +* Multus CNI +* IPoIB CNI +* IPAM CNI plugin + .. code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: @@ -1197,7 +1121,7 @@ Network Operator Deployment in SR-IOV Legacy Mode ipamPlugin: image: whereabouts repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 + version: |whereabouts-version| imagePullSecrets: [] Following the deployment, the Network Operator should be configured, and sriovnetwork node policy and K8s networking should be deployed. @@ -1440,6 +1364,7 @@ Network Operator deployment with InfiniBand network requires the following: * InfiniBand device – Both the host device and switch ports must be enabled in InfiniBand mode. * rdma-core package should be installed when an inbox driver is used. +First install the Network Operator with NFD and SR-IOV Network Operator enabled: ``values.yaml`` .. code-block:: yaml @@ -1449,7 +1374,15 @@ Network Operator deployment with InfiniBand network requires the following: sriovNetworkOperator: enabled: true +Once the Network Operator is installed create a NicClusterPolicy with: +* DOCA driver +* Secondary network +* Multus CNI +* Container Networking Plugins +* IPAM plugin + .. code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: @@ -1495,7 +1428,7 @@ Network Operator deployment with InfiniBand network requires the following: ipamPlugin: image: whereabouts repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 + version: |whereabouts-version| imagePullSecrets: [] ``sriov-ib-network-node-policy.yaml`` @@ -1616,6 +1549,7 @@ Current limitations: data: UFM_CERTIFICATE: "" +First install the Network Operator with NFD enabled: ``values.yaml`` .. 
code-block:: yaml @@ -1626,6 +1560,14 @@ Current limitations: enabled: true resourcePrefix: "nvidia.com" +Once the Network Operator is installed create a NicClusterPolicy with: +* DOCA driver +* ibKubernetes +* Secondary network +* Multus CNI +* Container Networking plugins +* IPAM Plugin + .. code-block:: yaml apiVersion: mellanox.com/v1alpha1 @@ -1681,7 +1623,7 @@ Current limitations: ipamPlugin: image: whereabouts repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 + version: |whereabouts-version| imagePullSecrets: [] Wait for NVIDIA DOCA Driver to install and apply the following CRs: @@ -1936,7 +1878,7 @@ Network Operator Configuration Deploy network-operator by Helm with sriov-network-operator and nv-ipam. - +First install the Network Operator with NFD enabled: ``values.yaml`` .. code-block:: yaml @@ -1944,52 +1886,16 @@ Deploy network-operator by Helm with sriov-network-operator and nv-ipam. sriovNetworkOperator: enabled: true +Once the Network Operator has been installed create a NicClusterPolicy with: +* NVIPAM + .. code-block:: yaml + apiVersion: mellanox.com/v1alpha1 kind: NicClusterPolicy metadata: name: nic-cluster-policy spec: - rdmaSharedDevicePlugin: - # [map[name:rdma_shared_device_a rdmaHcaMax:63 vendors:[15b3]]] - image: k8s-rdma-shared-dev-plugin - repository: ghcr.io/mellanox - version: 1.4.0 - imagePullSecrets: [] - # The config below directly propagates to k8s-rdma-shared-device-plugin configuration. - # Replace 'devices' with your (RDMA capable) netdevice name. 
- config: | - { - "configList": [ - { - "resourceName": "rdma_shared_device_a", - "rdmaHcaMax": 63, - "selectors": { - "vendors": ["15b3"], - "deviceIDs": [], - "drivers": [], - "ifNames": [], - "linkTypes": [] - } - } - ] - } - secondaryNetwork: - cniPlugins: - image: plugins - repository: ghcr.io/k8snetworkplumbingwg - version: v1.3.0 - imagePullSecrets: [] - multus: - image: multus-cni - repository: ghcr.io/k8snetworkplumbingwg - version: v3.9.3 - imagePullSecrets: [] - ipamPlugin: - image: whereabouts - repository: ghcr.io/k8snetworkplumbingwg - version: v0.7.0 - imagePullSecrets: [] nvIpam: image: nvidia-k8s-ipam repository: ghcr.io/mellanox