From 75e7d15d3fb1b879b560a399af1abf92cb5ce098 Mon Sep 17 00:00:00 2001 From: Leonardo Milleri Date: Tue, 18 Jul 2023 12:48:17 +0200 Subject: [PATCH 1/3] Driver loading refactoring Signed-off-by: Leonardo Milleri --- pkg/plugins/generic/generic_plugin.go | 171 +++++++++++++-------- pkg/plugins/generic/generic_plugin_test.go | 118 +++++++++++++- 2 files changed, 224 insertions(+), 65 deletions(-) diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 20c759fc1..c314164dd 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -19,34 +19,67 @@ import ( var PluginName = "generic_plugin" +// driver id +const ( + Vfio = iota + VirtioVdpa +) + +// driver name +const ( + vfioPciDriver = "vfio_pci" + virtioVdpaDriver = "virtio_vdpa" +) + +// function type for determining if a given driver has to be loaded in the kernel +type needDriver func(state *sriovnetworkv1.SriovNetworkNodeState, driverState *DriverState) bool + +type DriverState struct { + DriverName string + DeviceType string + VdpaType string + NeedDriverFunc needDriver + DriverLoaded bool +} + +type DriverStateMapType map[uint]*DriverState + type GenericPlugin struct { - PluginName string - SpecVersion string - DesireState *sriovnetworkv1.SriovNetworkNodeState - LastState *sriovnetworkv1.SriovNetworkNodeState - LoadVfioDriver uint - LoadVirtioVdpaDriver uint - RunningOnHost bool - HostManager host.HostManagerInterface + PluginName string + SpecVersion string + DesireState *sriovnetworkv1.SriovNetworkNodeState + LastState *sriovnetworkv1.SriovNetworkNodeState + DriverStateMap DriverStateMapType + RunningOnHost bool + HostManager host.HostManagerInterface } const scriptsPath = "bindata/scripts/enable-kargs.sh" -const ( - unloaded = iota - loading - loaded -) - // Initialize our plugin and set up initial values func NewGenericPlugin(runningOnHost bool) (plugin.VendorPlugin, error) { + driverStateMap := make(map[uint]*DriverState) + driverStateMap[Vfio] = &DriverState{ + DriverName: vfioPciDriver, + DeviceType: constants.DeviceTypeVfioPci, + VdpaType: "", + NeedDriverFunc: needDriverCheckDeviceType, + DriverLoaded: false, + } + driverStateMap[VirtioVdpa] = &DriverState{ + DriverName: virtioVdpaDriver, + DeviceType: constants.DeviceTypeNetDevice, + VdpaType: constants.VdpaTypeVirtio, + NeedDriverFunc: needDriverCheckVdpaType, + DriverLoaded: false, + } + return &GenericPlugin{ - PluginName: PluginName, - SpecVersion: "1.0", - LoadVfioDriver: unloaded, - LoadVirtioVdpaDriver: unloaded, - RunningOnHost: runningOnHost, - HostManager: host.NewHostManager(runningOnHost), + PluginName: PluginName, + SpecVersion: "1.0", + DriverStateMap: driverStateMap, + RunningOnHost: runningOnHost, + HostManager: host.NewHostManager(runningOnHost), }, nil } @@ -60,7 +93,7 @@ func (p *GenericPlugin) Spec() string { return p.SpecVersion } -// OnNodeStateChange Invoked when SriovNetworkNodeState CR is created or updated, return if need dain and/or reboot node +// OnNodeStateChange Invoked when SriovNetworkNodeState CR is created or updated, return if need drain and/or reboot node func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) (needDrain bool, needReboot bool, err error) { glog.Info("generic-plugin OnNodeStateChange()") needDrain = false @@ -69,7 +102,7 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt p.DesireState = new needDrain = needDrainNode(new.Spec.Interfaces, new.Status.Interfaces) - needReboot = needRebootNode(new, &p.LoadVfioDriver, &p.LoadVirtioVdpaDriver) + needReboot = needRebootNode(new, p.DriverStateMap) if needReboot { needDrain = true @@ -77,24 +110,23 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt return } -// Apply config change -func (p *GenericPlugin) Apply() error { - glog.Infof("generic-plugin Apply(): desiredState=%v", p.DesireState.Spec) - if p.LoadVfioDriver == loading { - if err := p.HostManager.LoadKernelModule("vfio_pci"); err != nil { - glog.Errorf("generic-plugin Apply(): fail to load vfio_pci kmod: %v", err) - return err +func (p *GenericPlugin) syncDriverState() error { + for _, driverState := range p.DriverStateMap { + if !driverState.DriverLoaded && driverState.NeedDriverFunc(p.DesireState, driverState) { + glog.V(2).Infof("loading driver %s", driverState.DriverName) + if err := p.HostManager.LoadKernelModule(driverState.DriverName); err != nil { + glog.Errorf("generic-plugin syncDriverState(): fail to load %s kmod: %v", driverState.DriverName, err) + return err + } + driverState.DriverLoaded = true } - p.LoadVfioDriver = loaded } + return nil +} - if p.LoadVirtioVdpaDriver == loading { - if err := p.HostManager.LoadKernelModule("virtio_vdpa"); err != nil { - glog.Errorf("generic-plugin Apply(): fail to load virtio_vdpa kmod: %v", err) - return err - } - p.LoadVirtioVdpaDriver = loaded - } +// Apply config change +func (p *GenericPlugin) Apply() error { + glog.Infof("generic-plugin Apply(): desiredState=%v", p.DesireState.Spec) if p.LastState != nil { glog.Infof("generic-plugin Apply(): lastStat=%v", p.LastState.Spec) @@ -104,6 +136,10 @@ func (p *GenericPlugin) Apply() error { } } + if err := p.syncDriverState(); err != nil { + return err + } + // Create a map with all the PFs we will need to configure // we need to create it here before we access the host file system using the chroot function // because the skipConfigVf needs the mstconfig package that exist only inside the sriov-config-daemon file system @@ -129,10 +165,10 @@ func (p *GenericPlugin) Apply() error { return nil } -func needVfioDriver(state *sriovnetworkv1.SriovNetworkNodeState) bool { +func needDriverCheckDeviceType(state *sriovnetworkv1.SriovNetworkNodeState, driverState *DriverState) bool { for _, iface := range state.Spec.Interfaces { for i := range iface.VfGroups { - if iface.VfGroups[i].DeviceType == constants.DeviceTypeVfioPci { + if iface.VfGroups[i].DeviceType == driverState.DeviceType { return true } } @@ -140,10 +176,10 @@ func needVfioDriver(state *sriovnetworkv1.SriovNetworkNodeState) bool { return false } -func needVirtioVdpaDriver(state *sriovnetworkv1.SriovNetworkNodeState) bool { +func needDriverCheckVdpaType(state *sriovnetworkv1.SriovNetworkNodeState, driverState *DriverState) bool { for _, iface := range state.Spec.Interfaces { for i := range iface.VfGroups { - if iface.VfGroups[i].VdpaType == constants.VdpaTypeVirtio { + if iface.VfGroups[i].VdpaType == driverState.VdpaType { return true } } @@ -217,35 +253,46 @@ func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.Int return } -func needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState, loadVfioDriver *uint, loadVirtioVdpaDriver *uint) (needReboot bool) { - needReboot = false - if *loadVfioDriver != loaded { - if needVfioDriver(state) { - *loadVfioDriver = loading - update, err := tryEnableIommuInKernelArgs() - if err != nil { - glog.Errorf("generic-plugin needRebootNode():fail to enable iommu in kernel args: %v", err) - } - if update { - glog.V(2).Infof("generic-plugin needRebootNode(): need reboot for enabling iommu kernel args") - } - needReboot = needReboot || update +func needRebootIfVfio(state *sriovnetworkv1.SriovNetworkNodeState, driverMap DriverStateMapType) (needReboot bool) { + driverState := driverMap[Vfio] + if !driverState.DriverLoaded && driverState.NeedDriverFunc(state, driverState) { + var err error + needReboot, err = tryEnableIommuInKernelArgs() + if err != nil { + glog.Errorf("generic-plugin needRebootNode():fail to enable iommu in kernel args: %v", err) } - } - - if *loadVirtioVdpaDriver != loaded { - if needVirtioVdpaDriver(state) { - *loadVirtioVdpaDriver = loading + if needReboot { + glog.V(2).Infof("generic-plugin needRebootNode(): need reboot for enabling iommu kernel args") } } + return needReboot +} + +func needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState, driverMap DriverStateMapType) (needReboot bool) { + needReboot = needRebootIfVfio(state, driverMap) + updateNode, err := utils.WriteSwitchdevConfFile(state) - update, err := utils.WriteSwitchdevConfFile(state) if err != nil { glog.Errorf("generic-plugin needRebootNode(): fail to write switchdev device config file") } - if update { + if updateNode { glog.V(2).Infof("generic-plugin needRebootNode(): need reboot for updating switchdev device configuration") } - needReboot = needReboot || update + needReboot = needReboot || updateNode return } + +// ////////////// for testing purposes only /////////////////////// +func (p *GenericPlugin) getDriverStateMap() DriverStateMapType { + return p.DriverStateMap +} + +func (p *GenericPlugin) loadDriverForTests(state *sriovnetworkv1.SriovNetworkNodeState) { + for _, driverState := range p.DriverStateMap { + if !driverState.DriverLoaded && driverState.NeedDriverFunc(state, driverState) { + driverState.DriverLoaded = true + } + } +} + +////////////////////////////////////////////////////////////////// diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 0d8fc0000..4350fbac4 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -1,4 +1,4 @@ -package generic_test +package generic import ( "testing" @@ -8,7 +8,6 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/generic" ) func TestGenericPlugin(t *testing.T) { @@ -20,7 +19,7 @@ var _ = Describe("Generic plugin", func() { var genericPlugin plugin.VendorPlugin var err error BeforeEach(func() { - genericPlugin, err = generic.NewGenericPlugin(false) + genericPlugin, err = NewGenericPlugin(false) Expect(err).ToNot(HaveOccurred()) }) @@ -126,6 +125,119 @@ var _ = Describe("Generic plugin", func() { Expect(needReboot).To(BeFalse()) Expect(needDrain).To(BeTrue()) }) + + It("should load vfio_pci driver", func() { + networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ + Interfaces: sriovnetworkv1.Interfaces{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + Mtu: 1500, + VfGroups: []sriovnetworkv1.VfGroup{{ + DeviceType: "vfio-pci", + PolicyName: "policy-1", + ResourceName: "resource-1", + VfRange: "0-1", + Mtu: 1500, + }}}}, + }, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{ + Interfaces: sriovnetworkv1.InterfaceExts{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + TotalVfs: 2, + DeviceID: "1015", + Vendor: "15b3", + Name: "sriovif1", + Mtu: 1500, + Mac: "0c:42:a1:55:ee:46", + Driver: "mlx5_core", + EswitchMode: "legacy", + LinkSpeed: "25000 Mb/s", + LinkType: "ETH", + VFs: []sriovnetworkv1.VirtualFunction{{ + PciAddress: "0000:00:00.1", + DeviceID: "1016", + Vendor: "15b3", + VfID: 0, + Driver: "mlx5_core", + Name: "sriovif1v0", + Mtu: 1500, + Mac: "8e:d6:2c:62:87:1b", + }, { + PciAddress: "0000:00:00.2", + DeviceID: "1016", + Vendor: "15b3", + VfID: 0, + Driver: "mlx5_core", + }}, + }}, + }, + } + + concretePlugin := genericPlugin.(*GenericPlugin) + driverStateMap := concretePlugin.getDriverStateMap() + driverState := driverStateMap[Vfio] + concretePlugin.loadDriverForTests(networkNodeState) + Expect(driverState.DriverLoaded).To(BeTrue()) + }) + + It("should load virtio_vdpa driver", func() { + networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ + Interfaces: sriovnetworkv1.Interfaces{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + Mtu: 1500, + VfGroups: []sriovnetworkv1.VfGroup{{ + DeviceType: "netdevice", + VdpaType: "virtio", + PolicyName: "policy-1", + ResourceName: "resource-1", + VfRange: "0-1", + Mtu: 1500, + }}}}, + }, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{ + Interfaces: sriovnetworkv1.InterfaceExts{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + TotalVfs: 2, + DeviceID: "1015", + Vendor: "15b3", + Name: "sriovif1", + Mtu: 1500, + Mac: "0c:42:a1:55:ee:46", + Driver: "mlx5_core", + EswitchMode: "legacy", + LinkSpeed: "25000 Mb/s", + LinkType: "ETH", + VFs: []sriovnetworkv1.VirtualFunction{{ + PciAddress: "0000:00:00.1", + DeviceID: "1016", + Vendor: "15b3", + VfID: 0, + Driver: "mlx5_core", + Name: "sriovif1v0", + Mtu: 1500, + Mac: "8e:d6:2c:62:87:1b", + }, { + PciAddress: "0000:00:00.2", + DeviceID: "1016", + Vendor: "15b3", + VfID: 0, + Driver: "mlx5_core", + }}, + }}, + }, + } + + concretePlugin := genericPlugin.(*GenericPlugin) + driverStateMap := concretePlugin.getDriverStateMap() + driverState := driverStateMap[VirtioVdpa] + concretePlugin.loadDriverForTests(networkNodeState) + Expect(driverState.DriverLoaded).To(BeTrue()) + }) }) }) From 1a5faaeb183bb13a299fe104e2f555ec1f90ee14 Mon Sep 17 00:00:00 2001 From: Leonardo Milleri Date: Thu, 13 Jul 2023 18:08:50 +0200 Subject: [PATCH 2/3] vdpa multiqueue enabled Max number of queues is set to 32 by default. This setting should help with the performance of NVIDIA network cards. Signed-off-by: Leonardo Milleri --- .../files/switchdev-configuration-after-nm.sh.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindata/manifests/switchdev-config/files/switchdev-configuration-after-nm.sh.yaml b/bindata/manifests/switchdev-config/files/switchdev-configuration-after-nm.sh.yaml index 03de02980..49975dec4 100644 --- a/bindata/manifests/switchdev-config/files/switchdev-configuration-after-nm.sh.yaml +++ b/bindata/manifests/switchdev-config/files/switchdev-configuration-after-nm.sh.yaml @@ -58,8 +58,8 @@ contents: do extract_min_max_ids vdpaType=$(jq -c '.vdpaType' -r <<< $group) - if [ $vfid -le $maxId ] && [ $vfid -ge $minId ] && [ $vdpaType == "virtio" ]; then - vdpa_cmd="vdpa dev add name vdpa:"${VfPciAddr}" mgmtdev pci/"${VfPciAddr} + if [ $vfid -le $maxId ] && [ $vfid -ge $minId ] && [ $vdpaType != "" ]; then + vdpa_cmd="vdpa dev add name vdpa:"${VfPciAddr}" mgmtdev pci/"${VfPciAddr}" max_vqp 32" eval $vdpa_cmd fi done From 688979dfe82a1e908444213d5c8f0e3c08585e5a Mon Sep 17 00:00:00 2001 From: Leonardo Milleri Date: Tue, 18 Jul 2023 12:59:56 +0200 Subject: [PATCH 3/3] vhost-vdpa support SriovNetworkNodePolicy API: VDPA device type can take now the following values: "virtio" and "vhost" Vhost/vdpa: Same behaviour of virtio/vdpa apart for the drivers that are loaded in the kernel (vhost-vdpa instead of virtio-vdpa) Signed-off-by: Leonardo Milleri --- api/v1/helper_test.go | 133 +++++++++++++++++- api/v1/sriovnetworknodepolicy_types.go | 4 +- .../switchdev-configuration-after-nm.sh.yaml | 12 +- ...openshift.io_sriovnetworknodepolicies.yaml | 3 +- .../sriovnetworknodepolicy_controller_test.go | 21 ++- ...openshift.io_sriovnetworknodepolicies.yaml | 3 +- pkg/consts/constants.go | 1 + pkg/plugins/generic/generic_plugin.go | 9 ++ pkg/plugins/generic/generic_plugin_test.go | 57 ++++++++ pkg/webhook/validate.go | 12 +- pkg/webhook/validate_test.go | 86 ++++++++++- 11 files changed, 315 insertions(+), 26 deletions(-) diff --git a/api/v1/helper_test.go b/api/v1/helper_test.go index 15cb435b3..674c64d2c 100644 --- a/api/v1/helper_test.go +++ b/api/v1/helper_test.go @@ -94,7 +94,7 @@ func newNodePolicy() *v1.SriovNetworkNodePolicy { } } -func newVdpaNodePolicy() *v1.SriovNetworkNodePolicy { +func newVirtioVdpaNodePolicy() *v1.SriovNetworkNodePolicy { return &v1.SriovNetworkNodePolicy{ ObjectMeta: metav1.ObjectMeta{ Name: "p1", @@ -102,6 +102,29 @@ func newVdpaNodePolicy() *v1.SriovNetworkNodePolicy { Spec: v1.SriovNetworkNodePolicySpec{ DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVirtio, + NicSelector: v1.SriovNetworkNicSelector{ + PfNames: []string{"ens803f1#2-3"}, + RootDevices: []string{"0000:86:00.1"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 4, + Priority: 99, + ResourceName: "virtiovdpa", + }, + } +} + +func newVhostVdpaNodePolicy() *v1.SriovNetworkNodePolicy { + return &v1.SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: v1.SriovNetworkNodePolicySpec{ + DeviceType: consts.DeviceTypeNetDevice, + VdpaType: consts.VdpaTypeVhost, NicSelector: v1.SriovNetworkNicSelector{ PfNames: []string{"ens803f1"}, RootDevices: []string{"0000:86:00.1"}, @@ -112,7 +135,7 @@ func newVdpaNodePolicy() *v1.SriovNetworkNodePolicy { }, NumVfs: 2, Priority: 99, - ResourceName: "p1res", + ResourceName: "vhostvdpa", }, } } @@ -437,6 +460,55 @@ func TestSriovNetworkNodePolicyApply(t *testing.T) { }, }, }, + { + // vdpa policy with same priority (both virtio and vhost), VfRange's do not overlap so all is merged + tname: "one vdpa policy present same pf same priority partitioning", + currentState: func() *v1.SriovNetworkNodeState { + st := newNodeState() + st.Spec.Interfaces = []v1.Interface{ + { + Name: "ens803f1", + NumVfs: 4, + PciAddress: "0000:86:00.1", + VfGroups: []v1.VfGroup{ + { + DeviceType: consts.DeviceTypeNetDevice, + VdpaType: consts.VdpaTypeVhost, + ResourceName: "vhostvdpa", + VfRange: "0-1", + PolicyName: "p2", + }, + }, + }, + } + return st + }(), + policy: newVirtioVdpaNodePolicy(), + equalP: true, + expectedInterfaces: []v1.Interface{ + { + Name: "ens803f1", + NumVfs: 4, + PciAddress: "0000:86:00.1", + VfGroups: []v1.VfGroup{ + { + DeviceType: consts.DeviceTypeNetDevice, + VdpaType: consts.VdpaTypeVirtio, + ResourceName: "virtiovdpa", + VfRange: "2-3", + PolicyName: "p1", + }, + { + DeviceType: consts.DeviceTypeNetDevice, + VdpaType: consts.VdpaTypeVhost, + ResourceName: "vhostvdpa", + VfRange: "0-1", + PolicyName: "p2", + }, + }, + }, + }, + }, { // policy with same priority that overwrites the 2 present groups in // SriovNetworkNodeState because they overlap VfRange @@ -637,7 +709,7 @@ func TestSriovNetworkNodePolicyApply(t *testing.T) { } } -func TestVdpaNodePolicyApply(t *testing.T) { +func TestVirtioVdpaNodePolicyApply(t *testing.T) { testtable := []struct { tname string currentState *v1.SriovNetworkNodeState @@ -647,20 +719,67 @@ func TestVdpaNodePolicyApply(t *testing.T) { expectedErr bool }{ { - tname: "vdpa configuration", + tname: "virtio/vdpa configuration", currentState: newNodeState(), - policy: newVdpaNodePolicy(), + policy: newVirtioVdpaNodePolicy(), equalP: false, expectedInterfaces: []v1.Interface{ { Name: "ens803f1", - NumVfs: 2, + NumVfs: 4, PciAddress: "0000:86:00.1", VfGroups: []v1.VfGroup{ { DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVirtio, - ResourceName: "p1res", + ResourceName: "virtiovdpa", + VfRange: "2-3", + PolicyName: "p1", + }, + }, + }, + }, + }, + } + for _, tc := range testtable { + t.Run(tc.tname, func(t *testing.T) { + err := tc.policy.Apply(tc.currentState, tc.equalP) + if tc.expectedErr && err == nil { + t.Errorf("Apply expecting error.") + } else if !tc.expectedErr && err != nil { + t.Errorf("Apply error:\n%s", err) + } + if diff := cmp.Diff(tc.expectedInterfaces, tc.currentState.Spec.Interfaces); diff != "" { + t.Errorf("SriovNetworkNodeState spec diff (-want +got):\n%s", diff) + } + }) + } +} + +func TestVhostVdpaNodePolicyApply(t *testing.T) { + testtable := []struct { + tname string + currentState *v1.SriovNetworkNodeState + policy *v1.SriovNetworkNodePolicy + expectedInterfaces v1.Interfaces + equalP bool + expectedErr bool + }{ + { + tname: "vhost/vdpa configuration", + currentState: newNodeState(), + policy: newVhostVdpaNodePolicy(), + equalP: false, + expectedInterfaces: []v1.Interface{ + { + Name: "ens803f1", + NumVfs: 2, + PciAddress: "0000:86:00.1", + VfGroups: []v1.VfGroup{ + { + DeviceType: consts.DeviceTypeNetDevice, + VdpaType: consts.VdpaTypeVhost, + ResourceName: "vhostvdpa", VfRange: "0-1", PolicyName: "p1", }, diff --git a/api/v1/sriovnetworknodepolicy_types.go b/api/v1/sriovnetworknodepolicy_types.go index 8955c300d..17e22d2fa 100644 --- a/api/v1/sriovnetworknodepolicy_types.go +++ b/api/v1/sriovnetworknodepolicy_types.go @@ -54,8 +54,8 @@ type SriovNetworkNodePolicySpec struct { // +kubebuilder:validation:Enum=legacy;switchdev // NIC Device Mode. Allowed value "legacy","switchdev". EswitchMode string `json:"eSwitchMode,omitempty"` - // +kubebuilder:validation:Enum=virtio - // VDPA device type. Allowed value "virtio" + // +kubebuilder:validation:Enum=virtio;vhost + // VDPA device type. Allowed value "virtio", "vhost" VdpaType string `json:"vdpaType,omitempty"` // Exclude device's NUMA node when advertising this resource by SRIOV network device plugin. Default to false. ExcludeTopology bool `json:"excludeTopology,omitempty"` diff --git a/bindata/manifests/switchdev-config/files/switchdev-configuration-after-nm.sh.yaml b/bindata/manifests/switchdev-config/files/switchdev-configuration-after-nm.sh.yaml index 49975dec4..00a4019b4 100644 --- a/bindata/manifests/switchdev-config/files/switchdev-configuration-after-nm.sh.yaml +++ b/bindata/manifests/switchdev-config/files/switchdev-configuration-after-nm.sh.yaml @@ -58,9 +58,17 @@ contents: do extract_min_max_ids vdpaType=$(jq -c '.vdpaType' -r <<< $group) - if [ $vfid -le $maxId ] && [ $vfid -ge $minId ] && [ $vdpaType != "" ]; then - vdpa_cmd="vdpa dev add name vdpa:"${VfPciAddr}" mgmtdev pci/"${VfPciAddr}" max_vqp 32" + if [ $vfid -le $maxId ] && [ $vfid -ge $minId ] && ([ $vdpaType == "virtio" ] || [ $vdpaType == "vhost" ]); then + vdpa_name=vdpa:${VfPciAddr} + vdpa_cmd="vdpa dev add name "${vdpa_name}" mgmtdev pci/"${VfPciAddr}" max_vqp 32" eval $vdpa_cmd + # set the driver_override. When the specified driver will be loaded in the kernel + # it will be automatically bound to the vdpa device + driver_override=virtio_vdpa + if [ $vdpaType == "vhost" ]; then + driver_override=vhost_vdpa + fi + echo $driver_override > /sys/bus/vdpa/devices/$vdpa_name/driver_override fi done done diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml index 5bebf01e0..a81c6a227 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml @@ -117,9 +117,10 @@ spec: description: SRIOV Network device plugin endpoint resource name type: string vdpaType: - description: VDPA device type. Allowed value "virtio" + description: VDPA device type. Allowed value "virtio", "vhost" enum: - virtio + - vhost type: string required: - nicSelector diff --git a/controllers/sriovnetworknodepolicy_controller_test.go b/controllers/sriovnetworknodepolicy_controller_test.go index 798d756cd..bd4579d78 100644 --- a/controllers/sriovnetworknodepolicy_controller_test.go +++ b/controllers/sriovnetworknodepolicy_controller_test.go @@ -152,7 +152,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { expResource dptypes.ResourceConfList }{ { - tname: "testVdpaVirtio", + tname: "testVirtioVdpaVirtio", policy: sriovnetworkv1.SriovNetworkNodePolicy{ Spec: v1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", @@ -170,6 +170,25 @@ func TestRenderDevicePluginConfigData(t *testing.T) { }, }, }, + }, { + tname: "testVhostVdpaVirtio", + policy: sriovnetworkv1.SriovNetworkNodePolicy{ + Spec: v1.SriovNetworkNodePolicySpec{ + ResourceName: "resourceName", + DeviceType: consts.DeviceTypeNetDevice, + VdpaType: consts.VdpaTypeVhost, + }, + }, + expResource: dptypes.ResourceConfList{ + ResourceList: []dptypes.ResourceConfig{ + { + ResourceName: "resourceName", + Selectors: mustMarshallSelector(t, &dptypes.NetDeviceSelectors{ + VdpaType: dptypes.VdpaType(consts.VdpaTypeVhost), + }), + }, + }, + }, }, { tname: "testExcludeTopology", diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml index 5bebf01e0..a81c6a227 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml @@ -117,9 +117,10 @@ spec: description: SRIOV Network device plugin endpoint resource name type: string vdpaType: - description: VDPA device type. Allowed value "virtio" + description: VDPA device type. Allowed value "virtio", "vhost" enum: - virtio + - vhost type: string required: - nicSelector diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index 72e847d82..1f37edfa4 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -32,4 +32,5 @@ const ( DeviceTypeVfioPci = "vfio-pci" DeviceTypeNetDevice = "netdevice" VdpaTypeVirtio = "virtio" + VdpaTypeVhost = "vhost" ) diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index c314164dd..bc5b1129e 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -23,12 +23,14 @@ var PluginName = "generic_plugin" const ( Vfio = iota VirtioVdpa + VhostVdpa ) // driver name const ( vfioPciDriver = "vfio_pci" virtioVdpaDriver = "virtio_vdpa" + vhostVdpaDriver = "vhost_vdpa" ) // function type for determining if a given driver has to be loaded in the kernel @@ -73,6 +75,13 @@ func NewGenericPlugin(runningOnHost bool) (plugin.VendorPlugin, error) { NeedDriverFunc: needDriverCheckVdpaType, DriverLoaded: false, } + driverStateMap[VhostVdpa] = &DriverState{ + DriverName: vhostVdpaDriver, + DeviceType: constants.DeviceTypeNetDevice, + VdpaType: constants.VdpaTypeVhost, + NeedDriverFunc: needDriverCheckVdpaType, + DriverLoaded: false, + } return &GenericPlugin{ PluginName: PluginName, diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 4350fbac4..dc8201448 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -238,6 +238,63 @@ var _ = Describe("Generic plugin", func() { concretePlugin.loadDriverForTests(networkNodeState) Expect(driverState.DriverLoaded).To(BeTrue()) }) + + It("should load vhost_vdpa driver", func() { + networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ + Interfaces: sriovnetworkv1.Interfaces{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + Mtu: 1500, + VfGroups: []sriovnetworkv1.VfGroup{{ + DeviceType: "netdevice", + VdpaType: "vhost", + PolicyName: "policy-1", + ResourceName: "resource-1", + VfRange: "0-1", + Mtu: 1500, + }}}}, + }, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{ + Interfaces: sriovnetworkv1.InterfaceExts{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + TotalVfs: 2, + DeviceID: "1015", + Vendor: "15b3", + Name: "sriovif1", + Mtu: 1500, + Mac: "0c:42:a1:55:ee:46", + Driver: "mlx5_core", + EswitchMode: "legacy", + LinkSpeed: "25000 Mb/s", + LinkType: "ETH", + VFs: []sriovnetworkv1.VirtualFunction{{ + PciAddress: "0000:00:00.1", + DeviceID: "1016", + Vendor: "15b3", + VfID: 0, + Driver: "mlx5_core", + Name: "sriovif1v0", + Mtu: 1500, + Mac: "8e:d6:2c:62:87:1b", + }, { + PciAddress: "0000:00:00.2", + DeviceID: "1016", + Vendor: "15b3", + VfID: 0, + Driver: "mlx5_core", + }}, + }}, + }, + } + + concretePlugin := genericPlugin.(*GenericPlugin) + driverStateMap := concretePlugin.getDriverStateMap() + driverState := driverStateMap[VhostVdpa] + concretePlugin.loadDriverForTests(networkNodeState) + Expect(driverState.DriverLoaded).To(BeTrue()) + }) }) }) diff --git a/pkg/webhook/validate.go b/pkg/webhook/validate.go index 91b3922fa..571d93d37 100644 --- a/pkg/webhook/validate.go +++ b/pkg/webhook/validate.go @@ -203,12 +203,12 @@ func staticValidateSriovNetworkNodePolicy(cr *sriovnetworkv1.SriovNetworkNodePol } // vdpa: deviceType must be set to 'netdevice' - if cr.Spec.DeviceType != constants.DeviceTypeNetDevice && cr.Spec.VdpaType == constants.VdpaTypeVirtio { - return false, fmt.Errorf("'deviceType: %s' conflicts with 'vdpaType: virtio'; Set 'deviceType' to (string)'netdevice' Or Remove 'vdpaType'", cr.Spec.DeviceType) + if cr.Spec.DeviceType != constants.DeviceTypeNetDevice && (cr.Spec.VdpaType == constants.VdpaTypeVirtio || cr.Spec.VdpaType == constants.VdpaTypeVhost) { + return false, fmt.Errorf("'deviceType: %s' conflicts with '%s'; Set 'deviceType' to (string)'netdevice' Or Remove 'vdpaType'", cr.Spec.DeviceType, cr.Spec.VdpaType) } // vdpa: device must be configured in switchdev mode - if cr.Spec.VdpaType == constants.VdpaTypeVirtio && cr.Spec.EswitchMode != sriovnetworkv1.ESwithModeSwitchDev { - return false, fmt.Errorf("virtio/vdpa requires the device to be configured in switchdev mode") + if (cr.Spec.VdpaType == constants.VdpaTypeVirtio || cr.Spec.VdpaType == constants.VdpaTypeVhost) && cr.Spec.EswitchMode != sriovnetworkv1.ESwithModeSwitchDev { + return false, fmt.Errorf("vdpa requires the device to be configured in switchdev mode") } return true, nil } @@ -286,8 +286,8 @@ func validatePolicyForNodeState(policy *sriovnetworkv1.SriovNetworkNodePolicy, s return fmt.Errorf("numVfs(%d) in CR %s exceed the maximum allowed value(%d)", policy.Spec.NumVfs, policy.GetName(), MlxMaxVFs) } // vdpa: only mellanox cards are supported - if policy.Spec.VdpaType == constants.VdpaTypeVirtio && iface.Vendor != MellanoxID { - return fmt.Errorf("vendor(%s) in CR %s not supported for virtio-vdpa", iface.Vendor, policy.GetName()) + if (policy.Spec.VdpaType == constants.VdpaTypeVirtio || policy.Spec.VdpaType == constants.VdpaTypeVhost) && iface.Vendor != MellanoxID { + return fmt.Errorf("vendor(%s) in CR %s not supported for vdpa", iface.Vendor, policy.GetName()) } } } diff --git a/pkg/webhook/validate_test.go b/pkg/webhook/validate_test.go index a82796312..965504770 100644 --- a/pkg/webhook/validate_test.go +++ b/pkg/webhook/validate_test.go @@ -538,7 +538,7 @@ func TestStaticValidateSriovNetworkNodePolicyWithConflictIsRdmaAndDeviceType(t * g.Expect(ok).To(Equal(false)) } -func TestStaticValidateSriovNetworkNodePolicyWithConflictDeviceTypeAndVdpaType(t *testing.T) { +func TestStaticValidateSriovNetworkNodePolicyWithConflictDeviceTypeAndVirtioVdpaType(t *testing.T) { policy := &SriovNetworkNodePolicy{ Spec: SriovNetworkNodePolicySpec{ DeviceType: constants.DeviceTypeVfioPci, @@ -558,11 +558,35 @@ func TestStaticValidateSriovNetworkNodePolicyWithConflictDeviceTypeAndVdpaType(t } g := NewGomegaWithT(t) ok, err := staticValidateSriovNetworkNodePolicy(policy) - g.Expect(err).To(MatchError(ContainSubstring("'deviceType: vfio-pci' conflicts with 'vdpaType: virtio'"))) + g.Expect(err).To(MatchError(ContainSubstring("'deviceType: vfio-pci' conflicts with 'virtio'"))) g.Expect(ok).To(Equal(false)) } -func TestStaticValidateSriovNetworkNodePolicyVdpaMustSpecifySwitchDev(t *testing.T) { +func TestStaticValidateSriovNetworkNodePolicyWithConflictDeviceTypeAndVhostVdpaType(t *testing.T) { + policy := &SriovNetworkNodePolicy{ + Spec: SriovNetworkNodePolicySpec{ + DeviceType: constants.DeviceTypeVfioPci, + NicSelector: SriovNetworkNicSelector{ + Vendor: "15b3", + DeviceID: "101d", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 1, + Priority: 99, + ResourceName: "p0", + VdpaType: constants.VdpaTypeVhost, + EswitchMode: "switchdev", + }, + } + g := NewGomegaWithT(t) + ok, err := staticValidateSriovNetworkNodePolicy(policy) + g.Expect(err).To(MatchError(ContainSubstring("'deviceType: vfio-pci' conflicts with 'vhost'"))) + g.Expect(ok).To(Equal(false)) +} + +func TestStaticValidateSriovNetworkNodePolicyVirtioVdpaMustSpecifySwitchDev(t *testing.T) { policy := &SriovNetworkNodePolicy{ Spec: SriovNetworkNodePolicySpec{ DeviceType: "netdevice", @@ -581,11 +605,34 @@ func TestStaticValidateSriovNetworkNodePolicyVdpaMustSpecifySwitchDev(t *testing } g := NewGomegaWithT(t) ok, err := staticValidateSriovNetworkNodePolicy(policy) - g.Expect(err).To(MatchError(ContainSubstring("virtio/vdpa requires the device to be configured in switchdev mode"))) + g.Expect(err).To(MatchError(ContainSubstring("vdpa requires the device to be configured in switchdev mode"))) g.Expect(ok).To(Equal(false)) } -func TestValidatePolicyForNodeStateVdpaWithNotSupportedVendor(t *testing.T) { +func TestStaticValidateSriovNetworkNodePolicyVhostVdpaMustSpecifySwitchDev(t *testing.T) { + policy := &SriovNetworkNodePolicy{ + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + Vendor: "15b3", + DeviceID: "101d", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 1, + Priority: 99, + ResourceName: "p0", + VdpaType: constants.VdpaTypeVhost, + }, + } + g := NewGomegaWithT(t) + ok, err := staticValidateSriovNetworkNodePolicy(policy) + g.Expect(err).To(MatchError(ContainSubstring("vdpa requires the device to be configured in switchdev mode"))) + g.Expect(ok).To(Equal(false)) +} + +func TestValidatePolicyForNodeStateVirtioVdpaWithNotSupportedVendor(t *testing.T) { state := newNodeState() policy := &SriovNetworkNodePolicy{ ObjectMeta: metav1.ObjectMeta{ @@ -609,7 +656,34 @@ func TestValidatePolicyForNodeStateVdpaWithNotSupportedVendor(t *testing.T) { } g := NewGomegaWithT(t) err := validatePolicyForNodeState(policy, state, NewNode()) - g.Expect(err).To(MatchError(ContainSubstring(fmt.Sprintf("vendor(%s) in CR %s not supported for virtio-vdpa", state.Status.Interfaces[0].Vendor, policy.Name)))) + g.Expect(err).To(MatchError(ContainSubstring(fmt.Sprintf("vendor(%s) in CR %s not supported for vdpa", state.Status.Interfaces[0].Vendor, policy.Name)))) +} + +func TestValidatePolicyForNodeStateVhostVdpaWithNotSupportedVendor(t *testing.T) { + state := newNodeState() + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + VdpaType: "vhost", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f0"}, + RootDevices: []string{"0000:86:00.0"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 4, + Priority: 99, + ResourceName: "p0", + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).To(MatchError(ContainSubstring(fmt.Sprintf("vendor(%s) in CR %s not supported for vdpa", state.Status.Interfaces[0].Vendor, policy.Name)))) } func TestValidatePolicyForNodeStateWithInvalidDevice(t *testing.T) {