diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index bc5b1129e7..257e971a0f 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -47,13 +47,14 @@ type DriverState struct { type DriverStateMapType map[uint]*DriverState type GenericPlugin struct { - PluginName string - SpecVersion string - DesireState *sriovnetworkv1.SriovNetworkNodeState - LastState *sriovnetworkv1.SriovNetworkNodeState - DriverStateMap DriverStateMapType - RunningOnHost bool - HostManager host.HostManagerInterface + PluginName string + SpecVersion string + DesireState *sriovnetworkv1.SriovNetworkNodeState + LastState *sriovnetworkv1.SriovNetworkNodeState + DriverStateMap DriverStateMapType + DesiredKernelArgs map[string]uint + RunningOnHost bool + HostManager host.HostManagerInterface } const scriptsPath = "bindata/scripts/enable-kargs.sh" @@ -84,11 +85,12 @@ func NewGenericPlugin(runningOnHost bool) (plugin.VendorPlugin, error) { } return &GenericPlugin{ - PluginName: PluginName, - SpecVersion: "1.0", - DriverStateMap: driverStateMap, - RunningOnHost: runningOnHost, - HostManager: host.NewHostManager(runningOnHost), + PluginName: PluginName, + SpecVersion: "1.0", + DriverStateMap: driverStateMap, + DesiredKernelArgs: make(map[string]uint), + RunningOnHost: runningOnHost, + HostManager: host.NewHostManager(runningOnHost), }, nil } @@ -111,7 +113,10 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt p.DesireState = new needDrain = needDrainNode(new.Spec.Interfaces, new.Status.Interfaces) - needReboot = needRebootNode(new, p.DriverStateMap) + needReboot, err = p.needRebootNode(new) + if err != nil { + return false, false, err + } if needReboot { needDrain = true @@ -196,28 +201,28 @@ func needDriverCheckVdpaType(state *sriovnetworkv1.SriovNetworkNodeState, driver return false } -func tryEnableIommuInKernelArgs() (bool, error) { - glog.Info("generic-plugin tryEnableIommuInKernelArgs()") - args := [2]string{"intel_iommu=on", "iommu=pt"} +// setKernelArg Tries to add the kernel args via ostree or grubby. +func setKernelArg(karg string) (bool, error) { + glog.Info("generic-plugin setKernelArg()") var stdout, stderr bytes.Buffer - cmd := exec.Command("/bin/sh", scriptsPath, args[0], args[1]) + cmd := exec.Command("/bin/sh", scriptsPath, karg) cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { // if grubby is not there log and assume kernel args are set correctly. if isCommandNotFound(err) { - glog.Error("generic-plugin tryEnableIommuInKernelArgs(): grubby command not found. Please ensure that kernel args intel_iommu=on iommu=pt are set") + glog.Errorf("generic-plugin setKernelArg(): grubby or ostree command not found. Please ensure that kernel arg %s are set", karg) return false, nil } - glog.Errorf("generic-plugin tryEnableIommuInKernelArgs(): fail to enable iommu %s: %v", args, err) + glog.Errorf("generic-plugin setKernelArg(): fail to enable kernel arg %s: %v", karg, err) return false, err } i, err := strconv.Atoi(strings.TrimSpace(stdout.String())) if err == nil { if i > 0 { - glog.Infof("generic-plugin tryEnableIommuInKernelArgs(): need to reboot node") + glog.Infof("generic-plugin setKernelArg(): need to reboot node for kernel arg %s", karg) return true, nil } } @@ -233,6 +238,50 @@ func isCommandNotFound(err error) bool { return false } +// addToDesiredKernelArgs Should be called to queue a kernel arg to be added to the node. +func (p *GenericPlugin) addToDesiredKernelArgs(karg string) { + if _, ok := p.DesiredKernelArgs[karg]; !ok { + glog.Infof("generic-plugin addToDesiredKernelArgs(): Adding %s to desired kernel arg", karg) + // element "uint" is a counter of number of attempts to set the kernel argument + p.DesiredKernelArgs[karg] = 0 + } +} + +// syncDesiredKernelArgs Should be called to set all the kernel arguments. Returns bool if node update is needed. +func (p *GenericPlugin) syncDesiredKernelArgs() (bool, error) { + needReboot := false + if len(p.DesiredKernelArgs) != 0 { + return false, nil + } + kargs, err := utils.GetCurrentKernelArgs(false) + if err != nil { + return false, err + } + for desiredKarg, attempts := range p.DesiredKernelArgs { + set := utils.IsKernelArgsSet(kargs, desiredKarg) + if !set { + if attempts > 0 { + glog.Errorf("generic-plugin syncDesiredKernelArgs(): failed to set kernel arg %s with attempts %d", desiredKarg, attempts) + } + // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because + // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel + // argument is set once the daemon goes through node state sync again. + update, err := setKernelArg(desiredKarg) + if err != nil { + glog.Errorf("generic-plugin syncDesiredKernelArgs(): fail to set kernel arg %s: %v", desiredKarg, err) + return false, err + } + if update { + needReboot = true + glog.V(2).Infof("generic-plugin syncDesiredKernelArgs(): need reboot for setting kernel arg %s", desiredKarg) + } + // Update the number of attempts we tried to set the kernel argument. + p.DesiredKernelArgs[desiredKarg]++ + } + } + return needReboot, nil +} + func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.InterfaceExts) (needDrain bool) { glog.V(2).Infof("generic-plugin needDrainNode(): current state '%+v', desired state '%+v'", current, desired) needDrain = false @@ -262,33 +311,39 @@ func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.Int return } -func needRebootIfVfio(state *sriovnetworkv1.SriovNetworkNodeState, driverMap DriverStateMapType) (needReboot bool) { - driverState := driverMap[Vfio] +func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetworkNodeState) { + driverState := p.DriverStateMap[Vfio] if !driverState.DriverLoaded && driverState.NeedDriverFunc(state, driverState) { - var err error - needReboot, err = tryEnableIommuInKernelArgs() - if err != nil { - glog.Errorf("generic-plugin needRebootNode():fail to enable iommu in kernel args: %v", err) - } - if needReboot { - glog.V(2).Infof("generic-plugin needRebootNode(): need reboot for enabling iommu kernel args") - } + p.addToDesiredKernelArgs(utils.KernelArgIntelIommu) + p.addToDesiredKernelArgs(utils.KernelArgIommuPt) } - return needReboot } -func needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState, driverMap DriverStateMapType) (needReboot bool) { - needReboot = needRebootIfVfio(state, driverMap) - updateNode, err := utils.WriteSwitchdevConfFile(state) +func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (needReboot bool, err error) { + needReboot = false + p.addVfioDesiredKernelArg(state) + + updateNode, err := p.syncDesiredKernelArgs() + if err != nil { + glog.Errorf("generic-plugin needRebootNode(): failed to set the desired kernel arguments") + return false, err + } + if updateNode { + glog.V(2).Infof("generic-plugin needRebootNode(): need reboot for updating kernel arguments") + needReboot = true + } + updateNode, err = utils.WriteSwitchdevConfFile(state) if err != nil { glog.Errorf("generic-plugin needRebootNode(): fail to write switchdev device config file") + return false, err } if updateNode { glog.V(2).Infof("generic-plugin needRebootNode(): need reboot for updating switchdev device configuration") + needReboot = true } - needReboot = needReboot || updateNode - return + + return needReboot, nil } // ////////////// for testing purposes only /////////////////////// diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 231e2ab800..635ca054be 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -32,6 +32,7 @@ const ( sysBusPciDrivers = "/sys/bus/pci/drivers" sysBusPciDriversProbe = "/sys/bus/pci/drivers_probe" sysClassNet = "/sys/class/net" + procKernelCmdLine = "/proc/cmdline" netClass = 0x02 numVfsFile = "sriov_numvfs" @@ -40,6 +41,9 @@ const ( VendorMellanox = "15b3" DeviceBF2 = "a2d6" DeviceBF3 = "a2dc" + + KernelArgIntelIommu = "intel_iommu=on" + KernelArgIommuPt = "iommu=pt" ) var InitialState sriovnetworkv1.SriovNetworkNodeState @@ -51,6 +55,25 @@ func init() { ClusterType = os.Getenv("CLUSTER_TYPE") } +// GetCurrentKernelArgs This retrieves the kernel cmd line arguments +func GetCurrentKernelArgs(chroot bool) (string, error) { + path := procKernelCmdLine + if !chroot { + path = "/host" + path + } + cmdLine, err := os.ReadFile(path) + if err != nil { + return "", fmt.Errorf("GetCurrentKernelArgs(): Error reading %s: %v", procKernelCmdLine, err) + } + return string(cmdLine), nil +} + +// IsKernelArgsSet This checks if the kernel cmd line is set properly. Please note that the same key could be repeated +// several times in the kernel cmd line. We can only ensure that the kernel cmd line has the key/val kernel arg that we set. +func IsKernelArgsSet(cmdLine string, karg string) bool { + return strings.Contains(cmdLine, karg) +} + func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt, error) { glog.V(2).Info("DiscoverSriovDevices") pfList := []sriovnetworkv1.InterfaceExt{}