Skip to content

Commit

Permalink
Error out when Kernel Parameters cannot be set
Browse files Browse the repository at this point in the history
We need to make sure that the kernel parameters are set, thus we should
pass the errors up to the config daemon.

Signed-off-by: William Zhao <[email protected]>
  • Loading branch information
wizhaoredhat committed Aug 15, 2023
1 parent f63b13d commit c52c3cc
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 28 deletions.
47 changes: 29 additions & 18 deletions pkg/plugins/generic/generic_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt
p.DesireState = new

needDrain = needDrainNode(new.Spec.Interfaces, new.Status.Interfaces)
needReboot = p.needRebootNode(new)
needReboot, err = p.needRebootNode(new)

if needReboot {
needDrain = true
Expand Down Expand Up @@ -202,30 +202,34 @@ func (p *GenericPlugin) AddToDesiredKernelParams(kparam string) {
}
}

// SetAllDesiredKernelParams Should be called to set all the kernel parameters. Returns true if reboot of the node is needed.
func (p *GenericPlugin) SetAllDesiredKernelParams() bool {
needReboot := false
// SetAllDesiredKernelParams Should be called to set all the kernel parameters. Updates needReboot if reboot is needed.
func (p *GenericPlugin) SetAllDesiredKernelParams(needReboot *bool) error {
for kparam, attempts := range p.DesiredKernelParams {
if !utils.IsKernelCmdLineParamSet(kparam, false) {
if attempts > 0 && attempts <= 4 {
glog.Errorf("generic-plugin SetAllDesiredKernelParams(): Fail to set kernel param %s after reboot with attempts %d", kparam, attempts)
} else if attempts > 4 {
// If we tried several times and was unsuccessful, we should give up.
continue
set, err := utils.IsKernelCmdLineParamSet(kparam, false)
if err != nil {
return err
}
if !set {
if attempts > 0 {
glog.Errorf("generic-plugin SetAllDesiredKernelParams(): failed to set kernel param %s with attempts %d", kparam, attempts)
}
// There is a case when we try to set the kernel parameter here, the daemon could decide to not reboot because
// the daemon encountered a potentially one-time error. However we always want to make sure that the kernel
// parameter is set once the daemon goes through node state sync again.
update, err := trySetKernelParam(kparam)
if err != nil {
glog.Errorf("generic-plugin SetAllDesiredKernelParams(): Fail to set kernel param %s: %v", kparam, err)
glog.Errorf("generic-plugin SetAllDesiredKernelParams(): fail to set kernel param %s: %v", kparam, err)
return err
}
if update {
glog.V(2).Infof("generic-plugin SetAllDesiredKernelParams(): Need reboot for setting kernel param %s", kparam)
glog.V(2).Infof("generic-plugin SetAllDesiredKernelParams(): need reboot for setting kernel param %s", kparam)
}
// Update the number of attempts we tried to set the kernel parameter.
p.DesiredKernelParams[kparam]++
needReboot = needReboot || update
*needReboot = *needReboot || update
}
}
return needReboot
return nil
}

func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.InterfaceExts) (needDrain bool) {
Expand Down Expand Up @@ -257,8 +261,8 @@ func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.Int
return
}

func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (needReboot bool) {
needReboot = false
func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (bool, error) {
needReboot := false
if p.LoadVfioDriver != loaded {
if needVfioDriver(state) {
p.LoadVfioDriver = loading
Expand All @@ -273,13 +277,20 @@ func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeSta
}
}

err := p.SetAllDesiredKernelParams(&needReboot)
if err != nil {
glog.Errorf("generic-plugin needRebootNode(): failed to set the desired kernel parameters")
return false, err
}

update, err := utils.WriteSwitchdevConfFile(state)
if err != nil {
glog.Errorf("generic-plugin needRebootNode(): fail to write switchdev device config file")
return false, err
}
if update {
glog.V(2).Infof("generic-plugin needRebootNode(): need reboot for updating switchdev device configuration")
}
needReboot = needReboot || update || p.SetAllDesiredKernelParams()
return
needReboot = needReboot || update
return needReboot, nil
}
16 changes: 6 additions & 10 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const (
sysBusPciDrivers = "/sys/bus/pci/drivers"
sysBusPciDriversProbe = "/sys/bus/pci/drivers_probe"
sysClassNet = "/sys/class/net"
sysKernelCmdLine = "/proc/cmdline"
procKernelCmdLine = "/proc/cmdline"
netClass = 0x02
numVfsFile = "sriov_numvfs"

Expand All @@ -57,24 +57,20 @@ func init() {
}

// IsKernelCmdLineParamSet This checks if the kernel cmd line is set properly.
func IsKernelCmdLineParamSet(param string, chroot bool) bool {
path := sysKernelCmdLine
func IsKernelCmdLineParamSet(param string, chroot bool) (bool, error) {
path := procKernelCmdLine
if !chroot {
path = "/host" + path
}
cmdLine, err := os.ReadFile(path)
if err != nil {
glog.Warningf("IsKernelCmdLineParamSet(): Error reading %s, Please check the Kernel Params manually!", sysKernelCmdLine)
// Although intuitively we should report false here. However if we cannot reliably read the Kernel's cmd line params
// then it is unknown whether it was successfully set, and to avoid unnecessary reboots we should just post a warning
// and return true.
return true
return false, fmt.Errorf("IsKernelCmdLineParamSet(): Error reading %s: %v", procKernelCmdLine, err)
}

if strings.Contains(string(cmdLine), param) {
return true
return true, nil
}
return false
return false, nil
}

func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt, error) {
Expand Down

0 comments on commit c52c3cc

Please sign in to comment.