Skip to content

Commit

Permalink
deploy: 6529a8b
Browse files Browse the repository at this point in the history
  • Loading branch information
rollandf committed Jul 11, 2024
1 parent 1a642dc commit 62f04d6
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 19 deletions.
199 changes: 186 additions & 13 deletions _downloads/46bd948566ee03504c6421cadd8047fa/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -426,19 +426,19 @@ function restart_driver() {
exec_cmd "modprobe -d /host tls"
exec_cmd "modprobe -d /host psample"

load_pci-hyperv-intf=false
load_pci_hyperv_intf=false

# ARM does not contain relevant packages, also not a blocker for this OS type for mlx5_core load
if [ "${ARCH}" != "aarch64" ]; then
if ! ${IS_OS_UBUNTU}; then
redhat_fetch_major_ver
[[ $RHEL_MAJOR_VERSION -ge $RH_RT_MIN_MAJOR_VER ]] && load_pci-hyperv-intf=true
[[ $RHEL_MAJOR_VERSION -ge $RH_RT_MIN_MAJOR_VER ]] && load_pci_hyperv_intf=true
else
load_pci-hyperv-intf=true
load_pci_hyperv_intf=true
fi
fi

${load_pci-hyperv-intf} && exec_cmd "modprobe -d /host pci-hyperv-intf"
${load_pci_hyperv_intf} && exec_cmd "modprobe -d /host pci-hyperv-intf"

${UNLOAD_STORAGE_MODULES} && unload_storage_modules

Expand Down Expand Up @@ -495,13 +495,15 @@ function find_mlx_devs() {
dev_guid="-"
fi

dev_record="$pci_addr $dev_type $dev_name $dev_operstate $dev_mtu $pf_numvfs $dev_guid"
eswitch_mode=$(devlink dev eswitch show pci/$pci_addr 2>/dev/null | awk '{for (i=1; i<=NF; i++) if ($i == "mode") {print $(i+1); exit}}')

dev_record="$pci_addr $dev_type $dev_name $dev_operstate $dev_mtu $pf_numvfs $dev_guid $eswitch_mode"
debug_print "Storing device record [${mlx_dev_record_idx}] $dev_record"
mlx_devs_arr[${mlx_dev_record_idx}]=$dev_record
# Example:
# pci_addr dev_type dev_name dev_operstate dev_mtu pf_numvfs dev_guid (For IB only)
# 0000:08:00.0 eth eth2 up 1500 4 0c42:a103:0016:054c
# 0000:08:00.1 eth eth3 up 1500 0 0c42:a103:0016:054d
# pci_addr dev_type dev_name dev_operstate dev_mtu pf_numvfs dev_guid (For IB only) eswitch_mode
# 0000:08:00.0 eth eth2 up 1500 4 0c42:a103:0016:054c legacy
# 0000:08:00.1 eth eth3 up 1500 0 0c42:a103:0016:054d switchdev

mlx_dev_record_idx=$((mlx_dev_record_idx+1))

Expand Down Expand Up @@ -532,12 +534,13 @@ function find_mlx_vfs() {
while [ ${num_mlx_devices} -gt 0 ]; do
declare -a mlx_dev_info=(${mlx_devs_arr[$((num_mlx_devices-1))]})

# pci_addr dev_type dev_name dev_operstate dev_mtu pf_numvfs dev_guid
# [0] [1] [2] [3] [4] [5] [6]
# pci_addr dev_type dev_name dev_operstate dev_mtu pf_numvfs dev_guid eswitch_mode
# [0] [1] [2] [3] [4] [5] [6] [7]
mlnx_dev_pci=${mlx_dev_info[0]}
mlnx_dev_type=${mlx_dev_info[1]}
mlnx_dev_name=${mlx_dev_info[2]}
mlnx_dev_numvfs=${mlx_dev_info[5]}
mlnx_dev_eswitch_mode=${mlx_dev_info[7]}

if [ "${mlnx_dev_numvfs}" == "0" ]; then
debug_print "Device ${mlnx_dev_name} does not have open VFs, skipping"
Expand All @@ -546,6 +549,19 @@ function find_mlx_vfs() {
continue
fi

if [ "${mlnx_dev_eswitch_mode}" == "switchdev" ]; then
representor_regex="^pf([0-9]+)vf([0-9]+)$"

mlnx_dev_phys_port_name=$(cat /sys/class/net/"${mlnx_dev_name}"/phys_port_name 2>/dev/null)
# Use regex to parse the string and extract numbers
if [[ "$mlnx_dev_phys_port_name" =~ $representor_regex ]]; then
debug_print "Device ${mlnx_dev_name} is a switchdev representor, not a PF, skipping"

num_mlx_devices=$(($num_mlx_devices-1))
continue
fi
fi

found_sriov_conf=$(($found_sriov_conf+1))

debug_print "Fetching [$mlnx_dev_numvfs] VFs info for PCI device: ${mlnx_dev_pci} ($mlnx_dev_name)"
Expand Down Expand Up @@ -584,6 +600,95 @@ function find_mlx_vfs() {
done
}

function find_switchdev_representors() {
debug_print "Function: ${FUNCNAME[0]}"
num_mlx_devices=${mlx_dev_record_idx-1}
timestamp_print "Query representors info from [${num_mlx_devices}] devices"

while [ ${num_mlx_devices} -gt 0 ]; do
declare -a mlx_dev_info=(${mlx_devs_arr[$((num_mlx_devices-1))]})

# pci_addr dev_type dev_name dev_operstate dev_mtu pf_numvfs dev_guid eswitch_mode
# [0] [1] [2] [3] [4] [5] [6] [7]
mlnx_dev_pci=${mlx_dev_info[0]}
mlnx_dev_name=${mlx_dev_info[2]}
mlnx_dev_numvfs=${mlx_dev_info[5]}
mlnx_dev_eswitch_mode=${mlx_dev_info[7]}

if [ "${mlnx_dev_eswitch_mode}" != "switchdev" ]; then
debug_print "Device ${mlnx_dev_name} is not in switchdev mode, skipping"

num_mlx_devices=$(($num_mlx_devices-1))
continue
fi

regex="^p([0-9]+)$"
mlnx_dev_phys_port_name=$(cat /sys/class/net/"${mlnx_dev_name}"/phys_port_name 2>/dev/null)
if [[ "$mlnx_dev_phys_port_name" =~ $regex ]]; then
mlnx_dev_phys_port_num="${BASH_REMATCH[1]}"
else
debug_print "Device ${mlnx_dev_name} is not a PF, skipping"

num_mlx_devices=$(($num_mlx_devices-1))
continue
fi

mlnx_dev_phys_switch_id=$(cat /sys/class/net/"${mlnx_dev_name}"/phys_switch_id 2>/dev/null)

debug_print "Device phys port num ${mlnx_dev_phys_port_num} check"
debug_print "Device phys port name ${mlnx_dev_phys_port_name} check"

if [ "${mlnx_dev_numvfs}" == "0" ]; then
debug_print "Device ${mlnx_dev_name} does not have open VFs, skipping"

num_mlx_devices=$(($num_mlx_devices-1))
continue
fi

debug_print "Fetching [$mlnx_dev_numvfs] representors info for PCI device: ${mlnx_dev_pci} ($mlnx_dev_name)"

for netdev_path in /sys/class/net/"${mlnx_dev_name}"/subsystem/*; do
dev_phys_port_name=$(cat "${netdev_path}"/phys_port_name 2>/dev/null)

regex="^pf([0-9]+)vf([0-9]+)$"

# Use regex to parse the string and extract numbers
if [[ "$dev_phys_port_name" =~ $regex ]]; then
dev_pf_port_number="${BASH_REMATCH[1]}"
dev_vf_id_number="${BASH_REMATCH[2]}"
else
debug_print "Device ${netdev_path} is not a switchdev representor, skipping"
continue
fi

if [ "${mlnx_dev_phys_switch_id}" != "$(cat "${netdev_path}"/phys_switch_id 2>/dev/null)" ]; then
debug_print "Physical switch id of device ${netdev_path} does not match PF ${mlnx_dev_name}, skipping"
continue
fi

if [ "${dev_pf_port_number}" != "${mlnx_dev_phys_port_num}" ]; then
debug_print "Switchdev representor ${netdev_path} does not belong to this PF ${mlnx_dev_name}"
continue
fi

representor_name=$(basename "$netdev_path") && [[ -n $representor_name ]] || return 1
representor_operstate=$(cat "$netdev_path"/operstate) && [[ -n $representor_operstate ]] || return 1
representor_mtu_val=$(cat "$netdev_path"/mtu) && [[ -n $representor_mtu_val ]] || return 1

representor_record="$mlnx_dev_phys_switch_id $mlnx_dev_phys_port_num $dev_vf_id_number $representor_name $representor_operstate $representor_mtu_val"
debug_print "Storing switchdev representor record [${representor_record_idx}]: $representor_record"
switchdev_representors_arr[$representor_record_idx]="$representor_record"
# Example:
# pf_phys_switch_id pf_port_num vf_id representor_name representor_operstate representor_mtu_val
# 8a45730003da341c 1 3 enp3s1f1npf1vf3 up 1500

representor_record_idx=$((representor_record_idx+1))
done

num_mlx_devices=$(($num_mlx_devices-1))
done
}

function restore_sriov_config() {
debug_print "Function: ${FUNCNAME[0]}"

Expand All @@ -598,27 +703,32 @@ function restore_sriov_config() {
while [ ${num_mlx_devices} -gt 0 ]; do
declare -a mlx_dev_info=(${mlx_devs_arr[$((num_mlx_devices-1))]})

# pci_addr dev_type dev_name dev_operstate dev_mtu pf_numvfs dev_guid (For IB only)
# [0] [1] [2] [3] [4] [5] [6]
# pci_addr dev_type dev_name dev_operstate dev_mtu pf_numvfs dev_guid (For IB only) eswitch_mode
# [0] [1] [2] [3] [4] [5] [6] [7]

pf_pci_addr=${mlx_dev_info[0]}
pf_type=${mlx_dev_info[1]}
pf_old_name=${mlx_dev_info[2]}
pf_oper_state=${mlx_dev_info[3]}
pf_mtu=${mlx_dev_info[4]}
pf_numvfs=${mlx_dev_info[5]}
pf_eswitch_mode=${mlx_dev_info[7]}

if [ "${pf_numvfs}" == "0" ]; then
num_mlx_devices=$(($num_mlx_devices-1))
continue
fi

pf_pci_dev_path="/sys/bus/pci/devices/${pf_pci_addr}"
pf_new_dev_name=$(ls "$pf_pci_dev_path/net/")
pf_new_dev_name=$(ls "$pf_pci_dev_path/net/" | head -n 1)
pf_new_netdev_path="${pf_pci_dev_path}/net/${pf_new_dev_name}"

timestamp_print "Restoring SR-IOV config for device: $pf_pci_addr (VFs=$pf_numvfs) netdev name: ${pf_new_dev_name}"

if [ "${pf_eswitch_mode}" == "switchdev" ]; then
exec_cmd "devlink dev eswitch set pci/$pf_pci_addr mode switchdev"
fi

exec_cmd "ip link set dev ${pf_new_dev_name} $pf_oper_state"
# Permission denied via sysfs

Expand Down Expand Up @@ -681,18 +791,80 @@ function restore_sriov_config() {

num_mlx_devices=$(($num_mlx_devices-1))
done

for representor_record in $(seq 0 1 $((representor_record_idx-1))); do
declare -a representor_info=(${switchdev_representors_arr[$representor_record]})

debug_print "Inspecting switchdev representor entry: ${representor_info[@]}"

# pf_phys_switch_id pf_port_num vf_id representor_name representor_operstate representor_mtu_val
# [0] [1] [2] [3] [4] [5]
pf_phys_switch_id=${representor_info[0]}
pf_port_num=${representor_info[1]}
vf_id=${representor_info[2]}
representor_name=${representor_info[3]}
representor_operstate=${representor_info[4]}
representor_mtu_val=${representor_info[5]}

mlnx_dev_phys_switch_id=$(cat /sys/class/net/"${mlnx_dev_name}"/phys_switch_id 2>/dev/null)

for netdev_path in /sys/class/net/*; do
if [ "${pf_phys_switch_id}" != "$(cat "${netdev_path}"/phys_switch_id 2>/dev/null)" ]; then
debug_print "Physical switch id of device ${netdev_path} does not match the record, skipping"
continue
else
debug_print "Physical switch id of device ${netdev_path} matches the record"
fi

dev_phys_port_name=$(cat "${netdev_path}"/phys_port_name 2>/dev/null)

regex="^pf([0-9]+)vf([0-9]+)$"

# Use regex to parse the string and extract numbers
if [[ "$dev_phys_port_name" =~ $regex ]]; then
dev_pf_port_number="${BASH_REMATCH[1]}"
dev_vf_id_number="${BASH_REMATCH[2]}"
else
debug_print "Device ${netdev_path} is not a switchdev representor, skipping"
continue
fi

if [ "${dev_pf_port_number}" != "${pf_port_num}" ]; then
debug_print "Switchdev representor ${netdev_path} does not belong to this port ${pf_port_num}"
continue
fi

if [ "${dev_vf_id_number}" != "${vf_id}" ]; then
debug_print "Switchdev representor ${netdev_path} does not belong to this vf id ${vf_id}"
continue
fi

debug_print "Switchdev representor ${netdev_path} belongs to this vf id ${vf_id}, setting parameters"

representor_new_name=$(basename "$netdev_path") && [[ -n $representor_new_name ]]

exec_cmd "ip link set dev ${representor_new_name} name ${representor_name}"
exec_cmd "ip link set dev ${representor_name} mtu ${representor_mtu_val}"
exec_cmd "ip link set dev ${representor_name} ${representor_operstate}"

break
done

done
}

function store_devices_conf() {
debug_print "Function: ${FUNCNAME[0]}"

mlx_dev_record_idx=0
vf_record_idx=0
representor_record_idx=0
found_sriov_conf=0

if ${mlx5_core_loaded}; then
find_mlx_devs
find_mlx_vfs
find_switchdev_representors
else
debug_print "Driver not loaded, skipped store netdev conf info"
fi
Expand Down Expand Up @@ -973,6 +1145,7 @@ RH_RT_MIN_MAJOR_VER=9

declare -a mlx_devs_arr
declare -a mlx_vfs_arr
declare -a switchdev_representors_arr
mlx_dev_record_idx=0

if ${IS_OS_UBUNTU}; then
Expand Down
Loading

0 comments on commit 62f04d6

Please sign in to comment.