diff --git a/api/v1/helper.go b/api/v1/helper.go index 300992acb..1f30be618 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -12,6 +12,7 @@ import ( "sort" "strconv" "strings" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -1005,3 +1006,43 @@ func GenerateBridgeName(iface *InterfaceExt) string { func NeedToUpdateBridges(bridgeSpec, bridgeStatus *Bridges) bool { return !reflect.DeepEqual(bridgeSpec, bridgeStatus) } + +// SetKeepUntilTime sets an annotation to hold the "keep until time" for the node’s state. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +func (s *SriovNetworkNodeState) SetKeepUntilTime(t time.Time) { + ts := t.Format(time.RFC3339) + annotations := s.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[consts.NodeStateKeepUntilAnnotation] = ts + s.SetAnnotations(annotations) +} + +// GetKeepUntilTime returns the value that is stored in the "keep until time" annotation. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +// Return zero time instant if annotaion is not found on the object or if it has a wrong format. +func (s *SriovNetworkNodeState) GetKeepUntilTime() time.Time { + t, err := time.Parse(time.RFC3339, s.GetAnnotations()[consts.NodeStateKeepUntilAnnotation]) + if err != nil { + return time.Time{} + } + return t +} + +// ResetKeepUntilTime removes "keep until time" annotation from the state object. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +// Returns true if the value was removed, false otherwise. +func (s *SriovNetworkNodeState) ResetKeepUntilTime() bool { + annotations := s.GetAnnotations() + _, exist := annotations[consts.NodeStateKeepUntilAnnotation] + if !exist { + return false + } + delete(annotations, consts.NodeStateKeepUntilAnnotation) + s.SetAnnotations(annotations) + return true +} diff --git a/bindata/scripts/kargs.sh b/bindata/scripts/kargs.sh index 8d118456e..07e283600 100755 --- a/bindata/scripts/kargs.sh +++ b/bindata/scripts/kargs.sh @@ -7,6 +7,14 @@ declare -a kargs=( "$@" ) ret=0 args=$(chroot /host/ cat /proc/cmdline) +IS_OS_UBUNTU=true; [[ "$(chroot /host/ grep -i ubuntu /etc/os-release -c)" == "0" ]] && IS_OS_UBUNTU=false + +# Kernel args configuration isn't supported for Ubuntu now, so we shouldn't do anything here +if ${IS_OS_UBUNTU} ; then + echo $ret + exit 0 +fi + if chroot /host/ test -f /run/ostree-booted ; then for t in "${kargs[@]}";do if [[ $command == "add" ]];then diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index 29438b176..f8811ed97 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -20,8 +20,10 @@ import ( "context" "encoding/json" "fmt" + "os" "reflect" "sort" + "strconv" "strings" "time" @@ -338,10 +340,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con logger.Error(err, "Fail to remove device plugin label from node", "node", ns.Name) return err } - logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) - err = r.Delete(ctx, &ns, &client.DeleteOptions{}) - if err != nil { - logger.Error(err, "Fail to Delete", "SriovNetworkNodeState CR:", ns.GetName()) + if err := r.handleStaleNodeState(ctx, &ns); err != nil { return err } } @@ -350,6 +349,56 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con return nil } +// handleStaleNodeState handles stale SriovNetworkNodeState CR (the CR which no longer have a corresponding node with the daemon). +// If the CR has the "keep until time" annotation, indicating the earliest time the state object can be removed, +// this function will compare it to the current time to determine if deletion is permissible and do deletion if allowed. +// If the annotation is absent, the function will create one with a timestamp in future, using either the default or a configured offset. +// If STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable is set to 0, removes the CR immediately +func (r *SriovNetworkNodePolicyReconciler) handleStaleNodeState(ctx context.Context, ns *sriovnetworkv1.SriovNetworkNodeState) error { + logger := log.Log.WithName("handleStaleNodeState") + + var delayMinutes int + var err error + + envValue, found := os.LookupEnv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES") + if found { + delayMinutes, err = strconv.Atoi(envValue) + if err != nil || delayMinutes < 0 { + delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes + logger.Error(err, "invalid value in STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable, use default delay", + "delay", delayMinutes) + } + } else { + delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes + } + + if delayMinutes != 0 { + now := time.Now().UTC() + keepUntilTime := ns.GetKeepUntilTime() + if keepUntilTime.IsZero() { + keepUntilTime = now.Add(time.Minute * time.Duration(delayMinutes)) + logger.V(2).Info("SriovNetworkNodeState has no matching node, configure cleanup delay for the state object", + "nodeStateName", ns.Name, "delay", delayMinutes, "keepUntilTime", keepUntilTime.String()) + ns.SetKeepUntilTime(keepUntilTime) + if err := r.Update(ctx, ns); err != nil { + logger.Error(err, "Fail to update SriovNetworkNodeState CR", "name", ns.GetName()) + return err + } + return nil + } + if now.Before(keepUntilTime) { + return nil + } + } + // remove the object if delayMinutes is 0 or if keepUntilTime is already passed + logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) + if err := r.Delete(ctx, ns, &client.DeleteOptions{}); err != nil { + logger.Error(err, "Fail to delete SriovNetworkNodeState CR", "name", ns.GetName()) + return err + } + return nil +} + func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig, npl *sriovnetworkv1.SriovNetworkNodePolicyList, @@ -375,9 +424,16 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context return fmt.Errorf("failed to get SriovNetworkNodeState: %v", err) } } else { + keepUntilAnnotationUpdated := found.ResetKeepUntilTime() + if len(found.Status.Interfaces) == 0 { logger.Info("SriovNetworkNodeState Status Interfaces are empty. Skip update of policies in spec", "namespace", ns.Namespace, "name", ns.Name) + if keepUntilAnnotationUpdated { + if err := r.Update(ctx, found); err != nil { + return fmt.Errorf("couldn't update SriovNetworkNodeState: %v", err) + } + } return nil } @@ -420,7 +476,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context // Note(adrianc): we check same ownerReferences since SriovNetworkNodeState // was owned by a default SriovNetworkNodePolicy. if we encounter a descripancy // we need to update. - if reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) && + if !keepUntilAnnotationUpdated && reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) && equality.Semantic.DeepEqual(newVersion.Spec, found.Spec) { logger.V(1).Info("SriovNetworkNodeState did not change, not updating") return nil diff --git a/controllers/sriovnetworknodepolicy_controller_test.go b/controllers/sriovnetworknodepolicy_controller_test.go index d5534f55e..b955e0133 100644 --- a/controllers/sriovnetworknodepolicy_controller_test.go +++ b/controllers/sriovnetworknodepolicy_controller_test.go @@ -3,6 +3,7 @@ package controllers import ( "context" "encoding/json" + "os" "sync" "testing" "time" @@ -11,18 +12,17 @@ import ( . "github.com/onsi/gomega" "github.com/google/go-cmp/cmp" + dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" - sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" @@ -48,7 +48,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { { tname: "testVirtioVdpaVirtio", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVirtio, @@ -67,7 +67,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { }, { tname: "testVhostVdpaVirtio", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVhost, @@ -87,7 +87,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { { tname: "testExcludeTopology", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", ExcludeTopology: true, }, @@ -138,6 +138,10 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { var ctx context.Context BeforeAll(func() { + // disable stale state cleanup delay to check that the controller can cleanup state objects + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "0") + By("Create SriovOperatorConfig controller k8s objs") config := makeDefaultSriovOpConfig() Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) @@ -321,3 +325,65 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { }) }) }) + +var _ = Describe("SriovNetworkNodePolicyReconciler", Ordered, func() { + Context("handleStaleNodeState", func() { + var ( + ctx context.Context + r *SriovNetworkNodePolicyReconciler + nodeState *sriovnetworkv1.SriovNetworkNodeState + ) + + BeforeEach(func() { + ctx = context.Background() + scheme := runtime.NewScheme() + utilruntime.Must(sriovnetworkv1.AddToScheme(scheme)) + nodeState = &sriovnetworkv1.SriovNetworkNodeState{ObjectMeta: metav1.ObjectMeta{Name: "node1"}} + r = &SriovNetworkNodePolicyReconciler{Client: fake.NewClientBuilder().WithObjects(nodeState).Build()} + }) + It("should set default delay", func() { + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Now().UTC().Before(nodeState.GetKeepUntilTime())).To(BeTrue()) + }) + It("should remove CR if wait time expired", func() { + nodeState := nodeState.DeepCopy() + nodeState.SetKeepUntilTime(time.Now().UTC().Add(-time.Minute)) + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue()) + }) + It("should keep existing wait time if already set", func() { + nodeState := nodeState.DeepCopy() + nodeState.SetKeepUntilTime(time.Now().UTC().Add(time.Minute)) + testTime := nodeState.GetKeepUntilTime() + r.Update(ctx, nodeState) + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(nodeState.GetKeepUntilTime()).To(Equal(testTime)) + }) + It("non default dealy", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "60") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Until(nodeState.GetKeepUntilTime()) > 30*time.Minute).To(BeTrue()) + }) + It("invalid non default delay - should use default", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "-20") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Until(nodeState.GetKeepUntilTime()) > 20*time.Minute).To(BeTrue()) + }) + It("should remove CR if delay is zero", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "0") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue()) + }) + }) +}) diff --git a/deployment/sriov-network-operator-chart/templates/operator.yaml b/deployment/sriov-network-operator-chart/templates/operator.yaml index 0e89d1959..c2a813fc8 100644 --- a/deployment/sriov-network-operator-chart/templates/operator.yaml +++ b/deployment/sriov-network-operator-chart/templates/operator.yaml @@ -112,6 +112,8 @@ spec: value: {{ .Values.operator.cniBinPath }} - name: CLUSTER_TYPE value: {{ .Values.operator.clusterType }} + - name: STALE_NODE_STATE_CLEANUP_DELAY_MINUTES + value: "{{ .Values.operator.staleNodeStateCleanupDelayMinutes }}" {{- if .Values.operator.admissionControllers.enabled }} - name: ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_SECRET_NAME value: {{ .Values.operator.admissionControllers.certificates.secretNames.operator }} diff --git a/deployment/sriov-network-operator-chart/values.yaml b/deployment/sriov-network-operator-chart/values.yaml index c70d6e323..ec9323bf7 100644 --- a/deployment/sriov-network-operator-chart/values.yaml +++ b/deployment/sriov-network-operator-chart/values.yaml @@ -27,6 +27,10 @@ operator: resourcePrefix: "openshift.io" cniBinPath: "/opt/cni/bin" clusterType: "kubernetes" + # minimal amount of time (in minutes) the operator will wait before removing + # stale SriovNetworkNodeState objects (objects that doesn't match node with the daemon) + # "0" means no extra delay, in this case the CR will be removed by the next reconcilation cycle (may take up to 5 minutes) + staleNodeStateCleanupDelayMinutes: "30" metricsExporter: port: "9110" certificates: diff --git a/hack/run-e2e-conformance-virtual-cluster.sh b/hack/run-e2e-conformance-virtual-cluster.sh index d6fa44fd9..353c8528d 100755 --- a/hack/run-e2e-conformance-virtual-cluster.sh +++ b/hack/run-e2e-conformance-virtual-cluster.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -xeo pipefail -cluster_version=${CLUSTER_VERSION:-1.29.3} +cluster_version=${CLUSTER_VERSION:-1.32.0} cluster_name=${CLUSTER_NAME:-virtual} domain_name=$cluster_name.lab diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index 4ce478730..6aadef648 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -93,6 +93,14 @@ const ( MCPPauseAnnotationState = "sriovnetwork.openshift.io/state" MCPPauseAnnotationTime = "sriovnetwork.openshift.io/time" + // NodeStateKeepUntilAnnotation contains name of the "keep until time" annotation for SriovNetworkNodeState object. + // The "keep until time" specifies the earliest time at which the state object can be removed + // if the daemon's pod is not found on the node. + NodeStateKeepUntilAnnotation = "sriovnetwork.openshift.io/keep-state-until" + // DefaultNodeStateCleanupDelayMinutes contains default delay before removing stale SriovNetworkNodeState CRs + // (the CRs that no longer have a corresponding node with the daemon). + DefaultNodeStateCleanupDelayMinutes = 30 + CheckpointFileName = "sno-initial-node-state.json" Unknown = "Unknown" diff --git a/test/conformance/tests/test_policy_configuration.go b/test/conformance/tests/test_policy_configuration.go index 35a079ff2..6d5d8b259 100644 --- a/test/conformance/tests/test_policy_configuration.go +++ b/test/conformance/tests/test_policy_configuration.go @@ -602,6 +602,11 @@ var _ = Describe("[sriov] operator", Ordered, func() { NetworkNamespace: namespaces.Test, }} + // for real BM env we enable link state + if !cluster.VirtualCluster() { + sriovNetwork.Spec.LinkState = "enable" + } + // We need this to be able to run the connectivity checks on Mellanox cards if intf.DeviceID == "1015" { sriovNetwork.Spec.SpoofChk = off diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index c9a60474d..73c53412c 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -501,9 +501,14 @@ var _ = Describe("[sriov] operator", func() { Expect(err).ToNot(HaveOccurred()) Eventually(func() bool { - stdout, stderr, err := pod.ExecCommand(clients, hostNetPod, "ip", "link", "show") - Expect(err).ToNot(HaveOccurred()) - Expect(stderr).To(Equal("")) + var stdout, stderr string + // Adding a retry because some of the time we get `Dump was interrupted and may be inconsistent.` + // output from the ip link command + Eventually(func(g Gomega) { + stdout, stderr, err = pod.ExecCommand(clients, hostNetPod, "ip", "link", "show") + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(stderr).To(Equal("")) + }, time.Minute, 2*time.Second).Should(Succeed()) found := false for _, line := range strings.Split(stdout, "\n") { diff --git a/test/scripts/kargs_test.sh b/test/scripts/kargs_test.sh index 053bd5200..3e191f230 100755 --- a/test/scripts/kargs_test.sh +++ b/test/scripts/kargs_test.sh @@ -6,6 +6,7 @@ SUT_SCRIPT="${SCRIPTPATH}/../../bindata/scripts/kargs.sh" test_RpmOstree_Add_All_Arguments() { + echo "ID=\"rhel\"" > ${FAKE_HOST}/etc/os-release echo "a b c=d eee=fff" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted @@ -19,6 +20,7 @@ test_RpmOstree_Add_All_Arguments() { test_RpmOstree_Add_Only_Missing_Arguments() { + echo "ID=\"rhel\"" > ${FAKE_HOST}/etc/os-release echo "a b c=d eee=fff K=L" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted @@ -31,6 +33,7 @@ test_RpmOstree_Add_Only_Missing_Arguments() { } test_RpmOstree_Delete_All_Arguments() { + echo "ID=\"rhel\"" > ${FAKE_HOST}/etc/os-release echo "a b c=d eee=fff X=Y W=Z" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted @@ -43,6 +46,7 @@ test_RpmOstree_Delete_All_Arguments() { } test_RpmOstree_Delete_Only_Exist_Arguments() { + echo "ID=\"rhel\"" > ${FAKE_HOST}/etc/os-release echo "a b c=d eee=fff X=Y" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted