// Interface is the contract every workaround in this package exposes.
// In this change it is implemented by *Prometheus, and the e2e install tests
// hold a slice of Interface values and apply them one after another.
//
// TODO: explain why we are adding an interface here
type Interface interface {
	// IssueReference returns a reference to the issue the workaround is
	// about; the Prometheus implementation returns an issue URL.
	IssueReference() string
	// Describe returns a human-readable description of the problem being
	// worked around.
	Describe() string
	// Apply performs the workaround using the given context and client,
	// and returns a non-nil error if it could not be applied.
	Apply(ctx context.Context, cli client.Client) error
}
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package workarounds + +import ( + "context" + "time" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + k8swait "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/klog/v2" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" +) + +const ( + promConfigMapNamespace = "openshift-monitoring" + promConfigMapName = "cluster-monitoring-config" + promConfigMapKey = "config.yaml" + + promConfigKey = "prometheusK8s" + promConfigVolumeKey = "volumeClaimTemplate" +) + +type Prometheus struct { + PollInterval time.Duration + PollTimeout time.Duration +} + +func ForPrometheus() *Prometheus { + return &Prometheus{ + PollInterval: 5 * time.Second, + PollTimeout: 5 * time.Minute, + } +} + +func (prom *Prometheus) IssueReference() string { + return "https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/1784" +} + +func (prom *Prometheus) Describe() string { + return "Prometheus instance cannot use storage and fails to go running; only one instance out of two will go running, and this later prevents node draining" +} + +func (prom *Prometheus) Apply(ctx context.Context, cli client.Client) error { + var cm corev1.ConfigMap + key := client.ObjectKey{ + Namespace: promConfigMapNamespace, + Name: promConfigMapName, + } + immediate := true + return k8swait.PollUntilContextTimeout(ctx, prom.PollInterval, prom.PollTimeout, immediate, func(ctx2 context.Context) (bool, error) { + err := cli.Get(ctx2, key, &cm) + if err != nil { + if apierrors.IsNotFound(err) { + 
klog.InfoS("configmap not found, nothing to do", "namespace", key.Namespace, "name", key.Name) + return true, nil + } + return true, err + } + + err = UpdateConfigMap(&cm) + if err != nil { + return true, err + } + + err = cli.Update(ctx2, &cm) + return (err == nil), nil // retry on conflicts + }) +} + +func UpdateConfigMap(cm *corev1.ConfigMap) error { + data, ok := cm.Data[promConfigMapKey] + if !ok { + klog.InfoS("configmap holds no config data, nothing to do") + return nil + } + + klog.InfoS("configmap payload fixed", "oldData", data) + + newData, err := UpdateConfigData([]byte(data)) + if err != nil { + klog.ErrorS(err, "configmap payload update failed") + return err + } + + klog.InfoS("configmap payload fixed", "newData", string(newData)) + + cm.Data[promConfigMapKey] = string(newData) + return nil +} + +func UpdateConfigData(data []byte) ([]byte, error) { + var err error + var r unstructured.Unstructured + + err = yaml.Unmarshal(data, &r.Object) + if err != nil { + return nil, err + } + + promK8S, ok, err := unstructured.NestedMap(r.Object, promConfigKey) + if err != nil { + return nil, err + } + if !ok { + klog.InfoS("configmap payload not found, nothing to do", "payload", promConfigKey) + return nil, nil + } + + err = unstructured.SetNestedMap(promK8S, map[string]interface{}{}, promConfigVolumeKey) + if err != nil { + klog.ErrorS(err, "configmap payload update failed", "namespace", "payload", promConfigKey, "volume", promConfigVolumeKey) + } + + err = unstructured.SetNestedMap(r.Object, promK8S, promConfigKey) + if err != nil { + klog.ErrorS(err, "configmap payload update failed", "namespace", "payload", promConfigKey) + } + + return yaml.Marshal(&r.Object) +} diff --git a/internal/workarounds/prom_test.go b/internal/workarounds/prom_test.go new file mode 100644 index 000000000..17c77243e --- /dev/null +++ b/internal/workarounds/prom_test.go @@ -0,0 +1,116 @@ +/* + * Copyright 2024 Red Hat, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package workarounds + +import ( + "bytes" + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + serializer "k8s.io/apimachinery/pkg/runtime/serializer/json" + "k8s.io/client-go/kubernetes/scheme" +) + +func TestUpdateConfigMap(t *testing.T) { + expectedConfig := `apiVersion: v1 +data: + config.yaml: | + prometheusK8s: + volumeClaimTemplate: {} + telemeterClient: + enabled: false +kind: ConfigMap +metadata: + creationTimestamp: null + name: cluster-monitoring-config + namespace: openshift-monitoring +` + cm := corev1.ConfigMap{ + TypeMeta: metav1.TypeMeta{ + Kind: "ConfigMap", + APIVersion: corev1.SchemeGroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: promConfigMapNamespace, + Name: promConfigMapName, + }, + Data: map[string]string{ + promConfigMapKey: `telemeterClient: + enabled: false +prometheusK8s: + volumeClaimTemplate: + metadata: + name: prometheus-data + annotations: + openshift.io/cluster-monitoring-drop-pvc: "yes" + spec: + resources: + requests: + storage: 20Gi`, + }, + } + + err := UpdateConfigMap(&cm) + if err != nil { + t.Fatalf("UpdateConfigData failed: %v", err) + } + + yamlSerializer := serializer.NewSerializerWithOptions( + serializer.DefaultMetaFactory, scheme.Scheme, scheme.Scheme, + serializer.SerializerOptions{Yaml: true, Pretty: true, Strict: true}) + + buff := 
&bytes.Buffer{} + if err := yamlSerializer.Encode(&cm, buff); err != nil { + // supervised testing environment + // should never be happening + panic(fmt.Errorf("failed to encode KubeletConfigConfigMap object %w", err)) + } + + fixedConfig := buff.String() + + if diff := cmp.Diff(fixedConfig, expectedConfig); diff != "" { + t.Errorf("unexpected diff: %v", diff) + } +} + +// example from real cluster +const _ = `apiVersion: v1 +data: + config.yaml: |- + telemeterClient: + enabled: false + prometheusK8s: + volumeClaimTemplate: + metadata: + name: prometheus-data + annotations: + openshift.io/cluster-monitoring-drop-pvc: "yes" + spec: + resources: + requests: + storage: 20Gi +kind: ConfigMap +metadata: + creationTimestamp: "2024-12-05T08:57:33Z" + name: cluster-monitoring-config + namespace: openshift-monitoring + resourceVersion: "1748" + uid: fa0b71d1-6dd7-41da-8d75-807435523a02` diff --git a/test/e2e/install/install_test.go b/test/e2e/install/install_test.go index 89fb6eef8..10326aef2 100644 --- a/test/e2e/install/install_test.go +++ b/test/e2e/install/install_test.go @@ -41,9 +41,10 @@ import ( "github.com/openshift-kni/numaresources-operator/pkg/status" machineconfigv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" + nropmcp "github.com/openshift-kni/numaresources-operator/internal/machineconfigpools" nrowait "github.com/openshift-kni/numaresources-operator/internal/wait" + "github.com/openshift-kni/numaresources-operator/internal/workarounds" - nropmcp "github.com/openshift-kni/numaresources-operator/internal/machineconfigpools" e2eclient "github.com/openshift-kni/numaresources-operator/test/utils/clients" "github.com/openshift-kni/numaresources-operator/test/utils/configuration" "github.com/openshift-kni/numaresources-operator/test/utils/crds" @@ -59,12 +60,16 @@ const ( containerNameRTE = "resource-topology-exporter" ) -var _ = Describe("[Install] continuousIntegration", func() { +var _ = Describe("[Install] 
continuousIntegration", Serial, func() { var initialized bool + var wrs []workarounds.Interface BeforeEach(func() { if !initialized { Expect(e2eclient.ClientsEnabled).To(BeTrue(), "failed to create runtime-controller client") + wrs = append(wrs, workarounds.ForPrometheus()) + + applyWorkarounds(wrs) } initialized = true }) @@ -150,10 +155,14 @@ var _ = Describe("[Install] continuousIntegration", func() { var _ = Describe("[Install] durability", Serial, func() { var initialized bool + var wrs []workarounds.Interface BeforeEach(func() { if !initialized { Expect(e2eclient.ClientsEnabled).To(BeTrue(), "failed to create runtime-controller client") + wrs = append(wrs, workarounds.ForPrometheus()) + + applyWorkarounds(wrs) } initialized = true }) @@ -442,3 +451,19 @@ func logRTEPodsLogs(cli client.Client, k8sCli *kubernetes.Clientset, ctx context } } } + +func applyWorkarounds(wrs []workarounds.Interface) { + GinkgoHelper() + + if len(wrs) == 0 { + return // nothing to do + } + + ctx := context.Background() + + By(fmt.Sprintf("applying %d workarounds", len(wrs))) + for idx, wr := range wrs { + By(fmt.Sprintf("%02d: %s (%s)", idx, wr.Describe(), wr.IssueReference())) + Expect(wr.Apply(ctx, e2eclient.Client)).To(Succeed()) + } +}