Skip to content

Commit

Permalink
Merge pull request #1107 from rbaturov/expose-metrics-securely-rte
Browse files Browse the repository at this point in the history
CNF-11234: Enable RTE metrics to be scraped securely by Prometheus
  • Loading branch information
openshift-merge-bot[bot] authored Dec 22, 2024
2 parents 6a57ee1 + ae455f8 commit 1dd2f17
Show file tree
Hide file tree
Showing 12 changed files with 333 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ metadata:
}
]
capabilities: Basic Install
createdAt: "2024-12-18T20:58:55Z"
createdAt: "2024-12-19T08:31:49Z"
olm.skipRange: '>=4.18.0 <4.19.0'
operatorframework.io/cluster-monitoring: "true"
operators.operatorframework.io/builder: operator-sdk-v1.36.1
Expand Down Expand Up @@ -359,6 +359,7 @@ spec:
resources:
- configmaps
- serviceaccounts
- services
verbs:
- '*'
- apiGroups:
Expand Down
1 change: 1 addition & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ rules:
resources:
- configmaps
- serviceaccounts
- services
verbs:
- '*'
- apiGroups:
Expand Down
65 changes: 56 additions & 9 deletions controllers/numaresourcesoperator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ import (
"github.com/openshift-kni/numaresources-operator/pkg/hash"
"github.com/openshift-kni/numaresources-operator/pkg/images"
"github.com/openshift-kni/numaresources-operator/pkg/loglevel"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
apistate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/api"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/compare"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/merge"
rtestate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
rteupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/rte"
"github.com/openshift-kni/numaresources-operator/pkg/status"
Expand All @@ -83,15 +86,16 @@ type poolDaemonSet struct {
// NUMAResourcesOperatorReconciler reconciles a NUMAResourcesOperator object
type NUMAResourcesOperatorReconciler struct {
client.Client
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtemanifests.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtemanifests.Manifests
RTEMetricsManifests rtemetricsmanifests.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
}

// TODO: narrow down
Expand All @@ -118,6 +122,7 @@ type NUMAResourcesOperatorReconciler struct {
//+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators,verbs=*
//+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators/finalizers,verbs=update
//+kubebuilder:rbac:groups="",resources=services,verbs=*

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
Expand Down Expand Up @@ -578,6 +583,30 @@ func (r *NUMAResourcesOperatorReconciler) syncNUMAResourcesOperatorResources(ctx
return nil, fmt.Errorf("failed to apply (%s) %s/%s: %w", objState.Desired.GetObjectKind().GroupVersionKind(), objState.Desired.GetNamespace(), objState.Desired.GetName(), err)
}
}

for _, obj := range r.RTEMetricsManifests.ToObjects() {
// Check if the object already exists
existingObj := obj.DeepCopyObject().(client.Object)
err := r.Client.Get(ctx, client.ObjectKeyFromObject(obj), existingObj)
if err != nil && !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("failed to get %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
}
if apierrors.IsNotFound(err) {
err := controllerutil.SetControllerReference(instance, obj, r.Scheme)
if err != nil {
return nil, fmt.Errorf("failed to set controller reference to %s %s: %w", obj.GetNamespace(), obj.GetName(), err)
}
err = r.Client.Create(ctx, obj)
if err != nil {
return nil, fmt.Errorf("failed to create %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
}
} else {
if err := updateIfNeeded(ctx, existingObj, obj, r.Client); err != nil {
return nil, err
}
}
}

if len(dsPoolPairs) < len(trees) {
klog.Warningf("daemonset and tree size mismatch: expected %d got in daemonsets %d", len(trees), len(dsPoolPairs))
}
Expand Down Expand Up @@ -780,3 +809,21 @@ func getTreesByNodeGroup(ctx context.Context, cli client.Client, nodeGroups []nr
return nil, fmt.Errorf("unsupported platform")
}
}

func updateIfNeeded(ctx context.Context, existingObj, desiredObj client.Object, cli client.Client) error {
merged, err := merge.MetadataForUpdate(existingObj, desiredObj)
if err != nil {
return fmt.Errorf("could not merge object %s with existing: %w", desiredObj.GetName(), err)
}
isEqual, err := compare.Object(existingObj, merged)
if err != nil {
return fmt.Errorf("could not compare object %s with existing: %w", desiredObj.GetName(), err)
}
if !isEqual {
err = cli.Update(ctx, desiredObj)
if err != nil {
return fmt.Errorf("failed to update %s/%s: %w", desiredObj.GetNamespace(), desiredObj.GetName(), err)
}
}
return nil
}
30 changes: 23 additions & 7 deletions controllers/numaresourcesoperator_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
"github.com/openshift-kni/numaresources-operator/internal/api/annotations"
testobjs "github.com/openshift-kni/numaresources-operator/internal/objects"
"github.com/openshift-kni/numaresources-operator/pkg/images"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
"github.com/openshift-kni/numaresources-operator/pkg/status"
Expand All @@ -70,16 +71,20 @@ func NewFakeNUMAResourcesOperatorReconciler(plat platform.Platform, platVersion
if err != nil {
return nil, err
}

rtemetricsmanifests, err := rtemetricsmanifests.GetManifests(testNamespace)
if err != nil {
return nil, err
}
recorder := record.NewFakeRecorder(bufferSize)

return &NUMAResourcesOperatorReconciler{
Client: fakeClient,
Scheme: scheme.Scheme,
Platform: plat,
APIManifests: apiManifests,
RTEManifests: rteManifests,
Namespace: testNamespace,
Client: fakeClient,
Scheme: scheme.Scheme,
Platform: plat,
APIManifests: apiManifests,
RTEManifests: rteManifests,
RTEMetricsManifests: rtemetricsmanifests,
Namespace: testNamespace,
Images: images.Data{
Builtin: testImageSpec,
},
Expand Down Expand Up @@ -1475,6 +1480,17 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
Namespace: testNamespace,
}
Expect(reconciler.Client.Get(context.TODO(), mcp2DSKey, ds)).ToNot(HaveOccurred())

By("Check All RTE metrics components are created")
for _, obj := range reconciler.RTEMetricsManifests.ToObjects() {
objectKey := client.ObjectKeyFromObject(obj)
switch obj.(type) {
case *corev1.Service:
service := &corev1.Service{}
Expect(reconciler.Client.Get(context.TODO(), objectKey, service)).ToNot(HaveOccurred())
default:
}
}
})
When("daemonsets are ready", func() {
var dsDesiredNumberScheduled int32
Expand Down
27 changes: 17 additions & 10 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ import (
intkloglevel "github.com/openshift-kni/numaresources-operator/internal/kloglevel"
"github.com/openshift-kni/numaresources-operator/pkg/hash"
"github.com/openshift-kni/numaresources-operator/pkg/images"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/controlplane"
schedmanifests "github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/manifests/sched"
rteupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/rte"
Expand Down Expand Up @@ -262,18 +263,24 @@ func main() {
klog.ErrorS(err, "unable to render RTE manifests", "controller", "NUMAResourcesOperator")
os.Exit(1)
}
rteMetricsManifests, err := rtemetricsmanifests.GetManifests(namespace)
if err != nil {
klog.ErrorS(err, "unable to load the RTE metrics manifests")
os.Exit(1)
}

if err = (&controllers.NUMAResourcesOperatorReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("numaresources-controller"),
APIManifests: apiManifests,
RTEManifests: rteManifestsRendered,
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("numaresources-controller"),
APIManifests: apiManifests,
RTEManifests: rteManifestsRendered,
RTEMetricsManifests: rteMetricsManifests,
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
}).SetupWithManager(mgr); err != nil {
klog.ErrorS(err, "unable to create controller", "controller", "NUMAResourcesOperator")
os.Exit(1)
Expand Down
64 changes: 64 additions & 0 deletions pkg/metrics/manifests/manifests.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package manifests

import (
"embed"
"fmt"
"path/filepath"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/scheme"
)

//go:embed yaml
var src embed.FS

func Service(namespace string) (*corev1.Service, error) {
obj, err := loadObject(filepath.Join("yaml", "service.yaml"))
if err != nil {
return nil, err
}

service, ok := obj.(*corev1.Service)
if !ok {
return nil, fmt.Errorf("unexpected type, got %t", obj)
}

if namespace != "" {
service.Namespace = namespace
}
return service, nil
}

func deserializeObjectFromData(data []byte) (runtime.Object, error) {
decode := scheme.Codecs.UniversalDeserializer().Decode
obj, _, err := decode(data, nil, nil)
if err != nil {
return nil, err
}
return obj, nil
}

func loadObject(path string) (runtime.Object, error) {
data, err := src.ReadFile(path)
if err != nil {
return nil, err
}
return deserializeObjectFromData(data)
}
27 changes: 27 additions & 0 deletions pkg/metrics/manifests/manifests_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package manifests

import (
"testing"
)

func TestLoad(t *testing.T) {
if obj, err := Service(""); obj == nil || err != nil {
t.Errorf("Service() failed: err=%v", err)
}
}
52 changes: 52 additions & 0 deletions pkg/metrics/manifests/monitor/monitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package sched

import (
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests"
)

type Manifests struct {
Service *corev1.Service
}

func (mf Manifests) ToObjects() []client.Object {
return []client.Object{
mf.Service,
}
}

func (mf Manifests) Clone() Manifests {
return Manifests{
Service: mf.Service.DeepCopy(),
}
}

func GetManifests(namespace string) (Manifests, error) {
var err error
mf := Manifests{}

mf.Service, err = manifests.Service(namespace)
if err != nil {
return mf, err
}

return mf, nil
}
Loading

0 comments on commit 1dd2f17

Please sign in to comment.