Skip to content

Commit

Permalink
e2e/gpu: use generated policy
Browse files Browse the repository at this point in the history
Previously, the GPU test statically used an allow-all policy. Since
genpolicy now supports GPU pods, the test instead uses an ad-hoc,
per-pod generated policy, in the same way the other tests do.
  • Loading branch information
msanft committed Jan 20, 2025
1 parent 52ffaa8 commit a57e9f8
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 46 deletions.
31 changes: 10 additions & 21 deletions e2e/gpu/gpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,23 @@
package gpu

import (
"bytes"
"context"
"flag"
"os"
"testing"
"time"

"github.com/edgelesssys/contrast/e2e/internal/contrasttest"
"github.com/edgelesssys/contrast/e2e/internal/kubeclient"
"github.com/edgelesssys/contrast/internal/kuberesource"
"github.com/edgelesssys/contrast/internal/manifest"
"github.com/edgelesssys/contrast/internal/platforms"
"github.com/stretchr/testify/require"
)

const (
gpuPodName = "gpu-pod"
gpuName = "NVIDIA H100 PCIe"
gpuDeploymentName = "gpu-tester"
gpuName = "NVIDIA H100 PCIe"
)

// TestGPU runs e2e tests on a GPU-enabled Contrast.
Expand All @@ -34,7 +34,7 @@ func TestGPU(t *testing.T) {
runtimeHandler, err := manifest.RuntimeHandler(platform)
require.NoError(t, err)

resources := kuberesource.OpenSSL()
resources := kuberesource.GPU()
coordinator := kuberesource.CoordinatorBundle()

resources = append(resources, coordinator...)
Expand All @@ -52,31 +52,20 @@ func TestGPU(t *testing.T) {

require.True(t, t.Run("contrast verify", ct.Verify), "contrast verify needs to succeed for subsequent tests")

applyGPUPod := func(t *testing.T) {
yaml, err := os.ReadFile("./e2e/gpu/testdata/gpu-pod.yaml")
require.NoError(t, err)

yaml = bytes.ReplaceAll(
bytes.ReplaceAll(yaml, []byte("@@REPLACE_NAMESPACE@@"), []byte(ct.Namespace)),
[]byte("@@REPLACE_RUNTIME@@"), []byte(ct.RuntimeClassName),
)

ct.ApplyFromYAML(t, yaml)
}

require.True(t, t.Run("apply GPU pod", applyGPUPod), "GPU pod needs to deploy successfully for subsequent tests")

t.Run("check GPU availability", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), ct.FactorPlatformTimeout(5*time.Minute))
defer cancel()

require := require.New(t)

err := ct.Kubeclient.WaitForPod(ctx, ct.Namespace, gpuPodName)
require.NoError(err, "GPU pod %s did not start", gpuPodName)
require.NoError(ct.Kubeclient.WaitFor(ctx, kubeclient.Ready, kubeclient.Deployment{}, ct.Namespace, gpuDeploymentName))

pods, err := ct.Kubeclient.PodsFromDeployment(ctx, ct.Namespace, gpuDeploymentName)
require.NoError(err)
require.Len(pods, 1, "pod not found: %s/%s", ct.Namespace, gpuDeploymentName)

argv := []string{"/bin/sh", "-c", "nvidia-smi"}
stdout, stderr, err := ct.Kubeclient.Exec(ctx, ct.Namespace, gpuPodName, argv)
stdout, stderr, err := ct.Kubeclient.Exec(ctx, ct.Namespace, pods[0].Name, argv)
require.NoError(err, "stderr: %q", stderr)

require.Contains(stdout, gpuName, "nvidia-smi output should contain %s", gpuName)
Expand Down
25 changes: 0 additions & 25 deletions e2e/gpu/testdata/gpu-pod.yaml

This file was deleted.

36 changes: 36 additions & 0 deletions internal/kuberesource/sets.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,3 +743,39 @@ done
client,
}
}

// GPU returns the resources for deploying a GPU test pod.
func GPU() []any {
tester := Deployment("gpu-tester", "").
WithSpec(DeploymentSpec().
WithReplicas(1).
WithSelector(LabelSelector().
WithMatchLabels(map[string]string{"app.kubernetes.io/name": "gpu-tester"}),
).
WithTemplate(PodTemplateSpec().
WithLabels(map[string]string{"app.kubernetes.io/name": "gpu-tester"}).
WithAnnotations(map[string]string{
"io.katacontainers.config.hypervisor.default_memory": "15258",
"cdi.k8s.io/gpu": "nvidia.com/pgpu=0",
}).
WithSpec(PodSpec().
WithContainers(
Container().
WithName("gpu-tester").
WithImage("ghcr.io/edgelesssys/contrast/ubuntu:24.04").
WithCommand("/bin/sh", "-c", "sleep inf").
WithEnv(EnvVar().
WithName("NVIDIA_VISIBLE_DEVICES").WithValue("all"),
).
WithResources(ResourceRequirements().
WithLimits(corev1.ResourceList{
corev1.ResourceName("nvidia.com/GH100_H100_PCIE"): resource.MustParse("1"),
}),
),
),
),
),
)

return []any{tester}
}

0 comments on commit a57e9f8

Please sign in to comment.