From c028863493350bbc566605028ed83a783062f15c Mon Sep 17 00:00:00 2001 From: Karel Suta Date: Mon, 16 Dec 2024 11:21:06 +0100 Subject: [PATCH 1/2] E2E tests: Upgrade MNIST test script dependencies --- go.mod | 2 +- go.sum | 4 ++-- test/e2e/mnist.py | 4 ++-- test/e2e/mnist_pip_requirements.txt | 6 +++--- test/e2e/mnist_rayjob_raycluster_test.go | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index fd131ca3e..5833ceb62 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/openshift/api v0.0.0-20230823114715-5fdd7511b790 github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c github.com/project-codeflare/appwrapper v0.27.0 - github.com/project-codeflare/codeflare-common v0.0.0-20240930133152-11fd6e3be6b3 + github.com/project-codeflare/codeflare-common v0.0.0-20241216183607-222395d38924 github.com/ray-project/kuberay/ray-operator v1.2.1 go.uber.org/zap v1.27.0 golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 diff --git a/go.sum b/go.sum index 0b5d76171..f11f3adf1 100644 --- a/go.sum +++ b/go.sum @@ -226,8 +226,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/project-codeflare/appwrapper v0.27.0 h1:WiWw0Hi4rEXuFIEpm8nq1UqJHgVB6YtGcWzRrhRUTyE= github.com/project-codeflare/appwrapper v0.27.0/go.mod h1:7FpO90DLv0BAq4rwZtXKS9aRRfkR9RvXsj3pgYF0HtQ= -github.com/project-codeflare/codeflare-common v0.0.0-20240930133152-11fd6e3be6b3 h1:Eupu9yxaGTddtoxb9SjrYJlokRHEYU5NNVRQmdXSNVs= -github.com/project-codeflare/codeflare-common v0.0.0-20240930133152-11fd6e3be6b3/go.mod h1:v7XKwaDoCspsHQlWJNarO7gOpR+iumSS+c1bWs3kJOI= +github.com/project-codeflare/codeflare-common v0.0.0-20241216183607-222395d38924 h1:jM+gYqn8eGmUoeQLGGYxlJgXZ1gbZgB2UtpKU9z0x9s= +github.com/project-codeflare/codeflare-common v0.0.0-20241216183607-222395d38924/go.mod h1:DPSv5khRiRDFUD43SF8da+MrVQTWmxNhuKJmwSLOyO0= github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= diff --git a/test/e2e/mnist.py b/test/e2e/mnist.py index 5a89a8b38..97b22fc9c 100644 --- a/test/e2e/mnist.py +++ b/test/e2e/mnist.py @@ -72,8 +72,8 @@ def __init__(self, data_dir=PATH_DATASETS, hidden_size=64, learning_rate=2e-4): nn.Linear(hidden_size, self.num_classes), ) - self.val_accuracy = Accuracy() - self.test_accuracy = Accuracy() + self.val_accuracy = Accuracy(task="multiclass", num_classes=10) + self.test_accuracy = Accuracy(task="multiclass", num_classes=10) def forward(self, x): x = self.model(x) diff --git a/test/e2e/mnist_pip_requirements.txt b/test/e2e/mnist_pip_requirements.txt index 4c9d5fcb8..9f0543d4d 100644 --- a/test/e2e/mnist_pip_requirements.txt +++ b/test/e2e/mnist_pip_requirements.txt @@ -1,3 +1,3 @@ -pytorch_lightning==1.9.5 -torchmetrics==0.9.1 -torchvision==0.12.0 +pytorch_lightning==2.4.0 +torchmetrics==1.6.0 +torchvision==0.20.1 diff --git a/test/e2e/mnist_rayjob_raycluster_test.go b/test/e2e/mnist_rayjob_raycluster_test.go index b2e724834..443da1522 100644 --- a/test/e2e/mnist_rayjob_raycluster_test.go +++ b/test/e2e/mnist_rayjob_raycluster_test.go @@ -358,9 +358,9 @@ func constructRayJob(_ Test, namespace *corev1.Namespace, rayCluster *rayv1.RayC Entrypoint: "python /home/ray/jobs/mnist.py", RuntimeEnvYAML: ` pip: - - pytorch_lightning==1.9.5 - - torchmetrics==0.9.1 - - torchvision==0.12.0 + - pytorch_lightning==2.4.0 + - torchmetrics==1.6.0 + - torchvision==0.20.1 env_vars: MNIST_DATASET_URL: "` + GetMnistDatasetURL() + `" PIP_INDEX_URL: "` + GetPipIndexURL() + `" From 9c96f449aec20f8b1bb9729d73a5eb0e347dfa59 Mon Sep 17 00:00:00 2001 From: David Grove Date: Sat, 14 Dec 2024 00:01:46 -0500 Subject: [PATCH 2/2] Fix improper controller-runtime cache configuration When AppWrappers are enabled, it is not correct to configure the controller-runtime cache with a filter that only allows services, secrets, etc with the RayCluster label to be cached. This breaks any AppWrapper that contains one of these resource kinds. --- main.go | 54 ++++++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/main.go b/main.go index d9e46c221..044716fee 100644 --- a/main.go +++ b/main.go @@ -171,36 +171,34 @@ func main() { kubeConfig.QPS = ptr.Deref(cfg.ClientConnection.QPS, rest.DefaultQPS) setupLog.V(2).Info("REST client", "qps", kubeConfig.QPS, "burst", kubeConfig.Burst) - selector, err := labels.Parse(controllers.RayClusterNameLabel) - exitOnError(err, "unable to parse label selector") - - cacheOpts := cache.Options{ - ByObject: map[client.Object]cache.ByObject{ - &corev1.Secret{}: { - Label: selector, - }, - &corev1.Service{}: { - Label: selector, - }, - &corev1.ServiceAccount{}: { - Label: selector, - }, - &networkingv1.Ingress{}: { - Label: selector, - }, - &networkingv1.NetworkPolicy{}: { - Label: selector, - }, - &rbacv1.ClusterRoleBinding{}: { - Label: selector, - }, - }, - } - - if isOpenShift(ctx, kubeClient.DiscoveryClient) { - cacheOpts.ByObject[&routev1.Route{}] = cache.ByObject{ + cacheOpts := cache.Options{} + if cfg.AppWrapper == nil || !ptr.Deref(cfg.AppWrapper.Enabled, false) { + selector, err := labels.Parse(controllers.RayClusterNameLabel) + exitOnError(err, "unable to parse label selector") + cacheOpts.ByObject = make(map[client.Object]cache.ByObject, 7) + cacheOpts.ByObject[&corev1.Secret{}] = cache.ByObject{ + Label: selector, + } + cacheOpts.ByObject[&corev1.Service{}] = cache.ByObject{ + Label: selector, + } + cacheOpts.ByObject[&corev1.ServiceAccount{}] = cache.ByObject{ + Label: selector, + } + cacheOpts.ByObject[&networkingv1.Ingress{}] = cache.ByObject{ Label: selector, } + cacheOpts.ByObject[&networkingv1.NetworkPolicy{}] = cache.ByObject{ + Label: selector, + } + cacheOpts.ByObject[&rbacv1.ClusterRoleBinding{}] = cache.ByObject{ + Label: selector, + } + if isOpenShift(ctx, kubeClient.DiscoveryClient) { + cacheOpts.ByObject[&routev1.Route{}] = cache.ByObject{ + Label: selector, + } + } } mgr, err := ctrl.NewManager(kubeConfig, ctrl.Options{