From d23b349cc46badc2d710b1c4634968bb8b372b53 Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Tue, 31 Oct 2023 00:02:36 -0700
Subject: [PATCH] Secure Code Eval changes (#2679)

* fix code eval
* do not set default
* add test
* add errors
* fix params
* fix test
* skip
* upload
* fix test
* comment
---
 composer/metrics/nlp.py                  | 22 +++++++++++-----------
 .../mosaicml_lambda_eval_client.py       |  4 ++++
 .../test_in_context_learning_datasets.py |  6 ++++++
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/composer/metrics/nlp.py b/composer/metrics/nlp.py
index bac9c44070..e1f9ee5558 100644
--- a/composer/metrics/nlp.py
+++ b/composer/metrics/nlp.py
@@ -528,16 +528,16 @@ def __init__(self, dist_sync_on_step: bool = False):
         self.add_state('correct', default=torch.tensor(0.), dist_reduce_fx='sum')
         self.add_state('total', default=torch.tensor(0.), dist_reduce_fx='sum')
 
-        self.eval_device = 'LAMBDA'
         if not 'CODE_EVAL_DEVICE' in os.environ:
-            if 'MOSAICML_PLATFORM' in os.environ:
-                log.info('Defaulting to MOSAICML evaluation on the MosaicML Platform')
-                self.eval_device = 'MOSAICML'
-            else:
-                log.info(f"'CODE_EVAL_DEVICE' env var was not set, so defaulting to 'LAMBDA' as eval device")
-                os.environ['CODE_EVAL_DEVICE'] = 'LAMBDA'
-        else:
-            self.eval_device = os.environ['CODE_EVAL_DEVICE'].upper()
+            raise ValueError(
+                'Attempting to use InContextLearningCodeEvalAccuracy but environment '
+                'variable `CODE_EVAL_DEVICE` is not set. Please set it to one of '
+                '`LOCAL` (for unsafe local eval), `LAMBDA` (for AWS lambda '
+                'evaluation), or `MOSAICML` (for lambda eval through MAPI).')
+        self.eval_device = os.environ['CODE_EVAL_DEVICE'].upper()
+        if self.eval_device not in ('LOCAL', 'LAMBDA', 'MOSAICML'):
+            raise ValueError('Environment variable `CODE_EVAL_DEVICE` must be one of `LOCAL`, '
+                             '`LAMBDA`, or `MOSAICML`.')
 
     def get_client(self) -> EvalClient:
         """Returns a client for the appropriate remote platform."""
@@ -554,9 +554,9 @@ def get_client(self) -> EvalClient:
         elif self.eval_device == 'MOSAICML':
             client = MosaicMLLambdaEvalClient()
         else:
-            raise Exception(
+            raise ValueError(
                 'Remote platforms apart from Lambdas/MOSAICML are not yet supported. Please set environment variable '
-                'CODE_EVAL_DEVICE to LOCAL or LAMBDA, or run on the MosaicML Platform.')
+                '`CODE_EVAL_DEVICE` to `LOCAL`, `LAMBDA`, or `MOSAICML`.')
         return client
 
     def estimator(self, n: int, c: int, k: int) -> float:
diff --git a/composer/utils/eval_client/mosaicml_lambda_eval_client.py b/composer/utils/eval_client/mosaicml_lambda_eval_client.py
index c3071b9b7c..fabb6b32be 100644
--- a/composer/utils/eval_client/mosaicml_lambda_eval_client.py
+++ b/composer/utils/eval_client/mosaicml_lambda_eval_client.py
@@ -5,6 +5,7 @@
 import logging
 import os
 import time
+from http import HTTPStatus
 from typing import Dict, List
 
 import mcli
@@ -53,6 +54,9 @@ def invoke(self, payload: List[List[List[Dict[str, str]]]]) -> List[List[List[bo
                     log.error(f'Failed to get code eval output after {self.num_retries} retries. Error: {e}')
                 log.warning(f'Failed to get code eval output, retrying in {self.backoff**i} seconds.')
                 time.sleep(self.backoff**i)
+            elif e.status == HTTPStatus.UNAUTHORIZED:
+                raise RuntimeError('Failed to get code eval output due to UNAUTHORIZED error. '
+                                   'Please ensure you have access to MosaicMLLambdaEvalClient.') from e
             else:
                 log.error(f'Failed to get code eval output with unexpected MAPIException. Error: {e}')
                 break
diff --git a/tests/datasets/test_in_context_learning_datasets.py b/tests/datasets/test_in_context_learning_datasets.py
index 8ebb7816af..5af591e242 100644
--- a/tests/datasets/test_in_context_learning_datasets.py
+++ b/tests/datasets/test_in_context_learning_datasets.py
@@ -1343,6 +1343,12 @@ def test_qa_task_with_cot_evaluation(device, world_size, num_fewshot, dataset_ur
     assert in_memory_logger.data['metrics/gsm8k/InContextLearningQAAccuracy'][0][1].item() == 0
 
 
+@pytest.mark.gpu  # Run on MosaicML platform
+def test_code_eval_requires_envvar():
+    with pytest.raises(ValueError, match='Attempting to use InContextLearningCodeEvalAccuracy but.*'):
+        InContextLearningCodeEvalAccuracy()
+
+
 @pytest.mark.parametrize('dataset_uri', ['human_eval_small.jsonl'])
 @device('gpu')
 @world_size(1, 2)
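
Note (annotation, not part of the patch): a minimal sketch of the contract this change enforces, assuming a composer checkout with the patch applied. The class, module path, and environment variable come from the diff above; the comments paraphrase the error messages.

    import os

    from composer.metrics.nlp import InContextLearningCodeEvalAccuracy

    # Unset: construction now fails fast instead of silently defaulting to LAMBDA.
    os.environ.pop('CODE_EVAL_DEVICE', None)
    try:
        InContextLearningCodeEvalAccuracy()
    except ValueError as e:
        print(e)  # asks for one of LOCAL, LAMBDA, or MOSAICML

    # Invalid value: rejected at construction rather than later in get_client().
    os.environ['CODE_EVAL_DEVICE'] = 'gpu'
    try:
        InContextLearningCodeEvalAccuracy()
    except ValueError as e:
        print(e)  # must be one of LOCAL, LAMBDA, or MOSAICML

    # Valid value (case-insensitive, per the .upper() call): construction succeeds.
    os.environ['CODE_EVAL_DEVICE'] = 'local'
    metric = InContextLearningCodeEvalAccuracy()
    assert metric.eval_device == 'LOCAL'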