From d23b349cc46badc2d710b1c4634968bb8b372b53 Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Tue, 31 Oct 2023 00:02:36 -0700
Subject: [PATCH] Secure Code Eval changes (#2679)

* fix code eval
* do not set default
* add test
* add errors
* fix params
* fix test
* skip
* upload
* fix test
* comment
---
 composer/metrics/nlp.py                  | 22 +++++++++++-----------
 .../mosaicml_lambda_eval_client.py       |  4 ++++
 .../test_in_context_learning_datasets.py |  6 ++++++
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/composer/metrics/nlp.py b/composer/metrics/nlp.py
index bac9c44070..e1f9ee5558 100644
--- a/composer/metrics/nlp.py
+++ b/composer/metrics/nlp.py
@@ -528,16 +528,16 @@ def __init__(self, dist_sync_on_step: bool = False):
         self.add_state('correct', default=torch.tensor(0.), dist_reduce_fx='sum')
         self.add_state('total', default=torch.tensor(0.), dist_reduce_fx='sum')
 
-        self.eval_device = 'LAMBDA'
         if not 'CODE_EVAL_DEVICE' in os.environ:
-            if 'MOSAICML_PLATFORM' in os.environ:
-                log.info('Defaulting to MOSAICML evaluation on the MosaicML Platform')
-                self.eval_device = 'MOSAICML'
-            else:
-                log.info(f"'CODE_EVAL_DEVICE' env var was not set, so defaulting to 'LAMBDA' as eval device")
-                os.environ['CODE_EVAL_DEVICE'] = 'LAMBDA'
-        else:
-            self.eval_device = os.environ['CODE_EVAL_DEVICE'].upper()
+            raise ValueError(
+                'Attempting to use InContextLearningCodeEvalAccuracy but environment '
+                'variable `CODE_EVAL_DEVICE` is not set. Please set it to one of '
+                '`LOCAL` (for unsafe local eval), `LAMBDA` (for AWS lambda '
+                'evaluation), or `MOSAICML` (for lambda eval through MAPI).')
+        self.eval_device = os.environ['CODE_EVAL_DEVICE'].upper()
+        if self.eval_device not in ('LOCAL', 'LAMBDA', 'MOSAICML'):
+            raise ValueError('Environment variable `CODE_EVAL_DEVICE` must be one of `LOCAL`, '
+                             '`LAMBDA`, or `MOSAICML`.')
 
     def get_client(self) -> EvalClient:
         """Returns a client for the appropriate remote platform."""
@@ -554,9 +554,9 @@ def get_client(self) -> EvalClient:
         elif self.eval_device == 'MOSAICML':
             client = MosaicMLLambdaEvalClient()
         else:
-            raise Exception(
+            raise ValueError(
                 'Remote platforms apart from Lambdas/MOSAICML are not yet supported. Please set environment variable '
-                'CODE_EVAL_DEVICE to LOCAL or LAMBDA, or run on the MosaicML Platform.')
+                '`CODE_EVAL_DEVICE` to `LOCAL`, `LAMBDA`, or `MOSAICML`.')
         return client
 
     def estimator(self, n: int, c: int, k: int) -> float:
diff --git a/composer/utils/eval_client/mosaicml_lambda_eval_client.py b/composer/utils/eval_client/mosaicml_lambda_eval_client.py
index c3071b9b7c..fabb6b32be 100644
--- a/composer/utils/eval_client/mosaicml_lambda_eval_client.py
+++ b/composer/utils/eval_client/mosaicml_lambda_eval_client.py
@@ -5,6 +5,7 @@
 import logging
 import os
 import time
+from http import HTTPStatus
 from typing import Dict, List
 
 import mcli
@@ -53,6 +54,9 @@ def invoke(self, payload: List[List[List[Dict[str, str]]]]) -> List[List[List[bo
                     log.error(f'Failed to get code eval output after {self.num_retries} retries. Error: {e}')
                 log.warning(f'Failed to get code eval output, retrying in {self.backoff**i} seconds.')
                 time.sleep(self.backoff**i)
+            elif e.status == HTTPStatus.UNAUTHORIZED:
+                raise RuntimeError('Failed to get code eval output due to UNAUTHORIZED error. '
+                                   'Please ensure you have access to MosaicMLLambdaEvalClient.') from e
             else:
                 log.error(f'Failed to get code eval output with unexpected MAPIException. Error: {e}')
                 break
diff --git a/tests/datasets/test_in_context_learning_datasets.py b/tests/datasets/test_in_context_learning_datasets.py
index 8ebb7816af..5af591e242 100644
--- a/tests/datasets/test_in_context_learning_datasets.py
+++ b/tests/datasets/test_in_context_learning_datasets.py
@@ -1343,6 +1343,12 @@ def test_qa_task_with_cot_evaluation(device, world_size, num_fewshot, dataset_ur
     assert in_memory_logger.data['metrics/gsm8k/InContextLearningQAAccuracy'][0][1].item() == 0
 
 
+@pytest.mark.gpu  # Run on MosaicML platform
+def test_code_eval_requires_envvar():
+    with pytest.raises(ValueError, match='Attempting to use InContextLearningCodeEvalAccuracy but.*'):
+        InContextLearningCodeEvalAccuracy()
+
+
 @pytest.mark.parametrize('dataset_uri', ['human_eval_small.jsonl'])
 @device('gpu')
 @world_size(1, 2)
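
Note (annotation, not part of the patch): a minimal sketch of the contract this change enforces, assuming a composer checkout with the patch applied. The class, module path, and environment variable come from the diff above; the comments paraphrase the error messages.

    import os

    from composer.metrics.nlp import InContextLearningCodeEvalAccuracy

    # Unset: construction now fails fast instead of silently defaulting to LAMBDA.
    os.environ.pop('CODE_EVAL_DEVICE', None)
    try:
        InContextLearningCodeEvalAccuracy()
    except ValueError as e:
        print(e)  # asks for one of LOCAL, LAMBDA, or MOSAICML

    # Invalid value: rejected at construction rather than later in get_client().
    os.environ['CODE_EVAL_DEVICE'] = 'gpu'
    try:
        InContextLearningCodeEvalAccuracy()
    except ValueError as e:
        print(e)  # must be one of LOCAL, LAMBDA, or MOSAICML

    # Valid value (case-insensitive, per the .upper() call): construction succeeds.
    os.environ['CODE_EVAL_DEVICE'] = 'local'
    metric = InContextLearningCodeEvalAccuracy()
    assert metric.eval_device == 'LOCAL'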