Skip to content

Commit

Permalink
Add deprecation warnings for ICL datasets/helper functions/metrics (mosaicml#3125)
Browse files Browse the repository at this point in the history

* add deprecation warning

* fix strings

* simplify deprecation warning

* fix warnings in nlp.py

* simplify `MultiTokenEOSCriteria` warning

* change nlp deprecation number

* linting

---------

Co-authored-by: Max Marion <[email protected]>
Co-authored-by: Max Marion <[email protected]>
Co-authored-by: Daniel King <[email protected]>
  • Loading branch information
4 people authored Apr 19, 2024
1 parent 7b25d90 commit 4e0068a
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 3 deletions.
12 changes: 10 additions & 2 deletions composer/datasets/in_context_learning_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import json
import os
import random
import warnings
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Union

import torch
Expand All @@ -17,14 +18,14 @@
from composer.core.data_spec import _split_list, default_split_batch
from composer.datasets.utils import stop_sequences_criteria
from composer.utils import MissingConditionalImportError, dist, get_file
from composer.utils.warnings import VersionedDeprecationWarning

if TYPE_CHECKING:
import transformers
from datasets import Dataset as HFDataset # pyright: ignore[reportGeneralTypeIssues]

# Allow models to have slightly more tokens than were used in the most verbose CoT in the dataset
_MAX_ANSWER_BUFFER_LENGTH = 10

__all__ = [
'InContextLearningLMTaskDataset',
'InContextLearningMultipleChoiceTaskDataset',
Expand Down Expand Up @@ -125,7 +126,6 @@ def _make_padded_input(
input (torch.tensor): The padded and encoded context
continuation_span (torch.tensor): The _inclusive_ range of indices corresponding to the continuation
"""

inp = torch.tensor(
(context_enc + continuation_enc),
dtype=torch.long,
Expand Down Expand Up @@ -293,6 +293,14 @@ def __init__(
hf_parsing_map: Optional[Dict] = None,
generation_kwargs: Optional[Dict] = None,
):
warnings.warn(
VersionedDeprecationWarning(
'`InContextLearningDataset`, it\'s subclasses, and eval utility functions have been deprecated and migrated'
+ ' to MosaicML\'s llm-foundry repo under the llmfoundry.eval.datasets.in_context_learning module: ' +
'https://github.com/mosaicml/llm-foundry/blob/main/scripts/eval/README.md',
remove_version='0.23.0',
),
)
try:
import datasets

Expand Down
10 changes: 10 additions & 0 deletions composer/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@

import logging
import textwrap
import warnings
from typing import Callable, List, Optional

import torch
from torchvision import transforms
from torchvision.datasets import VisionDataset

from composer.utils.warnings import VersionedDeprecationWarning

__all__ = [
'add_vision_dataset_transform',
'MultiTokenEOSCriteria',
Expand Down Expand Up @@ -78,6 +81,13 @@ def __init__(
tokenizer: transformers.PreTrainedTokenizerBase,
batch_size: int,
) -> None:
warnings.warn(
VersionedDeprecationWarning(
'`MultiTokenEOSCriteria` has been deprecated and migrated to MosaicML\'s llm-foundry repo under the llmfoundry.eval.datasets.in_context_learning module: '
+ 'https://github.com/mosaicml/llm-foundry/blob/main/scripts/eval/README.md',
remove_version='0.23.0',
),
)
self.done_tracker = [False] * batch_size
self.stop_sequence = stop_sequence
self.stop_sequence_ids = tokenizer.encode(stop_sequence, add_special_tokens=False)
Expand Down
10 changes: 9 additions & 1 deletion composer/metrics/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,14 @@ class InContextLearningMetric(Metric):
"""Base class for In-context learning (ICL) metrics."""

def __init__(self, *args, **kwargs):
warnings.warn(
VersionedDeprecationWarning(
'`InContextLearningMetric` and it\'s subclasses have been deprecated and ' +
'migrated to MosaicML\'s llm-foundry repo under the llmfoundry.eval.datasets.in_context_learning module: '
+ 'https://github.com/mosaicml/llm-foundry/blob/main/scripts/eval/README.md',
remove_version='0.23.0',
),
)
super().__init__(*args, **kwargs)
self.needs_batch = True

Expand Down Expand Up @@ -273,7 +281,7 @@ def rename_args(
raise ValueError('Cannot use both `outputs` and `output_logits`')
if output_logits is not None:
warnings.warn(
VersionedDeprecationWarning('`output_logits` has been renamed to `outputs`.', remove_version='0.21.0'),
VersionedDeprecationWarning('`output_logits` has been renamed to `outputs`.', remove_version='0.23.0'),
)
outputs = output_logits

Expand Down

0 comments on commit 4e0068a

Please sign in to comment.