Skip to content

Commit

Permalink
case sensitivity, updating challenges
Browse files Browse the repository at this point in the history
  • Loading branch information
SilenNaihin committed Oct 20, 2023
1 parent 0e51b12 commit 825c3ad
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"eval_id": "6e2bf1f0-6842-4704-8ed1-b17c2065bbac",
"ground": {
"answer": "The csv labelled",
"case_sensitive": true,
"eval": {
"type": "file"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
"ground": {
"answer": "The twitter handles of the two hosts of Latent Space.",
"case_sensitive": false,
"eval": {
"type": "file"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
"eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
"ground": {
"answer": "Toran is from Scotland https://uk.linkedin.com/in/toran-richards.",
"case_sensitive": true,
"eval": {
"type": "file"
},
"files": [
"output.txt"
],
"should_contain": [
- "cotland"
+ "Scotland"
],
"should_not_contain": []
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
"ground": {
"answer": "https://www.amazon.com/gp/bestsellers/2021/books, second book, the answer is Reed",
"case_sensitive": true,
"eval": {
"type": "file"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
"ground": {
"answer": "Get to https://www.forbes.com/special-report/2012/30-under-30/30-under-30_games.html, then https://www.linkedin.com/in/brianjcho/details/experience/ is the first 27 year old, then find his longest working job on Linkedin which is Riot Games.",
"case_sensitive": true,
"eval": {
"type": "file"
},
Expand Down
6 changes: 6 additions & 0 deletions benchmark/agbenchmark/utils/challenge.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ def scoring(self, config: Dict[str, Any], content: str, ground: Ground) -> float
print("\033[1;34mScoring content:\033[0m", content)
if ground.should_contain:
for should_contain_word in ground.should_contain:
if not getattr(ground, 'case_sensitive', True):
should_contain_word = should_contain_word.lower()
content = content.lower()
print_content = (
f"\033[1;34mWord that should exist\033[0m - {should_contain_word}:"
)
Expand All @@ -134,6 +137,9 @@ def scoring(self, config: Dict[str, Any], content: str, ground: Ground) -> float

if ground.should_not_contain:
for should_not_contain_word in ground.should_not_contain:
if not getattr(ground, 'case_sensitive', True):
should_not_contain_word = should_not_contain_word.lower()
content = content.lower()
print_content = f"\033[1;34mWord that should not exist\033[0m - {should_not_contain_word}:"
if should_not_contain_word in content:
print(print_content, "False")
Expand Down
1 change: 1 addition & 0 deletions benchmark/agbenchmark/utils/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ class Ground(BaseModel):
should_contain: Optional[List[str]] = None
should_not_contain: Optional[List[str]] = None
files: List[str]
case_sensitive: Optional[bool] = True
eval: Eval


Expand Down

0 comments on commit 825c3ad

Please sign in to comment.