Skip to content

Commit

Permalink
update fixtures so a relevant top-terms test can be conducted
Browse files Browse the repository at this point in the history
  • Loading branch information
Paige Gulley committed Jul 12, 2024
1 parent 875e012 commit 6ba268c
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
2 changes: 1 addition & 1 deletion test/api_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ def test_no_pub_date(self):
def test_top_terms(self):
response = self._client.post(
f"/v1/{INDEX_NAME}/terms/article_title/top",
json={"q": "'the big e' AND language:en'"},
json={"q": "mediacloud"},
timeout=TIMEOUT,
)

Expand Down
20 changes: 17 additions & 3 deletions test/create_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import copy
import datetime as dt
import logging
from random import randrange
from random import randrange, sample
from test import ELASTICSEARCH_URL, INDEX_NAME, NUMBER_OF_TEST_STORIES

import mcmetadata.titles as titles
Expand Down Expand Up @@ -66,14 +66,28 @@
"text_extraction": "trafilatura",
}

# Vocabulary used to randomize fixture text: the fixture loop appends
# sample(random_wordlist, 1) to each article_title and
# sample(random_wordlist, 10) to each text_content.
# "mediacloud" is the term that test_top_terms (test/api_test.py) queries for.
# Keep at least 10 entries: random.sample raises ValueError when asked for
# more items than the population holds.
random_wordlist = [
"robust",
"traditional",
"the",
"find",
"great",
"simple",
"a",
"time",
"mediacloud",
"robot",
"enough",
]

imported_count = 0
for idx in range(0, NUMBER_OF_TEST_STORIES):
fixture = copy.copy(base_fixture)
fixture["url"] += str(idx)
fixture["original_url"] = fixture["url"]
fixture["normalized_url"] = urls.normalize_url(fixture["url"]) # type: ignore [assignment]
fixture["article_title"] += str(idx)
fixture["text_content"] += str(idx)
fixture["article_title"] += " ".join(sample(random_wordlist, 1)) + " " + str(idx)
fixture["text_content"] += " ".join(sample(random_wordlist, 10)) + " " + str(idx)
pub_date = dt.date(2023, 1, 1) + dt.timedelta(days=randrange(365))
if (idx % 1000) != 0:
fixture["publication_date"] = pub_date.isoformat()
Expand Down

0 comments on commit 6ba268c

Please sign in to comment.