From 6ba268c76e666f2b1b561fe20ed21dba31fd396b Mon Sep 17 00:00:00 2001
From: Paige Gulley
Date: Fri, 12 Jul 2024 15:17:45 -0400
Subject: [PATCH] update fixtures so a relevant top-terms test can be conducted

---
 test/api_test.py        |  2 +-
 test/create_fixtures.py | 20 +++++++++++++++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/test/api_test.py b/test/api_test.py
index fdc4798..93357bd 100644
--- a/test/api_test.py
+++ b/test/api_test.py
@@ -357,7 +357,7 @@ def test_no_pub_date(self):
     def test_top_terms(self):
         response = self._client.post(
             f"/v1/{INDEX_NAME}/terms/article_title/top",
-            json={"q": "'the big e' AND language:en'"},
+            json={"q": "mediacloud"},
             timeout=TIMEOUT,
         )
 
diff --git a/test/create_fixtures.py b/test/create_fixtures.py
index 37d866b..908bdea 100644
--- a/test/create_fixtures.py
+++ b/test/create_fixtures.py
@@ -1,7 +1,7 @@
 import copy
 import datetime as dt
 import logging
-from random import randrange
+from random import randrange, sample
 from test import ELASTICSEARCH_URL, INDEX_NAME, NUMBER_OF_TEST_STORIES
 
 import mcmetadata.titles as titles
@@ -66,14 +66,28 @@
     "text_extraction": "trafilatura",
 }
 
+random_wordlist = [
+    "robust",
+    "traditional",
+    "the",
+    "find",
+    "great",
+    "simple",
+    "a",
+    "time",
+    "mediacloud",
+    "robot",
+    "enough",
+]
+
 imported_count = 0
 for idx in range(0, NUMBER_OF_TEST_STORIES):
     fixture = copy.copy(base_fixture)
     fixture["url"] += str(idx)
     fixture["original_url"] = fixture["url"]
     fixture["normalized_url"] = urls.normalize_url(fixture["url"])  # type: ignore [assignment]
-    fixture["article_title"] += str(idx)
-    fixture["text_content"] += str(idx)
+    fixture["article_title"] += " ".join(sample(random_wordlist, 1)) + " " + str(idx)
+    fixture["text_content"] += " ".join(sample(random_wordlist, 10)) + " " + str(idx)
     pub_date = dt.date(2023, 1, 1) + dt.timedelta(days=randrange(365))
     if (idx % 1000) != 0:
         fixture["publication_date"] = pub_date.isoformat()