From 6ba268c76e666f2b1b561fe20ed21dba31fd396b Mon Sep 17 00:00:00 2001
From: Paige Gulley <pgulley@campus-vpn-172-26-69-106.vpn.umass.edu>
Date: Fri, 12 Jul 2024 15:17:45 -0400
Subject: [PATCH] Update fixtures so that a meaningful top-terms test can be run

---
 test/api_test.py        |  2 +-
 test/create_fixtures.py | 20 +++++++++++++++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/test/api_test.py b/test/api_test.py
index fdc4798..93357bd 100644
--- a/test/api_test.py
+++ b/test/api_test.py
@@ -357,7 +357,7 @@ def test_no_pub_date(self):
     def test_top_terms(self):
         response = self._client.post(
             f"/v1/{INDEX_NAME}/terms/article_title/top",
-            json={"q": "'the big e' AND language:en'"},
+            json={"q": "mediacloud"},
             timeout=TIMEOUT,
         )
 
diff --git a/test/create_fixtures.py b/test/create_fixtures.py
index 37d866b..908bdea 100644
--- a/test/create_fixtures.py
+++ b/test/create_fixtures.py
@@ -1,7 +1,7 @@
 import copy
 import datetime as dt
 import logging
-from random import randrange
+from random import randrange, sample
 from test import ELASTICSEARCH_URL, INDEX_NAME, NUMBER_OF_TEST_STORIES
 
 import mcmetadata.titles as titles
@@ -66,14 +66,28 @@
     "text_extraction": "trafilatura",
 }
 
+random_wordlist = [
+    "robust",
+    "traditional",
+    "the",
+    "find",
+    "great",
+    "simple",
+    "a",
+    "time",
+    "mediacloud",
+    "robot",
+    "enough",
+]
+
 imported_count = 0
 for idx in range(0, NUMBER_OF_TEST_STORIES):
     fixture = copy.copy(base_fixture)
     fixture["url"] += str(idx)
     fixture["original_url"] = fixture["url"]
     fixture["normalized_url"] = urls.normalize_url(fixture["url"])  # type: ignore [assignment]
-    fixture["article_title"] += str(idx)
-    fixture["text_content"] += str(idx)
+    fixture["article_title"] += " ".join(sample(random_wordlist, 1)) + " " + str(idx)
+    fixture["text_content"] += " ".join(sample(random_wordlist, 10)) + " " + str(idx)
     pub_date = dt.date(2023, 1, 1) + dt.timedelta(days=randrange(365))
     if (idx % 1000) != 0:
         fixture["publication_date"] = pub_date.isoformat()