-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #37 from mediacloud/feature-unit-tests
Unit tests and CI
- Loading branch information
Showing
12 changed files
with
206 additions
and
60 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# CI workflow: starts an Elasticsearch node, loads the test fixtures into it,
# and runs the pytest suite against it.
name: Test Code

on:
  - push

jobs:
  test:
    # `push` is the only trigger declared above, so this guard is always true today.
    if: github.event_name == 'push'
    runs-on: ubuntu-latest
    name: Unit tests in Docker
    steps:
      - uses: actions/checkout@v4
      # Host limits adjusted before Elasticsearch is started
      # (NOTE(review): values presumably follow Elasticsearch's documented
      # system requirements - confirm if they are changed).
      - name: Configure sysctl limits
        run: |
          sudo swapoff -a
          sudo sysctl -w vm.swappiness=1
          sudo sysctl -w fs.file-max=262144
          sudo sysctl -w vm.max_map_count=262144
      # Same Elasticsearch version as the local Docker instructions in the
      # testing README; security is disabled so the tests can use plain HTTP.
      - name: Run Elasticsearch
        uses: elastic/elastic-github-actions/elasticsearch@master
        with:
          stack-version: 8.8.2
          security-enabled: false
      # Requests the node's root endpoint and prints the exchange to the job log.
      - name: Elasticsearch is reachable
        run: |
          curl --verbose --show-error http://localhost:9200
      - name: Install python dependencies
        run: |
          pip install -r requirements.txt
      # Creates the test index and fills it with generated stories.
      - name: Install fixtures
        run: |
          python -m test.create_fixtures
      - name: Run all tests
        run: |
          pytest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -140,3 +140,4 @@ cython_debug/ | |
# App-specific ignores | ||
config.yml | ||
*.back | ||
.idea |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
Testing | ||
======= | ||
|
||
To run unit tests locally (via `pytest`) you need to have an Elasticsearch instance running. | ||
|
||
You can do this via Docker: | ||
* docker pull elasticsearch:8.8.2 | ||
* docker run --rm --name es-news-search-api -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -e "xpack.security.enabled=false" elasticsearch:8.8.2 | ||
|
||
Then, if you open http://127.0.0.1:9200, you should see a JSON response indicating that Elasticsearch is running. Before running `pytest`, load the test fixtures with `python -m test.create_fixtures`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
altair==5.2.* | ||
elasticsearch==8.8.* | ||
fastapi==0.104.* | ||
matplotlib==3.8.* | ||
pandas==2.1.* | ||
pydantic==2.5.* | ||
requests | ||
streamlit==1.29.* | ||
uvicorn[standard] | ||
wordcloud==1.9.* | ||
pyyaml==6.0.* | ||
pytest==7.* | ||
httpx==0.25.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import os | ||
|
||
# Name of the Elasticsearch index that holds the test stories.
INDEX_NAME = "mediacloud_test"

# Address of the Elasticsearch node the fixtures loader and the tests talk to.
ELASTICSEARCH_URL = "http://127.0.0.1:9200"

# The "fixtures" directory located next to this module.
FIXTURES_DIR = os.path.join(
    os.path.dirname(__file__),
    "fixtures",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import os | ||
from unittest import TestCase | ||
from fastapi.testclient import TestClient | ||
|
||
from test import INDEX_NAME, ELASTICSEARCH_URL | ||
os.environ["eshosts"] = ELASTICSEARCH_URL | ||
# make sure to set this env var before importing the app | ||
from api import app | ||
|
||
TIMEOUT = 30 | ||
|
||
|
||
class ApiTest(TestCase):
    """Exercise the search endpoints of the API against the test index.

    The expected counts assume the index has been populated beforehand with
    the generated stories from ``python -m test.create_fixtures``.
    """

    def setUp(self):
        # Every test gets its own in-process client for the FastAPI app.
        self._client = TestClient(app)

    def _search(self, endpoint, query):
        """POST ``query`` as the ``q`` field to a search endpoint of the test index."""
        path = f'/v1/{INDEX_NAME}/search/{endpoint}'
        return self._client.post(path, json={"q": query}, timeout=TIMEOUT)

    def _overview(self, query):
        """Run an overview search that must succeed and return its JSON body."""
        reply = self._search('overview', query)
        assert reply.status_code == 200
        payload = reply.json()
        assert 'total' in payload
        return payload

    def test_overview_all(self):
        # make sure all stories come back and domain is right
        payload = self._overview("*")
        assert payload['total'] > 1000
        assert 'matches' in payload
        for match in payload['matches']:
            assert 'canonical_domain' in match
            assert match['canonical_domain'] == 'example.com'

    def test_overview_no_results(self):
        # a term that matches nothing is reported as "not found"
        reply = self._search('overview', "asdfdf")
        assert reply.status_code == 404

    def test_overview_by_content(self):
        # a broad term first, then a narrow one
        broad = self._overview("article")
        assert broad['total'] > 1000
        narrow = self._overview("1")
        assert narrow['total'] < 1000

    def test_overview_by_pub_date(self):
        # restricting by publication date returns only part of the index
        payload = self._overview("* AND publication_date:[2023-11-01 TO 2023-12-10]")
        assert payload['total'] > 300
        assert payload['total'] < 1200

    def test_paging(self):
        # one page of results plus a token for fetching the next page
        reply = self._search('result', "*")
        assert reply.status_code == 200
        page = reply.json()
        assert len(page) == 1000
        resume_token = reply.headers.get('x-resume-token')
        assert resume_token is not None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import json | ||
import logging | ||
import os | ||
import copy | ||
import hashlib | ||
from elasticsearch import Elasticsearch | ||
from elasticsearch.exceptions import ConflictError | ||
|
||
from test import INDEX_NAME, ELASTICSEARCH_URL, FIXTURES_DIR | ||
|
||
# Show INFO-level progress messages while this script runs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Client for the test Elasticsearch node; certificate verification is turned off.
es_client = Elasticsearch(
    hosts=[ELASTICSEARCH_URL],
    basic_auth=("elastic", "changeme"),
    verify_certs=False,
)
|
||
# first create the index
# Field mappings for the test index: URL, domain and extraction fields are
# `keyword`, titles/content/language are `text` with a `keyword` sub-field,
# and the two date fields are `date` (malformed publication dates are ignored).
es_mappings = {
    "properties": {
        "original_url": {"type": "keyword"},
        "url": {"type": "keyword"},
        "normalized_url": {"type": "keyword"},
        "canonical_domain": {"type": "keyword"},
        "publication_date": {"type": "date", "ignore_malformed": True},
        "language": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
        "full_language": {"type": "keyword"},
        "text_extraction": {"type": "keyword"},
        "article_title": {
            "type": "text",
            "fields": {"keyword": {"type": "keyword"}},
        },
        "normalized_article_title": {
            "type": "text",
            "fields": {"keyword": {"type": "keyword"}},
        },
        "text_content": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
        "indexed_date": {"type": "date"},
    }
}

# A 400 response is tolerated so that re-running the script against an index
# that already exists is not an error. The status is supplied through
# `.options(ignore_status=...)` because passing transport options such as
# `ignore=` directly to an API method is deprecated in the 8.x client.
es_client.options(ignore_status=400).indices.create(index=INDEX_NAME, mappings=es_mappings)
logger.info(f"Index '{INDEX_NAME}' with field mappings created successfully (or already exists).")
|
||
# now import the fixtures
# Template story; every generated fixture is a copy of this with a numeric
# suffix appended to the URL/title/content fields and its own dates.
base_fixture = {
    "original_url": "http://example.com/article",
    "url": "http://example.com/article",
    "normalized_url": "http://example.com/article",
    "article_title": "Sample Article ",
    "normalized_article_title": "sample_article_",
    "text_content": "This is the content of the sample article ",
    "canonical_domain": "example.com",
    "publication_date": "2023-11-01",
    "indexed_date": "2023-12-01",
    "language": "en",
    "full_language": "en-us",
    "text_extraction": "trafilatura",
}


def _build_fixture(idx):
    """Return a copy of ``base_fixture`` made unique by the integer ``idx``.

    ``idx`` is appended to the URL, title and content fields. Both date fields
    are set to the same day: the month is 10 plus one per thousand of ``idx``
    and the day cycles through 01..29.
    """
    fixture = copy.copy(base_fixture)
    suffix = str(idx)
    for field in ("original_url", "url", "normalized_url",
                  "article_title", "normalized_article_title", "text_content"):
        fixture[field] += suffix
    story_date = f"2023-{10 + idx // 1000}-{1 + idx % 29:02d}"
    fixture['publication_date'] = story_date
    fixture['indexed_date'] = story_date
    return fixture


imported_count = 0
for idx in range(2000):
    fixture = _build_fixture(idx)
    # The document id is the SHA-256 of the story URL, so the same URL always
    # maps to the same document.
    url_hash = hashlib.sha256(fixture['url'].encode("utf-8")).hexdigest()
    try:
        # op_type="create" makes Elasticsearch reject an id that already exists
        # (raising ConflictError) instead of silently overwriting it, which is
        # what the duplicate handling below relies on.
        es_client.index(index=INDEX_NAME, id=url_hash, document=fixture, op_type="create")
    except ConflictError:
        logger.warning(" duplicate fixture, ignoring")
    else:
        imported_count += 1

# Make the newly indexed documents visible to searches right away, so tests
# started immediately after this script see the full fixture set.
es_client.indices.refresh(index=INDEX_NAME)
logger.info(f" Imported {imported_count}")