Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FD-1745] Add UMLS as a new search API #6

Merged
merged 3 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@
2. **Install the package** <br>
If you are working on a new feature, you can install the package version from
a remote or local branch
**NOTE** If testing changes to search-dragon in `locutus`, don't forget to deploy a `locutus` branch with the correct `search-dragon` version in the requirements.txt file!
**NOTE** Any new env variables created, e.g. api keys, will need to be added to the `locutus` deployment files.
```
# remote
pip install git+https://github.com/NIH-NCPI/locutus_utilities.git@{branch_name}
pip install git+https://github.com/NIH-NCPI/search-dragon.git@{branch_name}

# local
pip install -e .
Expand Down
10 changes: 0 additions & 10 deletions src/search_dragon/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
import requests

LOGGING_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"

Expand All @@ -16,12 +15,3 @@
# Add handlers to the logger
logger.addHandler(console_handler)


def fetch_data(url):
""" """
response = requests.get(url)
if response.status_code == 200:
return response.json()
else:
print(f"Failed to fetch data: {response.status_code}")
return None
10 changes: 9 additions & 1 deletion src/search_dragon/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
__version__ = "1.0.1"
__version__ = "1.0.2"

'''
Change log
1.0.0 - Initial creation
1.0.1 - Add OLS api
1.0.2 - Add UMLS api

'''
40 changes: 39 additions & 1 deletion src/search_dragon/external_apis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,48 @@
import os
import requests
from search_dragon import logger

class OntologyAPI:
    """Base class for ontology search APIs.

    Holds the connection metadata shared by every concrete search API
    (e.g. OLS, UMLS) and provides common helpers for fetching data and
    de-duplicating results.
    """

    def __init__(self, base_url, api_id, api_name):
        # base_url: root URL of the API endpoint.
        # api_id: short identifier for the API (e.g. "ols").
        # api_name: human-readable name of the API.
        self.base_url = base_url
        self.api_id = api_id
        self.api_name = api_name

    def fetch_data(self, url):
        """Fetch JSON data from the given URL.

        Args:
            url (str): Fully-constructed request URL.

        Returns:
            dict | None: Parsed JSON payload on success, otherwise None.
        """
        response = requests.get(url)
        if response.status_code == 200:
            return response.json()
        else:
            # Use the shared logger (not print) so failures are captured
            # alongside the rest of the search logs.
            logger.error(f"Failed to fetch data: {response.status_code}")
            return None

    def remove_duplicates(self, data):
        """
        Remove duplicate records where the 'code_iri' field is the same.

        Args:
            data (list): List of records to filter.

        Returns:
            list: Filtered data with duplicates removed.
        """
        seen_uris = set()
        filtered_data = []
        excluded_data = []

        for item in data:
            uri = item.get("code_iri")
            if uri in seen_uris:
                excluded_data.append(item)
            else:
                seen_uris.add(uri)
                filtered_data.append(item)

        # Log the excluded records count. The field is 'code_iri'
        # (the previous message said 'uri', which was inaccurate).
        message = (
            f"Records({len(excluded_data)}) were excluded as duplicates based on 'code_iri'.{excluded_data}"
        )
        logger.info(message)

        return filtered_data
47 changes: 33 additions & 14 deletions src/search_dragon/external_apis/ols_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@

"""
from search_dragon.external_apis import OntologyAPI
from search_dragon import logger, fetch_data

OLS_API_BASE_URL = "https://www.ebi.ac.uk/ols4/api/"
OLS_API = "ols"
OLS_NAME = "Ontology Lookup Service"
from search_dragon import logger

class OLSSearchAPI(OntologyAPI):
def __init__(self):
super().__init__(base_url=OLS_API_BASE_URL, api_id=OLS_API, api_name=OLS_NAME)
super().__init__(
base_url="https://www.ebi.ac.uk/ols4/api/",
api_id="ols",
api_name="Ontology Lookup Service",
)
self.total_results_id = 'numFound'

def collect_data(self, search_url, results_per_page, start_index):
"""
Expand Down Expand Up @@ -47,18 +48,16 @@ def collect_data(self, search_url, results_per_page, start_index):
paginated_url = f"{search_url}&rows={results_per_page}&start={start_index}"
logger.info(f"Fetching data from {paginated_url}")

data = fetch_data(paginated_url)


data = self.fetch_data(paginated_url)

results = data.get("response", {}).get("docs", [])
raw_data.extend(results)

total_results = data.get("response", {}).get("numFound", 0)
total_results = data.get("response", {}).get(self.total_results_id, 0)
logger.info(f"Total results found: {total_results}")
logger.info(f"Retrieved {len(results)} results (start_index: {start_index}).")

# Check if the start_index exceeds total results
# Check if the start_index exceeds total results
if start_index >= total_results:
message = f"start_index ({start_index}) exceeds total available results ({total_results})."
logger.error(message)
Expand All @@ -69,7 +68,7 @@ def collect_data(self, search_url, results_per_page, start_index):
more_results_available = n_results_used < total_results

except Exception as e:
logger.error(f"Error fetching data from {paginated_url}: {e}")
logger.error(f"Error fetching data from {search_url}: {e}")
return [], more_results_available

return raw_data, more_results_available
Expand Down Expand Up @@ -112,7 +111,23 @@ def format_ontology(self, ontology_list):

return ontology_param

def build_url(self, keywords, ontology_list):
def format_results_per_page(self, results_per_page):
    """Return the page-size query parameter for the API.

    Args:
        results_per_page (int): Number of records requested per page.

    Returns:
        str: Query fragment of the form ``rows=<n>``.
    """
    return "rows={}".format(results_per_page)

def format_start_index(self, start_index):
    """Return the pagination-offset query parameter for the API.

    Args:
        start_index (int): Zero-based index of the first record to return.

    Returns:
        str: Query fragment of the form ``start=<n>``.
    """
    return "start={}".format(start_index)

def build_url(self, keywords, ontology_list, start_index, results_per_page):
"""
Constructs the search URL by combining the base URL, formatted keyword, and ontology parameters.

Expand All @@ -128,9 +143,13 @@ def build_url(self, keywords, ontology_list):

keyword_param = self.format_keyword(keywords)
ontology_param = self.format_ontology(ontology_list)
start_param = self.format_start_index(start_index)
page_size_param = self.format_results_per_page(results_per_page)

# Join the query params with & then join the params to the base url
url_blocks.append("&".join([keyword_param,ontology_param]))
url_blocks.append(
"&".join([keyword_param, ontology_param, start_param, page_size_param])
)
complete_url = "".join(url_blocks)

return complete_url
Expand Down
Loading