Skip to content

Commit

Permalink
[ENH] Add new query param for nodes that defaults to a prepopulated i…
Browse files Browse the repository at this point in the history
…ndex (#30)

* add new query param that can receive a list of nodes

* add validation for node url list query param

* add test for unrecognized node URLs

* rename expected env file

* add startup event for creating federation node index

* rename variable storing user-defined local nodes

* raise warning and use local nodes when public nodes fetching fails

* switch to using federation node index in endpoints

* move dynamic default setting + validation of node url param out of query model

* add and implement function for adding trailing slash to urls

* add test for check_nodes_are_recognized()

* refactor node url param validation into separate function and test

* add tests of federation node index creation and nodes endpoint

* update README

* add test case for whitespace in local nodes parsing

* add test case for query node URL list with duplicates

* update regex for local nodes to be whitespace-insensitive

Co-authored-by: Sebastian Urchs <[email protected]>

* add template fed.env file

---------

Co-authored-by: Sebastian Urchs <[email protected]>
  • Loading branch information
alyssadai and surchs authored Nov 21, 2023
1 parent f85c44b commit dce8feb
Show file tree
Hide file tree
Showing 10 changed files with 333 additions and 29 deletions.
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,22 @@ Please refer to our [**official documentation**](https://neurobagel.org/overview

## Launching the API
### 1. Set the Neurobagel nodes to federate over
Create an `.env` file with the variable `NB_NODES` set to the URLs of the nodes to be federated over.
The URLs should be stored as a **space-separated, unquoted** string.
Create a `fed.env` file with the variable `LOCAL_NB_NODES` containing the URLs and (arbitrary) names of the nodes to be federated over.
Each node should be wrapped in brackets `()`, with the URL and name of the node (in that order) separated by a comma.
The variable must be an **unquoted** string.

This repo contains a [template `fed.env`](/fed.env) file that you can edit.

e.g.,
```bash
NB_NODES=https://myfirstnode.org/ https://mysecondnode.org/
LOCAL_NB_NODES=(https://myfirstnode.org/,First Node)(https://mysecondnode.org/,Second Node)
```

### 2. Run the Docker container
```bash
docker pull neurobagel/federation_api

# Make sure to run the next command in the same directory where your .env file is
docker run -d --name=federation -p 8080:8000 --env-file=.env neurobagel/federation_api
docker run -d --name=federation -p 8080:8000 --env-file=fed.env neurobagel/federation_api
```
NOTE: You can replace the port number `8080` for the `-p` flag with any port on the host you wish to use for the API.
13 changes: 10 additions & 3 deletions app/api/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ async def get(
min_num_sessions: int,
assessment: str,
image_modal: str,
node_urls: list[str],
):
"""
Makes GET requests to one or more Neurobagel node APIs using send_get_request utility function where the parameters are Neurobagel query parameters.
Expand All @@ -34,6 +35,8 @@ async def get(
Non-imaging assessment completed by subjects.
image_modal : str
Imaging modality of subject scans.
node_urls : list[str]
List of Neurobagel nodes to send the query to.
Returns
-------
Expand All @@ -42,6 +45,10 @@ async def get(
"""
cross_node_results = []

node_urls = util.validate_query_node_url_list(node_urls)

# Node API query parameters
params = {}
if min_age:
params["min_age"] = min_age
Expand All @@ -60,8 +67,8 @@ async def get(
if image_modal:
params["image_modal"] = image_modal

nodes_dict = util.parse_nodes_as_dict(util.NEUROBAGEL_NODES)
for node_url, node_name in nodes_dict.items():
for node_url in node_urls:
node_name = util.FEDERATION_NODES[node_url]
response = util.send_get_request(node_url + "query/", params)

for result in response:
Expand Down Expand Up @@ -89,7 +96,7 @@ async def get_terms(data_element_URI: str):
cross_node_results = []
params = {data_element_URI: data_element_URI}

for node_url in util.parse_nodes_as_dict(util.NEUROBAGEL_NODES).keys():
for node_url in util.FEDERATION_NODES:
response = util.send_get_request(
node_url + "attributes/" + data_element_URI, params
)
Expand Down
7 changes: 5 additions & 2 deletions app/api/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Data models."""

from typing import Optional, Union

from pydantic import BaseModel
from fastapi import Query
from pydantic import BaseModel, Field

CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$"

Expand All @@ -18,6 +18,9 @@ class QueryModel(BaseModel):
min_num_sessions: int = None
assessment: str = None
image_modal: str = None
# TODO: Replace default value with union of local and public nodes once https://github.com/neurobagel/federation-api/issues/28 is merged
# syntax from https://github.com/tiangolo/fastapi/issues/4445#issuecomment-1117632409
node_url: list[str] | None = Field(Query(default=[]))


class CohortQueryResponse(BaseModel):
Expand Down
3 changes: 1 addition & 2 deletions app/api/routers/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,5 @@
async def get_nodes():
"""Returns a dict of all available nodes apis where key is node URL and value is node name."""
return [
{"NodeName": v, "ApiURL": k}
for k, v in util.parse_nodes_as_dict(util.NEUROBAGEL_NODES).items()
{"NodeName": v, "ApiURL": k} for k, v in util.FEDERATION_NODES.items()
]
1 change: 1 addition & 0 deletions app/api/routers/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ async def get_query(query: QueryModel = Depends(QueryModel)):
query.min_num_sessions,
query.assessment,
query.image_modal,
query.node_url,
)

return response
98 changes: 87 additions & 11 deletions app/api/utility.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,107 @@
"""Constants for federation."""
"""Constants and utility functions for federation."""

import os
import re
import warnings

import httpx
from fastapi import HTTPException

# Neurobagel nodes
NEUROBAGEL_NODES = os.environ.get(
# Neurobagel nodes - TODO: remove default value?
LOCAL_NODES = os.environ.get(
"LOCAL_NB_NODES", "(https://api.neurobagel.org/, OpenNeuro)"
)
FEDERATION_NODES = {}


def add_trailing_slash(url: str) -> str:
"""Add trailing slash to a URL if it does not already have one."""
if not url.endswith("/"):
url += "/"
return url


def parse_nodes_as_dict(nodes: str) -> list:
"""Returns user-defined Neurobagel nodes as a dict.
"""
Transforms a string of user-defined Neurobagel nodes (from an environment variable) to a dict where the keys are the node URLs, and the values are the node names.
It uses a regular expression to match the url, name pairs.
Makes sure node URLs end with a slash."""
pattern = re.compile(r"\((?P<url>https?://[^\s]+), (?P<label>[^\)]+)\)")
Makes sure node URLs end with a slash.
"""
pattern = re.compile(r"\((?P<url>https?://[^)]+),\s?(?P<label>[^\)]+)\)")
matches = pattern.findall(nodes)
for i in range(len(matches)):
url, label = matches[i]
if not url.endswith("/"):
matches[i] = (url + "/", label)
nodes_dict = {url: label for url, label in matches}
nodes_dict = {add_trailing_slash(url): label for url, label in matches}
return nodes_dict


async def create_federation_node_index():
"""
Creates an index of nodes for federation, which is a dict where the keys are the node URLs, and the values are the node names.
Fetches the names and URLs of public Neurobagel nodes from a remote directory file, and combines them with the user-defined local nodes.
"""
local_nodes = parse_nodes_as_dict(LOCAL_NODES)

node_directory_url = "https://raw.githubusercontent.com/neurobagel/menu/main/node_directory/neurobagel_public_nodes.json"
node_directory_response = httpx.get(
url=node_directory_url,
)
if node_directory_response.is_success:
public_nodes = {
add_trailing_slash(node["ApiURL"]): node["NodeName"]
for node in node_directory_response.json()
}
else:
warnings.warn(
f"""
Unable to fetch directory of public Neurobagel nodes from {node_directory_url}.
The federation API will only register the nodes defined locally for this API: {local_nodes}.
Details of the response from the source:
Status code {node_directory_response.status_code}
{node_directory_response.reason_phrase}: {node_directory_response.text}
"""
)
public_nodes = {}

# This step will remove any duplicate keys from the local and public node dicts, giving priority to the local nodes.
FEDERATION_NODES.update(
{
**public_nodes,
**local_nodes,
}
)


def check_nodes_are_recognized(node_urls: list):
"""
Check that all node URLs specified in the query exist in the node index for the API instance.
If not, raise an informative exception where the unrecognized node URLs are listed in alphabetical order.
"""
unrecognized_nodes = sorted(
set(node_urls) - set(FEDERATION_NODES.keys())
) # Resulting set is sorted alphabetically to make the error message deterministic
if unrecognized_nodes:
raise HTTPException(
status_code=422,
detail=f"Unrecognized Neurobagel node URL(s): {unrecognized_nodes}. "
f"The following nodes are available for federation: {list(FEDERATION_NODES.keys())}",
)


def validate_query_node_url_list(node_urls: list) -> list:
"""Format and validate node URLs passed as values to the query endpoint, including setting a default list of node URLs when none are provided."""
# Remove and ignore node URLs that are empty strings
node_urls = list(filter(None, node_urls))
if node_urls:
node_urls = [add_trailing_slash(node_url) for node_url in node_urls]
# Remove duplicates while preserving order
node_urls = list(dict.fromkeys(node_urls))
check_nodes_are_recognized(node_urls)
else:
# default to searching over all known nodes
node_urls = list(FEDERATION_NODES.keys())
return node_urls


def send_get_request(url: str, params: list):
"""
Makes a GET request to one or more Neurobagel nodes.
Expand Down
22 changes: 20 additions & 2 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,35 @@
"""Main app."""

from contextlib import asynccontextmanager

import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.openapi.docs import get_redoc_html, get_swagger_ui_html
from fastapi.responses import ORJSONResponse, RedirectResponse

from .api import utility as util
from .api.routers import attributes, nodes, query

favicon_url = "https://raw.githubusercontent.com/neurobagel/documentation/main/docs/imgs/logo/neurobagel_favicon.png"


@asynccontextmanager
async def lifespan(app: FastAPI):
"""
Collect and store locally defined and public node details for federation upon startup and clears the index upon shutdown.
"""
await util.create_federation_node_index()
yield
util.FEDERATION_NODES.clear()


app = FastAPI(
default_response_class=ORJSONResponse, docs_url=None, redoc_url=None
default_response_class=ORJSONResponse,
docs_url=None,
redoc_url=None,
lifespan=lifespan,
)
favicon_url = "https://raw.githubusercontent.com/neurobagel/documentation/main/docs/imgs/logo/neurobagel_favicon.png"

app.add_middleware(
CORSMiddleware,
Expand Down
3 changes: 3 additions & 0 deletions fed.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Replace the value of LOCAL_NB_NODES below with the URL-name pairs of
# any locally hosted nodes you wish to make available for federation
LOCAL_NB_NODES=(https://myfirstnode.org/,First Node)(https://mysecondnode.org/,Second Node)
101 changes: 101 additions & 0 deletions tests/test_nodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import os

import httpx
import pytest

from app.api import utility as util


@pytest.mark.parametrize(
"local_nodes",
[
"(https://mylocalnode.org, Local Node)",
"(https://mylocalnode.org/, Local Node) (https://firstpublicnode.org/, First Public Node)",
],
)
def test_nodes_discovery_endpoint(test_app, monkeypatch, local_nodes):
"""Test that a federation node index is correctly created from locally set and remote node lists."""
monkeypatch.setenv("LOCAL_NB_NODES", local_nodes)
monkeypatch.setattr(
util,
"LOCAL_NODES",
os.environ.get(
"LOCAL_NB_NODES", "(https://api.neurobagel.org/, OpenNeuro)"
),
)

def mock_httpx_get(**kwargs):
return httpx.Response(
status_code=200,
json=[
{
"NodeName": "First Public Node",
"ApiURL": "https://firstpublicnode.org",
},
{
"NodeName": "Second Public Node",
"ApiURL": "https://secondpublicnode.org",
},
],
)

monkeypatch.setattr(httpx, "get", mock_httpx_get)

with test_app:
response = test_app.get("/nodes/")
assert util.FEDERATION_NODES == {
"https://firstpublicnode.org/": "First Public Node",
"https://secondpublicnode.org/": "Second Public Node",
"https://mylocalnode.org/": "Local Node",
}
assert response.json() == [
{
"NodeName": "First Public Node",
"ApiURL": "https://firstpublicnode.org/",
},
{
"NodeName": "Second Public Node",
"ApiURL": "https://secondpublicnode.org/",
},
{"NodeName": "Local Node", "ApiURL": "https://mylocalnode.org/"},
]


def test_failed_public_nodes_fetching_raises_warning(test_app, monkeypatch):
"""Test that when request for remote list of public nodes fails, an informative warning is raised and the federation node index only includes local nodes."""
monkeypatch.setenv(
"LOCAL_NB_NODES", "(https://mylocalnode.org, Local Node)"
)
monkeypatch.setattr(
util,
"LOCAL_NODES",
os.environ.get(
"LOCAL_NB_NODES", "(https://api.neurobagel.org/, OpenNeuro)"
),
)

def mock_httpx_get(**kwargs):
return httpx.Response(
status_code=404, json={}, text="Some error message"
)

monkeypatch.setattr(httpx, "get", mock_httpx_get)

with pytest.warns(UserWarning) as w:
with test_app:
response = test_app.get("/nodes/")
assert util.FEDERATION_NODES == {
"https://mylocalnode.org/": "Local Node"
}
assert response.json() == [
{
"NodeName": "Local Node",
"ApiURL": "https://mylocalnode.org/",
}
]

for warn_substr in [
"Unable to fetch directory of public Neurobagel nodes",
"The federation API will only register the nodes defined locally for this API: {'https://mylocalnode.org/': 'Local Node'}",
]:
assert warn_substr in w[0].message.args[0]
Loading

0 comments on commit dce8feb

Please sign in to comment.