Skip to content

Commit

Permalink
[ENH] Switch node config format (#39)
Browse files Browse the repository at this point in the history
* read local nodes from JSON instead of env var and handle missing nodes gracefully

* update local node setting in tests and test new warnings

* handle cases where local nodes JSON is not a list or is an empty dict

* update unit test of local nodes parser

* replace template .env with template nodes config JSON

* update instructions to use local nodes config in README

* update docstring and warning message

* remove unneeded tmp path fixture
  • Loading branch information
alyssadai authored Dec 4, 2023
1 parent 27aaeae commit 308e66e
Show file tree
Hide file tree
Showing 6 changed files with 208 additions and 88 deletions.
42 changes: 31 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,46 @@

</div>

Please refer to our [**official documentation**](https://neurobagel.org/overview/) for more information on how to use the federation API.
Please refer to our [**official documentation**](https://neurobagel.org/federate/) for more information on Neurobagel federation and how to use the federation API.

## Launching the API
### 1. Set the Neurobagel nodes to federate over
Create a `fed.env` file with the variable `LOCAL_NB_NODES` containing the URLs and (arbitrary) names of the nodes to be federated over.
Each node should be wrapped in brackets `()`, with the URL and name of the node (in that order) separated by a comma.
The variable must be an **unquoted** string.
### 1. Set the local Neurobagel nodes to federate over
Create a configuration JSON file called `local_nb_nodes.json` containing the URLs and (arbitrary) names of the local nodes you wish to federate over.
Each node must be denoted by a dictionary `{}` with two key-value pairs: `"NodeName"` for the name of the node, and `"ApiURL"` for the URL of the API exposed for that node.
Multiple nodes must be wrapped in a list `[]`.

This repo contains a [template `fed.env`](/fed.env) file that you can edit.
This repo contains a [template `local_nb_nodes.json`](/local_nb_nodes.json) file that you can edit.

e.g.,
```bash
LOCAL_NB_NODES=(https://myfirstnode.org/,First Node)(https://mysecondnode.org/,Second Node)
Examples:

`local_nb_nodes.json` with one local node
```json
{
"NodeName": "First Node",
"ApiURL": "https://firstnode.org"
}
```

`local_nb_nodes.json` with two local nodes
```json
[
{
"NodeName": "First Node",
"ApiURL": "https://firstnode.org"
},
{
"NodeName": "Second Node",
"ApiURL": "https://secondnode.org"
}
]
```

### 2. Run the Docker container
```bash
docker pull neurobagel/federation_api

# Make sure to run the next command in the same directory where your .env file is
docker run -d --name=federation -p 8080:8000 --env-file=fed.env neurobagel/federation_api
# Run this next command in the same directory where your `local_nb_nodes.json` file is located
docker run -d -v "${PWD}/local_nb_nodes.json:/usr/src/local_nb_nodes.json:ro" \
--name=federation -p 8080:8000 neurobagel/federation_api
```
NOTE: You can replace the port number `8080` for the `-p` flag with any port on the host you wish to use for the API.
74 changes: 50 additions & 24 deletions app/api/utility.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
"""Constants and utility functions for federation."""

import os
import re
import json
import warnings
from pathlib import Path

import httpx
from fastapi import HTTPException

# Neurobagel nodes - TODO: remove default value?
LOCAL_NODES = os.environ.get(
"LOCAL_NB_NODES", "(https://api.neurobagel.org/, OpenNeuro)"
)
LOCAL_NODE_INDEX_PATH = Path(__file__).parents[2] / "local_nb_nodes.json"
FEDERATION_NODES = {}


Expand All @@ -21,47 +18,76 @@ def add_trailing_slash(url: str) -> str:
return url


def parse_nodes_as_dict(nodes: str) -> list:
def parse_nodes_as_dict(path: Path) -> dict:
"""
Transforms a string of user-defined Neurobagel nodes (from an environment variable) to a dict where the keys are the node URLs, and the values are the node names.
It uses a regular expression to match the url, name pairs.
Reads names and URLs of user-defined Neurobagel nodes from a JSON file (if available) and stores them in a dict
where the keys are the node URLs, and the values are the node names.
Makes sure node URLs end with a slash.
"""
pattern = re.compile(r"\((?P<url>https?://[^)]+),\s?(?P<label>[^\)]+)\)")
matches = pattern.findall(nodes)
nodes_dict = {add_trailing_slash(url): label for url, label in matches}
return nodes_dict
# TODO: Add more validation of input JSON, including for JSONDecodeError (invalid JSON)
if path.exists() and path.stat().st_size > 0:
with open(path, "r") as f:
local_nodes = json.load(f)
if local_nodes:
if isinstance(local_nodes, list):
return {
add_trailing_slash(node["ApiURL"]): node["NodeName"]
for node in local_nodes
}
return {
add_trailing_slash(local_nodes["ApiURL"]): local_nodes[
"NodeName"
]
}

return {}


async def create_federation_node_index():
"""
Creates an index of nodes for federation, which is a dict where the keys are the node URLs, and the values are the node names.
Fetches the names and URLs of public Neurobagel nodes from a remote directory file, and combines them with the user-defined local nodes.
"""
local_nodes = parse_nodes_as_dict(LOCAL_NODES)

node_directory_url = "https://raw.githubusercontent.com/neurobagel/menu/main/node_directory/neurobagel_public_nodes.json"
local_nodes = parse_nodes_as_dict(LOCAL_NODE_INDEX_PATH)

if not local_nodes:
warnings.warn(
f"No local Neurobagel nodes defined or found. Federation will be limited to nodes available from the Neurobagel public node directory {node_directory_url}. "
"(To specify one or more local nodes to federate over, define them in a 'local_nb_nodes.json' file in the current directory and relaunch the API.)\n"
)

node_directory_response = httpx.get(
url=node_directory_url,
)
# TODO: Handle network errors gracefully
if node_directory_response.is_success:
public_nodes = {
add_trailing_slash(node["ApiURL"]): node["NodeName"]
for node in node_directory_response.json()
}
else:
warnings.warn(
f"""
Unable to fetch directory of public Neurobagel nodes from {node_directory_url}.
The federation API will only register the nodes defined locally for this API: {local_nodes}.
Details of the response from the source:
Status code {node_directory_response.status_code}
{node_directory_response.reason_phrase}: {node_directory_response.text}
"""
failed_get_warning = "\n".join(
[
f"Unable to fetch directory of public Neurobagel nodes from {node_directory_url}.",
"Details of the response from the source:",
f"Status code {node_directory_response.status_code}: {node_directory_response.reason_phrase}\n",
]
)
public_nodes = {}

if local_nodes:
warnings.warn(
failed_get_warning
+ f"Federation will be limited to the nodes defined locally for this API: {local_nodes}."
)
else:
warnings.warn(failed_get_warning)
raise RuntimeError(
"No local or public Neurobagel nodes available for federation. "
"Please define at least one local node in a 'local_nb_nodes.json' file in the current directory and try again."
)

# This step will remove any duplicate keys from the local and public node dicts, giving priority to the local nodes.
FEDERATION_NODES.update(
{
Expand Down
3 changes: 0 additions & 3 deletions fed.env

This file was deleted.

10 changes: 10 additions & 0 deletions local_nb_nodes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"NodeName": "First Node",
"ApiURL": "https://myfirstnode.org/"
},
{
"NodeName": "Second Node",
"ApiURL": "https://mysecondnode.org/"
}
]
98 changes: 90 additions & 8 deletions tests/test_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,19 @@
@pytest.mark.parametrize(
"local_nodes",
[
"(https://mylocalnode.org, Local Node)",
"(https://mylocalnode.org/, Local Node) (https://firstpublicnode.org/, First Public Node)",
{"https://mylocalnode.org/": "Local Node"},
{
"https://mylocalnode.org/": "Local Node",
"https://firstpublicnode.org/": "First Public Node",
},
],
)
def test_nodes_discovery_endpoint(test_app, monkeypatch, local_nodes):
"""Test that a federation node index is correctly created from locally set and remote node lists."""
monkeypatch.setattr(util, "LOCAL_NODES", local_nodes)


def mock_parse_nodes_as_dict(path):
return local_nodes

def mock_httpx_get(**kwargs):
return httpx.Response(
status_code=200,
Expand All @@ -30,6 +35,7 @@ def mock_httpx_get(**kwargs):
],
)

monkeypatch.setattr(util, "parse_nodes_as_dict", mock_parse_nodes_as_dict)
monkeypatch.setattr(httpx, "get", mock_httpx_get)

with test_app:
Expand All @@ -54,15 +60,16 @@ def mock_httpx_get(**kwargs):

def test_failed_public_nodes_fetching_raises_warning(test_app, monkeypatch):
"""Test that when request for remote list of public nodes fails, an informative warning is raised and the federation node index only includes local nodes."""
monkeypatch.setattr(
util, "LOCAL_NODES", "(https://mylocalnode.org, Local Node)"
)

def mock_parse_nodes_as_dict(path):
return {"https://mylocalnode.org/": "Local Node"}

def mock_httpx_get(**kwargs):
return httpx.Response(
status_code=404, json={}, text="Some error message"
)

monkeypatch.setattr(util, "parse_nodes_as_dict", mock_parse_nodes_as_dict)
monkeypatch.setattr(httpx, "get", mock_httpx_get)

with pytest.warns(UserWarning) as w:
Expand All @@ -78,8 +85,83 @@ def mock_httpx_get(**kwargs):
}
]

assert len(w) == 1
for warn_substr in [
"Unable to fetch directory of public Neurobagel nodes",
"The federation API will only register the nodes defined locally for this API: {'https://mylocalnode.org/': 'Local Node'}",
"Federation will be limited to the nodes defined locally for this API: {'https://mylocalnode.org/': 'Local Node'}",
]:
assert warn_substr in w[0].message.args[0]


def test_unset_local_nodes_raises_warning(test_app, monkeypatch):
    """
    Check startup behaviour when the local node parser yields nothing.

    The local node parser is patched to return an empty dict and the public
    directory request is patched to succeed. The federation node index and the
    /nodes/ response must then contain only the public nodes, and exactly one
    warning mentioning the missing local nodes must be emitted.
    """
    # Payload served in place of the remote public node directory.
    # The URLs here have no trailing slash; the expected values below do.
    public_directory = [
        {
            "NodeName": "First Public Node",
            "ApiURL": "https://firstpublicnode.org",
        },
        {
            "NodeName": "Second Public Node",
            "ApiURL": "https://secondpublicnode.org",
        },
    ]

    def no_local_nodes(path):
        return {}

    def successful_directory_fetch(**kwargs):
        return httpx.Response(status_code=200, json=public_directory)

    monkeypatch.setattr(util, "parse_nodes_as_dict", no_local_nodes)
    monkeypatch.setattr(httpx, "get", successful_directory_fetch)

    with pytest.warns(UserWarning) as w, test_app:
        response = test_app.get("/nodes/")
        assert util.FEDERATION_NODES == {
            "https://firstpublicnode.org/": "First Public Node",
            "https://secondpublicnode.org/": "Second Public Node",
        }
        assert response.json() == [
            {
                "NodeName": "First Public Node",
                "ApiURL": "https://firstpublicnode.org/",
            },
            {
                "NodeName": "Second Public Node",
                "ApiURL": "https://secondpublicnode.org/",
            },
        ]

    assert len(w) == 1
    assert "No local Neurobagel nodes defined or found" in w[0].message.args[0]


def test_no_available_nodes_raises_error(monkeypatch, test_app):
    """
    Check that startup fails with a RuntimeError when there is nothing to federate over.

    The local node parser is patched to return an empty dict and the public
    directory request is patched to return a 404, so neither source provides nodes.
    """

    def no_local_nodes(path):
        return {}

    def failed_directory_fetch(**kwargs):
        return httpx.Response(
            status_code=404, json={}, text="Some error message"
        )

    monkeypatch.setattr(util, "parse_nodes_as_dict", no_local_nodes)
    monkeypatch.setattr(httpx, "get", failed_directory_fetch)

    with pytest.warns(UserWarning) as w:
        with pytest.raises(RuntimeError) as exc_info:
            # Entering the app context is enough; no request is made.
            with test_app:
                pass

    # Two warnings are expected, one for the failed GET request for public nodes, and one for the lack of local nodes.
    assert len(w) == 2
    error_message = str(exc_info.value)
    assert (
        "No local or public Neurobagel nodes available for federation"
        in error_message
    )
Loading

0 comments on commit 308e66e

Please sign in to comment.