Skip to content

Commit

Permalink
Downloads through GA4GH DRS are recorded in audit service (#1117)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulineribeyre authored Oct 5, 2023
1 parent ea885f0 commit 9139d4d
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 20 deletions.
2 changes: 0 additions & 2 deletions fence/blueprints/data/blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
)
from fence.config import config
from fence.errors import Forbidden, InternalError, UserError, Unauthorized
from fence.resources.audit.utils import enable_audit_logging
from fence.utils import get_valid_expiration


Expand Down Expand Up @@ -328,7 +327,6 @@ def upload_file(file_id):


@blueprint.route("/download/<path:file_id>", methods=["GET"])
@enable_audit_logging
def download_file(file_id):
"""
Get a presigned url to download a file given by file_id.
Expand Down
5 changes: 4 additions & 1 deletion fence/blueprints/data/indexd.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
AccountSasPermissions,
generate_blob_sas,
)
from fence import auth

from fence import auth
from fence.auth import (
get_jwt,
current_token,
Expand All @@ -47,7 +47,9 @@
give_service_account_billing_access_if_necessary,
)
from fence.resources.ga4gh.passports import sync_gen3_users_authz_from_ga4gh_passports
from fence.resources.audit.utils import enable_audit_logging
from fence.utils import get_valid_expiration_from_request

from . import multipart_upload
from ...models import AssumeRoleCacheAWS, query_for_user, query_for_user_by_id
from ...models import AssumeRoleCacheGCP
Expand All @@ -66,6 +68,7 @@
ANONYMOUS_USERNAME = "anonymous"


@enable_audit_logging
def get_signed_url_for_file(
action,
file_id,
Expand Down
1 change: 1 addition & 0 deletions fence/blueprints/ga4gh.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import flask
from flask import request

from fence.errors import UserError
from fence.config import config

Expand Down
19 changes: 17 additions & 2 deletions fence/resources/audit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def create_audit_log_for_request(response):
in `enable_audit_logging` decorator), record an audit log. The data we
need to record the logs are stored in `flask.g.audit_data` before reaching
this code.
TODO The audit service has the ability to record presigned URL "upload" logs but we are not
currently sending those logs. We would need to:
- add the `@enable_audit_logging` decorator to `init_multipart_upload` (single upload requests
are handled by `get_signed_url_for_file` which is already decorated).
- update this function to send the appropriate data when those endpoints are called.
- add upload unit tests to `test_audit_service.py`.
"""
try:
method = flask.request.method
Expand All @@ -49,11 +56,19 @@ def create_audit_log_for_request(response):
# could use `flask.request.url` but we don't want the root URL
request_url += f"?{flask.request.query_string.decode('utf-8')}"

if method == "GET" and endpoint.startswith("/data/download/"):
if method == "GET" and (
endpoint.startswith("/data/download/")
or endpoint.startswith("/ga4gh/drs/v1/objects/")
):
if endpoint.startswith("/data/download/"):
guid = endpoint[len("/data/download/") :]
else:
guid = endpoint[len("/ga4gh/drs/v1/objects/") :]
guid = guid.split("/access/")[0]
flask.current_app.audit_service_client.create_presigned_url_log(
status_code=response.status_code,
request_url=request_url,
guid=endpoint[len("/data/download/") :],
guid=guid,
action="download",
**audit_data,
)
Expand Down
56 changes: 41 additions & 15 deletions tests/test_audit_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,10 @@ def __init__(self, data, status_code=200):


@pytest.mark.parametrize("indexd_client_with_arborist", ["s3_and_gs"], indirect=True)
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
@pytest.mark.parametrize("protocol", ["gs", None])
def test_presigned_url_log(
endpoint,
protocol,
client,
user_client,
Expand All @@ -133,9 +135,12 @@ def test_presigned_url_log(
monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True})

guid = "dg.hello/abc"
path = f"/data/download/{guid}"
if protocol:
path += f"?protocol={protocol}"
if endpoint == "download":
path = f"/data/download/{guid}"
if protocol:
path += f"?protocol={protocol}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol or 's3'}"
resource_paths = ["/my/resource/path1", "/path2"]
indexd_client_with_arborist(resource_paths)
headers = {
Expand Down Expand Up @@ -182,7 +187,9 @@ def test_presigned_url_log(
@pytest.mark.parametrize(
"indexd_client_with_arborist", ["s3_and_gs_acl_no_authz"], indirect=True
)
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
def test_presigned_url_log_acl(
endpoint,
client,
user_client,
mock_arborist_requests,
Expand All @@ -206,7 +213,10 @@ def test_presigned_url_log_acl(

protocol = "gs"
guid = "dg.hello/abc"
path = f"/data/download/{guid}?protocol={protocol}"
if endpoint == "download":
path = f"/data/download/{guid}?protocol={protocol}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}"
indexd_client_with_arborist(None)
headers = {
"Authorization": "Bearer "
Expand Down Expand Up @@ -244,7 +254,8 @@ def test_presigned_url_log_acl(


@pytest.mark.parametrize("public_indexd_client", ["s3_and_gs"], indirect=True)
def test_presigned_url_log_public(client, public_indexd_client, monkeypatch):
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
def test_presigned_url_log_public(endpoint, client, public_indexd_client, monkeypatch):
"""
Same as `test_presigned_url_log`, but with an anonymous user downloading
public data.
Expand All @@ -254,8 +265,12 @@ def test_presigned_url_log_public(client, public_indexd_client, monkeypatch):
)
monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True})

protocol = "s3"
guid = "dg.hello/abc"
path = f"/data/download/{guid}"
if endpoint == "download":
path = f"/data/download/{guid}?protocol={protocol}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}"

with audit_service_mocker as audit_service_requests:
audit_service_requests.post.return_value = MockResponse(
Expand All @@ -275,13 +290,15 @@ def test_presigned_url_log_public(client, public_indexd_client, monkeypatch):
"guid": guid,
"resource_paths": [],
"action": "download",
"protocol": "s3",
"protocol": protocol,
},
)


@pytest.mark.parametrize("indexd_client_with_arborist", ["s3_and_gs"], indirect=True)
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
def test_presigned_url_log_disabled(
endpoint,
client,
user_client,
mock_arborist_requests,
Expand All @@ -307,7 +324,10 @@ def test_presigned_url_log_disabled(

protocol = "gs"
guid = "dg.hello/abc"
path = f"/data/download/{guid}"
if endpoint == "download":
path = f"/data/download/{guid}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}"
if protocol:
path += f"?protocol={protocol}"
resource_paths = ["/my/resource/path1", "/path2"]
Expand All @@ -324,9 +344,6 @@ def test_presigned_url_log_disabled(
)
}

# protocol=None should fall back to s3 (first indexed location):
expected_protocol = protocol or "s3"

with audit_service_mocker as audit_service_requests:
audit_service_requests.post.return_value = MockResponse(
data={},
Expand All @@ -339,17 +356,26 @@ def test_presigned_url_log_disabled(


@pytest.mark.parametrize("indexd_client", ["s3_and_gs"], indirect=True)
def test_presigned_url_log_unauthorized(client, indexd_client, db_session, monkeypatch):
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
def test_presigned_url_log_unauthorized(
endpoint, client, indexd_client, db_session, monkeypatch
):
"""
If Fence does not return a presigned URL, no audit log should be created.
If Fence does not return a presigned URL, an audit log with the appropriate status
code should be created.
"""
audit_service_mocker = mock.patch(
"fence.resources.audit.client.requests", new_callable=mock.Mock
)
monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True})

protocol = "s3"
guid = "dg.hello/abc"
path = f"/data/download/{guid}"
path = f"/data/download/{guid}?protocol={protocol}"
if endpoint == "download":
path = f"/data/download/{guid}?protocol={protocol}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}"
with audit_service_mocker as audit_service_requests:
audit_service_requests.post.return_value = MockResponse(
data={},
Expand All @@ -367,7 +393,7 @@ def test_presigned_url_log_unauthorized(client, indexd_client, db_session, monke
"guid": guid,
"resource_paths": [],
"action": "download",
"protocol": "s3",
"protocol": protocol,
},
)

Expand Down

0 comments on commit 9139d4d

Please sign in to comment.