Skip to content

Commit

Permalink
Emit diagnostics when the number of query results is truncated.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Nov 25, 2024
1 parent 3e87e4b commit 005e71d
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 3 deletions.
32 changes: 29 additions & 3 deletions src/sewerrat/query.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
from typing import Optional, List, Dict
from typing import Optional, List, Dict, Literal
import requests
import warnings

from . import _utils as ut


def query(url, text: Optional[str] = None, user: Optional[str] = None, path: Optional[str] = None, after: Optional[int] = None, before: Optional[int] = None, number: int = 100) -> List[Dict]:
def query(
url: str,
text: Optional[str] = None,
user: Optional[str] = None,
path: Optional[str] = None,
after: Optional[int] = None,
before: Optional[int] = None,
number: int = 100,
on_truncation: Literal["message", "warning", "none"] = "message") -> List[Dict]:
"""
Query the metadata in the SewerRat backend based on free text, the owner,
creation time, etc. This function does not require filesystem access.
Expand Down Expand Up @@ -38,6 +47,10 @@ def query(url, text: Optional[str] = None, user: Optional[str] = None, path: Opt
number:
Integer specifying the maximum number of results to return.
on_truncation:
String specifying the action to take when the number of search
results is capped by ``number``.
Returns:
List of dictionaries where each inner dictionary corresponds to a
Expand Down Expand Up @@ -72,7 +85,11 @@ def query(url, text: Optional[str] = None, user: Optional[str] = None, path: Opt
else:
raise ValueError("at least one search filter must be present")

stub = "/query?translate=true&limit=" + str(number)
if on_truncation != "none":
original_number = number
number += 1

stub = "/query?translate=true"
collected = []

while len(collected) < number:
Expand All @@ -86,4 +103,13 @@ def query(url, text: Optional[str] = None, user: Optional[str] = None, path: Opt
break
stub = payload["next"]

if on_truncation != "none":
if len(collected) > original_number:
msg = "truncated query results to the first " + str(original_number) + " matches"
if on_truncation == "warning":
warnings.warn(msg)
else:
print(msg)
collected = collected[:original_number]

return collected
30 changes: 30 additions & 0 deletions tests/test_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import sewerrat
import os
import tempfile
import time
import pytest


def test_query_truncation(capfd):
mydir = tempfile.mkdtemp()
with open(os.path.join(mydir, "metadata.json"), "w") as handle:
handle.write('{ "first": "Aaron", "last": "Lun" }')

os.mkdir(os.path.join(mydir, "diet"))
with open(os.path.join(mydir, "diet", "metadata.json"), "w") as handle:
handle.write('{ "meal": "lunch", "ingredients": "water" }')

_, url = sewerrat.start_sewerrat()
sewerrat.register(mydir, ["metadata.json"], url=url)

res = sewerrat.query(url, "lun", number=0)
out, err = capfd.readouterr()
assert "truncated" in out
assert len(res) == 0

with pytest.warns(UserWarning, match="truncated"):
res = sewerrat.query(url, "lun", number=0, on_truncation="warning")
assert len(res) == 0

res = sewerrat.query(url, "lun", number=0, on_truncation="none")
assert len(res) == 0

0 comments on commit 005e71d

Please sign in to comment.