Skip to content

Commit

Permalink
Merge branch 'master' into gordonwatts/issue132
Browse files Browse the repository at this point in the history
  • Loading branch information
gordonwatts committed Dec 18, 2020
2 parents 6550120 + 29b475d commit 973723f
Show file tree
Hide file tree
Showing 11 changed files with 133 additions and 143 deletions.
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ Before you can use this library you'll need:

The API access information is normally placed in a `.servicex` file (to keep this confidential information form accidentally getting checked into a public repository). The `servicex` library searches for configuration information in several locations to determine what end-point it should connect to, in the following order:

1. A `.servicex` file in the current working directory
1. A `.servicex` file in the current working directory (it can also be named `servicex.yaml` or `servicex.yml`)
1. A `.servicex` file in the user's home directory (`$HOME` on Linux and Mac, and your profile
directory on Windows).
1. The `config_defaults.yaml` file distributed with the `servicex` package.

If no endpoint is specified, then the library defaults to the developer endpoint, which is `http://localhost:5000` for the web-service API, and `localhost:9000` for the `minio` endpoint. No passwords are required.
If no endpoint is specified, then the library defaults to the developer endpoint, which is `http://localhost:5000` for the web-service API. No passwords are used in this case.

Create a `.servicex` file, in the `yaml` format, in the appropriate place for your work that contains the following (for the `xaod` backend; use `uproot` for the uproot backend):

Expand Down Expand Up @@ -136,15 +136,21 @@ do_query(ds) # Cache is not ignored
As mentioned above, the `.servicex` file is read to pull a configuration. The search path for this file:

1. Your current working directory
2. Your home directory
2. Any working directory above your current working directory.
3. Your home directory

The file can be named any of the following (ordered by precedence):

- `servicex.yaml`
- `servicex.yml`
- `.servicex`

The file can contain an `api_endpoint` as mentioned above. In addition the other following things can be put in:

- `cache_path`: Location where queries, data, and a record of queries are written. This should be an absolute path the person running the library has r/w access to. On windows, make sure to escape `\` - and best to follow standard `yaml` conventions and put the path in quotes - especially if it contains a space. Top level yaml item (don't indent it accidentally!). Defaults to `/tmp/servicex` (with the temp directory as appropriate for your platform) Examples:
- Windows: `cache_path: "C:\\Users\\gordo\\Desktop\\cacheme"`
- Linux: `cache_path: "/home/servicex-cache"`

- `minio_endpoint`, `minio_username`, `minio_password` - these are only interesting if you are using a pre-RC2 release of `servicex` - when the `minio` information wasn't part of the API exchange. This feature is depreciated and will be removed around the time `servicex` moves to RC3.
- `backend_types` - a list of yaml dictionaries that contains some defaults for the backends. By default only the `return_data` is there, which for `xaod` is `root` and `uproot` is `parquet`. Allows `servicex` to convert to `pandas.DataFrame` or `awkward` if requested by the user.

All strings are expanded using python's [os.path.expand](https://docs.python.org/3/library/os.path.html#os.path.expandvars) method - so `$NAME` and `${NAME}` will work to expand existing environment variables.
Expand Down
31 changes: 25 additions & 6 deletions servicex/ConfigSettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,35 @@ def _add_local_source(self):
'''
Look for a '.xxx" file in the local directory
'''
p = Path(f'.{self.appname}')
self._add_from_path(p)
self._add_from_path(Path(f'{self.appname}.yaml'), walk_up_tree=True)
self._add_from_path(Path(f'{self.appname}.yml'), walk_up_tree=True)
self._add_from_path(Path(f'.{self.appname}'), walk_up_tree=True)

def _add_from_path(self, p: Path):
if p.exists():
yaml_data = yaml_util.load_yaml(p, loader=self.loader) or {}
self.add(ConfigSource(yaml_data, str(p.resolve())))
# def _add_from_path(self, p: Path):
# if p.exists():
# yaml_data = yaml_util.load_yaml(p, loader=self.loader) or {}
# self.add(ConfigSource(yaml_data, str(p.resolve())))

def _add_from_path(self, p: Path, walk_up_tree: bool = False):
p.resolve()
name = p.name
dir = p.parent.resolve()
while True:
f = dir / name
if f.exists():
yaml_data = yaml_util.load_yaml(f, loader=self.loader) or {}
self.add(ConfigSource(yaml_data, str(f)))
break
if not walk_up_tree:
break
if dir == dir.parent:
break
dir = dir.parent

def _add_home_source(self):
'''
Look for a '.xxx" file in the local directory
'''
self._add_from_path(Path.home() / f'{self.appname}.yaml')
self._add_from_path(Path.home() / f'{self.appname}.yml')
self._add_from_path(Path.home() / f'.{self.appname}')
12 changes: 10 additions & 2 deletions servicex/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

_ignore_cache = False

# Make sure that generated download path names are below this to avoid os errors
MAX_PATH_LEN = 235


@contextmanager
def ignore_cache():
Expand Down Expand Up @@ -175,6 +178,11 @@ def data_file_location(self, request_id: str, data_name: str) -> Path:
'''
Return the path to the file that should be written out for this
data_name. This is where the output file should get stored.
Truncate the leftmost characters from filenames to avoid throwing a
OSError: [Errno 63] File name too long error Assume that the most unique part of
the name is the right hand side
'''
(self._path / 'data' / request_id).mkdir(parents=True, exist_ok=True)
return self._path / 'data' / request_id / sanitize_filename(data_name)
parent = self._path / 'data' / request_id
parent.mkdir(parents=True, exist_ok=True)
sanitized = sanitize_filename(data_name)
return parent / sanitized[-1 * (MAX_PATH_LEN - len(parent.name)):]
14 changes: 0 additions & 14 deletions servicex/config_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,6 @@ api_endpoints:
- endpoint: http://localhost:5000
# token: xxx

# These are default settings, depreciated, and should not be used.
# They will be removed in the next version.
api_endpoint:
minio_endpoint: localhost:9000
# The username and password for accessing files generated by servicex.
# NOTE:
# 1. If minio_username and password are specified they will be used.
# 2. If only an endpoint username password is specified they will be used.
# 3. If nothing is specified, then the default_xxxx username passwords will be used.
# minio_password: xxxx
# minio_username: xxxx
default_minio_username: miniouser
default_minio_password: leftfoot1

# This is the path of the cache. The "/tmp" will be translated, platform appropriate.
cache_path: /tmp/servicex

Expand Down
48 changes: 7 additions & 41 deletions servicex/minio_adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,11 @@
import asyncio
from concurrent.futures.thread import ThreadPoolExecutor
from pathlib import Path
from typing import Any, AsyncIterator, Optional, cast, Dict
from typing import Any, AsyncIterator, List, Optional, Dict, cast
import logging

import backoff
from backoff import on_exception
from confuse import ConfigView
from minio import Minio, ResponseError

from .utils import ServiceXException
Expand All @@ -59,8 +58,8 @@ def __init__(self, minio_endpoint: str,
secure=self._secured)

@on_exception(backoff.constant, ResponseError, interval=0.1)
def get_files(self, request_id):
return [f.object_name for f in self._client.list_objects(request_id)]
def get_files(self, request_id) -> List[str]:
return [str(f.object_name) for f in self._client.list_objects(request_id)]

def get_access_url(self, request_id: str, object_name: str) -> str:
'''Given a request ID and the file name in that request id, return a URL
Expand Down Expand Up @@ -122,8 +121,7 @@ class MinioAdaptorFactory:
'''A factor that will return, when asked, the proper minio adaptor to use for a request
to get files from ServiceX.
'''
def __init__(self, c: Optional[ConfigView] = None,
always_return: Optional[MinioAdaptor] = None):
def __init__(self, always_return: Optional[MinioAdaptor] = None):
'''Create the factor with a possible adaptor to always use
Args:
Expand All @@ -133,8 +131,6 @@ def __init__(self, c: Optional[ConfigView] = None,
# Get the defaults setup.
self._always = always_return
self._config_adaptor = None
if self._always is None and c is not None:
self._config_adaptor = self._from_config(c)

def from_best(self, transaction_info: Optional[Dict[str, str]] = None) -> MinioAdaptor:
'''Using the information we have, create the proper Minio Adaptor with the correct
Expand Down Expand Up @@ -162,38 +158,8 @@ def from_best(self, transaction_info: Optional[Dict[str, str]] = None) -> MinioA
bool(transaction_info['minio-secured']),
transaction_info['minio-access-key'],
transaction_info['minio-secret-key'])
if self._config_adaptor is not None:
logging.getLogger(__name__).debug('Using the config-file minio_adaptor')
return self._config_adaptor
raise ServiceXException("Do not know how to create a Minio Login info")

def _from_config(self, c: ConfigView) -> MinioAdaptor:
'''Extract the Minio config information from the config file(s). This will be used
if minio login information isn't returned from the request.
Args:
c (ConfigView): The loaded config
Returns:
MinioAdaptor: The adaptor that uses the config's login information.
'''
c_api = c['api_endpoint']
end_point = cast(str, c_api['minio_endpoint'].as_str_expanded())

# Grab the username and password if they are explicitly listed.
if 'minio_username' in c_api:
username = c_api['minio_username'].as_str_expanded()
password = c_api['minio_password'].as_str_expanded()
elif 'username' in c_api:
username = c_api['username'].as_str_expanded()
password = c_api['password'].as_str_expanded()
else:
username = c_api['default_minio_username'].as_str_expanded()
password = c_api['default_minio_password'].as_str_expanded()

return MinioAdaptor(end_point,
access_key=cast(str, username),
secretkey=cast(str, password))
raise ServiceXException("Do not know or have enough information to create a Minio "
f"Login info ({transaction_info})")


async def find_new_bucket_files(adaptor: MinioAdaptor,
Expand All @@ -206,7 +172,7 @@ async def find_new_bucket_files(adaptor: MinioAdaptor,
seen = []
async for _ in update:
# Sadly, this is blocking, and so may hold things up
files = adaptor.get_files(request_id)
files = cast(List[str], adaptor.get_files(request_id))

# If there are new files, pass them on
for f in files:
Expand Down
3 changes: 2 additions & 1 deletion servicex/servicex.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,8 @@ async def _get_files(self, selection_query: str, data_type: str,
yield r

# Cache the final status
await self._update_query_status(client, request_id)
if cached_files is None:
await self._update_query_status(client, request_id)

except ServiceXUnknownRequestID as e:
self._cache.remove_query(query)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
author_email="[email protected]",
maintainer="Gordon Watts (IRIS-HEP/UW Seattle)",
maintainer_email="[email protected]",
url="https://github.com/iris-hep/func_adl_xAOD",
url="https://github.com/ssl-hep/ServiceX_frontend",
license="TBD",
python_requires='>=3.6',
test_suite="tests",
Expand All @@ -47,7 +47,7 @@
'make_it_sync==1.0.0',
'google-auth==1.17',
'confuse==1.3.0',
'pyarrow>=1.0, <2.0'
'pyarrow>=1.0, <3.0'
],
extras_require={
'test': [
Expand Down
56 changes: 56 additions & 0 deletions tests/test_ConfigSettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,59 @@ def test_home_file_default(confuse_config):

finally:
config_file.unlink()


def test_local_more_important_than_home(confuse_config):
'Make sure that we pick the local directory over the home one'
local_config_file = Path('.servicex_test_settings')
home_config_file = Path.home() / '.servicex_test_settings'
try:
with home_config_file.open('w') as f:
f.write('testing_value: 30\n')
with local_config_file.open('w') as f:
f.write('testing_value: 20\n')

confuse_config.clear()
confuse_config.read()

assert get_it(confuse_config) == 20

finally:
local_config_file.unlink()
home_config_file.unlink()


def test_one_level_up(confuse_config):
'Make sure the config file that is one file up is found'
config_file = Path('.').resolve().parent / ('.servicex_test_settings')
try:
with config_file.open('w') as f:
f.write('testing_value: 30\n')

confuse_config.clear()
confuse_config.read()

assert get_it(confuse_config) == 30

finally:
config_file.unlink()


def test_local_more_importnat_than_one_level_up(confuse_config):
'Assure that our local file is found first'
one_up_config_file = Path('.').resolve().parent / ('.servicex_test_settings')
config_file = Path('.').resolve() / ('.servicex_test_settings')
try:
with config_file.open('w') as f:
f.write('testing_value: 30\n')
with one_up_config_file.open('w') as f:
f.write('testing_value: 20\n')

confuse_config.clear()
confuse_config.read()

assert get_it(confuse_config) == 30

finally:
config_file.unlink()
one_up_config_file.unlink()
14 changes: 14 additions & 0 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import string
import random
from servicex.utils import ServiceXException
import pytest

Expand Down Expand Up @@ -148,6 +150,18 @@ def test_data_file_location(tmp_path):
assert str(p).startswith(str(tmp_path))


def test_data_file_location_long_path(tmp_path):
c = Cache(tmp_path)
letters = string.ascii_lowercase
file_significant_name = 'junk.root'
long_file_name = ''.join(random.choice(letters) for i in range(230))

p = c.data_file_location('123-456', long_file_name+file_significant_name)

assert(len(p.name) == 235 - len(p.parent.name))
assert p.name.endswith(file_significant_name)


def test_data_file_location_twice(tmp_path):
c = Cache(tmp_path)
_ = c.data_file_location('123-456', 'junk1.root')
Expand Down
Loading

0 comments on commit 973723f

Please sign in to comment.