Skip to content

Commit

Permalink
Merge pull request #243 from hackalog/dev
Browse files Browse the repository at this point in the history
Release to Main
  • Loading branch information
hackalog authored Feb 1, 2023
2 parents 411a53f + ad6ead4 commit b2b6f21
Show file tree
Hide file tree
Showing 33 changed files with 725 additions and 390 deletions.
31 changes: 15 additions & 16 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
docker:
# specify the version you desire here
# use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers`
- image: cimg/python:3.8.0
- image: continuumio/miniconda3

# Specify service dependencies here if necessary
# CircleCI maintains a library of pre-built images
Expand All @@ -19,39 +19,38 @@ jobs:

steps:
- checkout

- run:
name: Set up Anaconda
name: Set up Conda
command: |
wget -q http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh;
chmod +x ~/miniconda.sh;
~/miniconda.sh -b -p ~/miniconda;
export PATH=~/miniconda/bin:$PATH
echo "export PATH=~/miniconda/bin:$PATH" >> $BASH_ENV;
conda update --yes --quiet conda;
conda init bash
sed -ne '/>>> conda initialize/,/<<< conda initialize/p' ~/.bashrc >> $BASH_ENV
conda update --yes --quiet conda;
export CONDA_EXE=/opt/conda/bin/conda
sed -ne '/>>> conda initialize/,/<<< conda initialize/p' ~/.bashrc >> $BASH_ENV
- run:
name: Build cookiecutter environment and test-env project
command: |
conda create -n cookiecutter --yes python=3.8
conda create -n cookiecutter --yes python=3.8 make
conda activate cookiecutter
pip install cookiecutter
pip install ruamel.yaml
mkdir /home/circleci/.cookiecutter_replay
cp circleci-cookiecutter-easydata.json /home/circleci/.cookiecutter_replay/cookiecutter-easydata.json
mkdir -p /root/repo/.cookiecutter_replay
cp circleci-cookiecutter-easydata.json /root/repo/.cookiecutter_replay/cookiecutter-easydata.json
pwd
which make
cookiecutter --config-file .cookiecutter-easydata-test-circleci.yml . -f --no-input
conda deactivate
- run:
name: Create test-env environment and contrive to always use it
command: |
conda activate cookiecutter
cd test-env
export CONDA_EXE=/home/circleci/miniconda/bin/conda
export CONDA_EXE=/opt/conda/bin/conda
make create_environment
python scripts/tests/add-extra-channel-dependency.py
conda activate test-env
conda install -c anaconda make
touch environment.yml
make update_environment
echo "conda activate test-env" >> $BASH_ENV;
Expand Down
51 changes: 0 additions & 51 deletions .travis.yml

This file was deleted.

18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,24 @@ python -m pip install -f requirements.txt

cookiecutter https://github.com/hackalog/easydata

### To find out more
------------
A good place to start is with reproducible environments. We have a tutorial here: [Getting Started with EasyData Environments](https://github.com/hackalog/easydata/wiki/Getting-Started-with-EasyData-Environments).

The next place to look is the customized documentation included in any EasyData-created repo. It is tailored to the settings you chose in your template. These reference documents live under `references/easydata` and cover:
* more on conda environments
* more on paths
* git configuration (including setting up ssh with GitHub)
* git workflows
* tricks for using Jupyter notebooks in an EasyData environment
* troubleshooting
* recommendations for how to share your work

Furthermore, see:
* [The EasyData documentation on read the docs](https://cookiecutter-easydata.readthedocs.io/en/latest/?badge=latest): this contains up-to-date working examples of how to use EasyData for reproducible datasets and some ways to use notebooks reproducibly
* [Talks and Tutorials based on EasyData](https://github.com/hackalog/easydata/wiki/EasyData-Talks-and-Tutorials)
* [Catalog of EasyData Documentation](https://github.com/hackalog/easydata/wiki/Catalog-of-EasyData-Documentation)
* [The EasyData wiki](https://github.com/hackalog/easydata/wiki) Check here for further troubleshooting and how-to guides for particular problems that aren't in the `references/easydata` docs (including a `git` tutorial)

### The resulting directory structure
------------
Expand Down
6 changes: 3 additions & 3 deletions cookiecutter.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"project_name": "project_name",
"repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}",
"default_branch": ["master", "main"],
"default_branch": ["main", "master"],
"module_name": "src",
"author_name": "Your name (or your organization/company/team)",
"author_name": "Your name (or the copyright holder)",
"description": "A short description of this project.",
"open_source_license": ["MIT", "BSD-2-Clause", "Proprietary"],
"python_version": ["3.7", "3.6", "latest", "3.8"],
"python_version": ["latest", "3.11", "3.10", "3.9", "3.8", "3.7"],
"conda_path": "~/anaconda3/bin/conda",
"upstream_location": ["github.com", "gitlab.com", "bitbucket.org", "your-custom-repo"]
}
2 changes: 1 addition & 1 deletion docs/00-xyz-sample-notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(ds.DESCR)"
"print(ds.README)"
]
},
{
Expand Down
14 changes: 7 additions & 7 deletions docs/Add-csv-template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"* `csv_path`: The desired path to your .csv file (in this case `epidemiology.csv`) relative to paths['raw_data_path']\n",
"* `download_message`: The message to display to indicate to the user how to manually download your .csv file.\n",
"* `license_str`: Information on the license for the dataset\n",
"* `descr_str`: Information on the dataset itself"
"* `readme_str`: Information on the dataset itself"
]
},
{
Expand Down Expand Up @@ -123,7 +123,7 @@
"metadata": {},
"outputs": [],
"source": [
"descr_str = \"\"\"\n",
"readme_str = \"\"\"\n",
"The epidemiology table from Google's [COVID-19 Open-Data dataset](https://github.com/GoogleCloudPlatform/covid-19-open-data). \n",
"\n",
"The full dataset contains datasets of daily time-series data related to COVID-19 for over 20,000 distinct locations around the world. The data is at the spatial resolution of states/provinces for most regions and at county/municipality resolution for many countries such as Argentina, Brazil, Chile, Colombia, Czech Republic, Mexico, Netherlands, Peru, United Kingdom, and USA. All regions are assigned a unique location key, which resolves discrepancies between ISO / NUTS / FIPS codes, etc. The different aggregation levels are:\n",
Expand Down Expand Up @@ -170,7 +170,7 @@
" csv_path=csv_path,\n",
" download_message=download_message,\n",
" license_str=license_str,\n",
" descr_str=descr_str,\n",
" readme_str=readme_str,\n",
" overwrite_catalog=True)"
]
},
Expand Down Expand Up @@ -206,9 +206,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"By default, the workflow helper function also created a `covid-19-epidemiology_raw` dataset that has an empty `ds.data`, but keeps a record of the location of the final `epidemiology.csv` file relative to in `ds.EXTRA`.\n",
"By default, the workflow helper function also created a `covid-19-epidemiology_raw` dataset that has an empty `ds.data`, but keeps a record of the location of the final `epidemiology.csv` file relative to in `ds.FILESET`.\n",
"\n",
"The `.EXTRA` functionality is covered in other documentation."
"The `.FILESET` functionality is covered in other documentation."
]
},
{
Expand Down Expand Up @@ -236,7 +236,7 @@
"metadata": {},
"outputs": [],
"source": [
"ds_raw.EXTRA"
"ds_raw.FILESET"
]
},
{
Expand All @@ -246,7 +246,7 @@
"outputs": [],
"source": [
"# fq path to epidemiology.csv file\n",
"ds_raw.extra_file('epidemiology.csv')"
"ds_raw.fileset_file('epidemiology.csv')"
]
},
{
Expand Down
10 changes: 5 additions & 5 deletions docs/Add-derived-dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(ds.DESCR)"
"print(ds.README)"
]
},
{
Expand Down Expand Up @@ -219,7 +219,7 @@
" source_dataset_name\n",
" dataset_name\n",
" data_function\n",
" added_descr_txt\n",
" added_readme_txt\n",
"\n",
"We'll want our `data_function` to be defined in the project module (in this case `src`) for reproducibility reasons (which we've already done with `subselect_by_key` above)."
]
Expand Down Expand Up @@ -250,7 +250,7 @@
"metadata": {},
"outputs": [],
"source": [
"added_descr_txt = f\"\"\"The dataset {dataset_name} is the subselection \\\n",
"added_readme_txt = f\"\"\"The dataset {dataset_name} is the subselection \\\n",
"to the {key} dataset.\"\"\""
]
},
Expand Down Expand Up @@ -281,7 +281,7 @@
" source_dataset_name=source_dataset_name,\n",
" dataset_name=dataset_name,\n",
" data_function=data_function,\n",
" added_descr_txt=added_descr_txt,\n",
" added_readme_txt=added_readme_txt,\n",
" overwrite_catalog=True)"
]
},
Expand Down Expand Up @@ -318,7 +318,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(ds.DESCR)"
"print(ds.README)"
]
},
{
Expand Down
12 changes: 6 additions & 6 deletions docs/New-Dataset-Template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@
"metadata": {},
"source": [
"### Create a process function\n",
"By default, we recommend that you use the `process_extra_files` functionality and then use a transformer function to create a derived dataset, but you can optionally create your own."
"By default, we recommend that you use the `process_fileset_files` functionality and then use a transformer function to create a derived dataset, but you can optionally create your own."
]
},
{
Expand All @@ -176,11 +176,11 @@
"metadata": {},
"outputs": [],
"source": [
"from src.data.extra import process_extra_files\n",
"process_function = process_extra_files\n",
"from src.data.fileset import process_fileset_files\n",
"process_function = process_fileset_files\n",
"process_function_kwargs = {'file_glob':'*.csv',\n",
" 'do_copy': True,\n",
" 'extra_dir': ds_name+'.extra',\n",
" 'fileset_dir': ds_name+'.fileset',\n",
" 'extract_dir': ds_name}"
]
},
Expand Down Expand Up @@ -355,7 +355,7 @@
"metadata": {},
"outputs": [],
"source": [
"ds.EXTRA"
"ds.FILESET"
]
},
{
Expand All @@ -364,7 +364,7 @@
"metadata": {},
"outputs": [],
"source": [
"ds.extra_file('epidemiology.csv')"
"ds.fileset_file('epidemiology.csv')"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions docs/New-Edge-Template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
"metadata": {},
"outputs": [],
"source": [
"source_ds.EXTRA"
"source_ds.FILESET"
]
},
{
Expand Down Expand Up @@ -178,7 +178,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(ds.DESCR)"
"print(ds.README)"
]
},
{
Expand Down
3 changes: 3 additions & 0 deletions docs/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import requests

from src import paths
from src.log import logger


CCDS_ROOT = Path(__file__).parents[1].resolve()
DOCS_DIR = CCDS_ROOT / "docs"
Expand All @@ -35,6 +37,7 @@ def test_notebook_csv(self):
csv_url = "https://storage.googleapis.com/covid19-open-data/v2/epidemiology.csv"
csv_dest = paths['raw_data_path'] / "epidemiology.csv"
if not csv_dest.exists():
logger.debug("Downloading epidemiology.csv")
csv_file = requests.get(csv_url)
with open(csv_dest, 'wb') as f:
f.write(csv_file.content)
Expand Down
11 changes: 2 additions & 9 deletions {{ cookiecutter.repo_name }}/.circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ jobs:
docker:
# specify the version you desire here
# use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers`
- image: circleci/python:3.7.0
- image: continuumio/miniconda3


# Specify service dependencies here if necessary
# CircleCI maintains a library of pre-built images
Expand All @@ -20,14 +21,6 @@ jobs:
steps:
- checkout

- run:
name: Set up Anaconda
command: |
wget -q http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh;
chmod +x ~/miniconda.sh;
~/miniconda.sh -b -p ~/miniconda;
echo "export PATH=~/miniconda/bin:$PATH" >> $BASH_ENV;
- run:
name: Create environment and contrive to always use it
command: |
Expand Down
9 changes: 2 additions & 7 deletions {{ cookiecutter.repo_name }}/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,12 @@ test: update_environment
$(if $(CI_RUNNING),--ignore=$(TESTS_NO_CI)) \
$(MODULE_NAME)

## Run all Unit Tests with coverage
## Run all Unit and code coverage tests
test_with_coverage: update_environment
$(SET) LOGLEVEL=DEBUG; coverage run -m pytest --pyargs --doctest-modules --doctest-continue-on-failure --verbose \
$(if $(CI_RUNNING),--ignore=$(TESTS_NO_CI)) \
$(MODULE_NAME)

.PHONY: lint
## Lint using flake8
lint:
flake8 $(MODULE_NAME)

.phony: help_update_easydata
help_update_easydata:
@$(PYTHON_INTERPRETER) scripts/help-update.py
Expand All @@ -105,7 +100,7 @@ debug:
# Self Documenting Commands #
#################################################################################

HELP_VARS := PROJECT_NAME DEBUG_FILE ARCH PLATFORM
HELP_VARS := PROJECT_NAME DEBUG_FILE ARCH PLATFORM SHELL

.DEFAULT_GOAL := show-help
.PHONY: show-help
Expand Down
Loading

0 comments on commit b2b6f21

Please sign in to comment.