Skip to content

Commit

Permalink
Merge pull request #245 from acwooding/extend-environment
Browse files Browse the repository at this point in the history
Extend environment management to handle arbitrary conda channels
  • Loading branch information
hackalog authored Feb 1, 2023
2 parents b7aff58 + 2e4563b commit ad6ead4
Show file tree
Hide file tree
Showing 10 changed files with 132 additions and 63 deletions.
3 changes: 2 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,15 @@ jobs:
pwd
which make
cookiecutter --config-file .cookiecutter-easydata-test-circleci.yml . -f --no-input
- run:
name: Create test-env environment and contrive to always use it
command: |
conda activate cookiecutter
cd test-env
export CONDA_EXE=/opt/conda/bin/conda
make create_environment
python scripts/tests/add-extra-channel-dependency.py
conda activate test-env
conda install -c anaconda make
touch environment.yml
Expand Down
9 changes: 2 additions & 7 deletions {{ cookiecutter.repo_name }}/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,12 @@ test: update_environment
$(if $(CI_RUNNING),--ignore=$(TESTS_NO_CI)) \
$(MODULE_NAME)

## Run all Unit Tests with coverage
## Run all Unit and code coverage tests
test_with_coverage: update_environment
$(SET) LOGLEVEL=DEBUG; coverage run -m pytest --pyargs --doctest-modules --doctest-continue-on-failure --verbose \
$(if $(CI_RUNNING),--ignore=$(TESTS_NO_CI)) \
$(MODULE_NAME)

.PHONY: lint
## Lint using flake8
lint:
flake8 $(MODULE_NAME)

# Special target names are case-sensitive in GNU Make: the lowercase spelling
# `.phony` only declares an ordinary target, so it must be written `.PHONY`
# for help_update_easydata to be treated as a command rather than a file.
.PHONY: help_update_easydata
help_update_easydata:
	@$(PYTHON_INTERPRETER) scripts/help-update.py
Expand All @@ -105,7 +100,7 @@ debug:
# Self Documenting Commands #
#################################################################################

HELP_VARS := PROJECT_NAME DEBUG_FILE ARCH PLATFORM
HELP_VARS := PROJECT_NAME DEBUG_FILE ARCH PLATFORM SHELL

.DEFAULT_GOAL := show-help
.PHONY: show-help
Expand Down
23 changes: 8 additions & 15 deletions {{ cookiecutter.repo_name }}/Makefile.envs
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,20 @@

include Makefile.include

$(LOCKFILE): check_installation .make.bootstrap .make.pip-requirements.txt .make.environment-default.yml .make.conda-forge-requirements.txt
$(LOCKFILE): check_installation .make.bootstrap split_environment_files
ifeq (conda, $(VIRTUALENV))
$(CONDA_EXE) env update -n $(PROJECT_NAME) -f .make.environment-default.yml --prune
$(CONDA_EXE) install -n $(PROJECT_NAME) --file .make.conda-forge-requirements.txt --channel defaults --channel conda-forge --strict-channel-priority --yes
$(foreach channel, $(shell $(CAT) .make.channel-order.include),\
$(CONDA_EXE) install -n $(PROJECT_NAME) --file .make.$(channel)-environment.txt --channel defaults --channel $(channel) --strict-channel-priority --yes $(CMDSEP))
$(CONDA_EXE) run -n $(PROJECT_NAME) --no-capture pip install -r .make.pip-requirements.txt
$(CONDA_EXE) env export -n $(PROJECT_NAME) -f $(LOCKFILE)
else
$(error Unsupported Environment `$(VIRTUALENV)`. Use conda)
endif

# extract multi-phase dependencies from environment.yml
.make.environment-pip.yml: environment.yml .make.bootstrap
$(CONDA_EXE) run -n $(PROJECT_NAME) --no-capture $(PYTHON_INTERPRETER) scripts/split_pip.py pip-yaml $(PROJECT_DIR)environment.yml > $@

.make.pip-requirements.txt: environment.yml .make.bootstrap
$(CONDA_EXE) run -n $(PROJECT_NAME) --no-capture $(PYTHON_INTERPRETER) scripts/split_pip.py pip $(PROJECT_DIR)environment.yml > $@

.make.conda-forge-requirements.txt: environment.yml .make.bootstrap
$(CONDA_EXE) run -n $(PROJECT_NAME) --no-capture $(PYTHON_INTERPRETER) scripts/split_pip.py conda-forge $(PROJECT_DIR)environment.yml > $@

.make.environment-default.yml: environment.yml .make.bootstrap
$(CONDA_EXE) run -n $(PROJECT_NAME) --no-capture $(PYTHON_INTERPRETER) scripts/split_pip.py default $(PROJECT_DIR)environment.yml > $@
.PHONY: split_environment_files
# Extract multi-phase dependencies from environment.yml and create the ordering file.
# Runs scripts/split_pip.py on environment.yml inside the $(PROJECT_NAME) conda
# environment. Nothing is redirected here: the script is expected to write its
# own output files (per-channel requirement lists and the channel-order include).
# NOTE(review): this target is phony, so any file target that lists it as a
# normal prerequisite is considered out of date on every run -- confirm that
# always re-running the dependent recipe is intended.
split_environment_files: environment.yml .make.bootstrap
$(CONDA_EXE) run -n $(PROJECT_NAME) --no-capture $(PYTHON_INTERPRETER) scripts/split_pip.py $(PROJECT_DIR)environment.yml

.make.bootstrap: scripts/bootstrap.yml
$(CONDA_EXE) env update -n $(PROJECT_NAME) -f scripts/bootstrap.yml
Expand Down Expand Up @@ -69,6 +61,7 @@ endif
# Checks that the conda environment is active
environment_enabled:
ifeq (conda,$(VIRTUALENV))
$(CONDA_EXE) config --env --set channel_priority strict
ifneq ($(notdir ${CONDA_DEFAULT_ENV}), $(PROJECT_NAME))
$(error Run "$(VIRTUALENV) activate $(PROJECT_NAME)" before proceeding...)
endif
Expand Down
3 changes: 1 addition & 2 deletions {{ cookiecutter.repo_name }}/Makefile.include
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ CAT ?= cat
SET ?= export
WHICH ?= which
DEVNULL ?= /dev/null

$(warning From here on, using SHELL = $(SHELL))
CMDSEP ?= ;
1 change: 1 addition & 0 deletions {{ cookiecutter.repo_name }}/Makefile.win32
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CAT = type
SET = set
WHICH = where
DEVNULL = nul
CMDSEP = &

# Some UNIXish packages force the installation of a Bourne-compatible shell, and Make
# prefers using this when it sees it. We thus force the usage of the good ole Batch
Expand Down
2 changes: 1 addition & 1 deletion {{ cookiecutter.repo_name }}/environment.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{% macro pyver() -%}
{% if cookiecutter.python_version == 'latest' -%}
- python=3
- python
{% else -%}
- python={{ cookiecutter.python_version }}
{% endif -%}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ When adding packages to your python environment, **do not `pip install` or `cond
Your `environment.yml` file will look something like this:
```
name: {{ cookiecutter.repo_name }}
dependencies:
- pip
- pip:
- -e . # conda >= 4.4 only
Expand All @@ -106,7 +107,7 @@ name: {{ cookiecutter.repo_name }}
```
To add any package available from conda, add it to the end of the list. If you have a PyPI dependency that's not available via conda, add it to the list of pip installable dependencies under ` - pip:`.

You can include any {{ cookiecutter.upstream_location }} python-based project in the `pip` section via `git+https://{{ cookiecutter.upstream_location }}/<my_git_handle>/<package>`.
You can include any `{{ cookiecutter.upstream_location }}` python-based project in the `pip` section via `git+https://{{ cookiecutter.upstream_location }}/<my_git_handle>/<package>`.

In particular, if you're working off of a fork or a work in progress branch of a repo in {{ cookiecutter.upstream_location }} (say, your personal version of <package>), you can change `git+https://{{ cookiecutter.upstream_location }}/<my_git_handle>/<package>` to

Expand All @@ -117,6 +118,43 @@ Once you're done your edits, run `make update_environment` and voila, you're upd

To share your updated environment, check in your `environment.yml` file. (More on this in [Sharing your Work](sharing-your-work.md))

#### Adding packages from other conda channels
Say we want to add a package only available from the `conda-forge` conda channel and not the default conda channel. (The conda channel is what follows `-c` when using `conda install -c my-channel my-package`.) Suppose we want to use `make` on Windows. Then we need to use `conda-forge`, since the default conda channel only has Linux and macOS installations of `make`. Normally, to install this with conda, we would use `conda install -c conda-forge make`. **We won't do that here**.

Instead, we add a `channel-order` section that starts with `defaults` and lists the other channels we want to use in the order we want to install from them (note that this is a custom EasyData section to the `environment.yml`). Then we add our package in the dependency list in the form `channel-name::package-name`, for example, `conda-forge::make`.

In this case an updated `environment.yml` file looks like this:
```
name: {{ cookiecutter.repo_name }}
channel-order:
- defaults
- conda-forge
dependencies:
- pip
- pip:
- -e . # conda >= 4.4 only
- python-dotenv>=0.5.1
- nbval
- nbdime
- umap-learn
- gdown
- setuptools
- wheel
- git>=2.5 # for git worktree template updating
- sphinx
- bokeh
- click
- colorcet
- coverage
- coveralls
- datashader
- holoviews
- matplotlib
- jupyter
- conda-forge::make
...
```


#### Lock files
Now, we'll admit that this workflow isn't perfectly reproducible in the sense that conda still has to resolve versions from the `environment.yml`. To make it more reproducible, running either `make create_environment` or `make update_environment` will generate an `environment.{$ARCH}.lock.yml` (e.g. `environment.i386.lock.yml`). This file keeps a record of the exact environment that is currently installed in your conda environment `{{ cookiecutter.repo_name }}`. If you ever need to reproduce an environment exactly, you can install from the `.lock.yml` file. (Note: These are architecture dependent).
Expand Down
14 changes: 11 additions & 3 deletions {{ cookiecutter.repo_name }}/scripts/bootstrap.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
{% macro pyver() -%}
{% if cookiecutter.python_version == 'latest' -%}
- python
{% else -%}
- python={{ cookiecutter.python_version }}
{% endif -%}
{% endmacro -%}
name: {{ cookiecutter.repo_name }}
channels:
- defaults
- defaults
dependencies:
- python=3.7
- pyyaml
- pyyaml
{{ pyver()|indent(3, true) }}
86 changes: 53 additions & 33 deletions {{ cookiecutter.repo_name }}/scripts/split_pip.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@
import json
import sys
import yaml
from collections import defaultdict

ACCEPTABLE_FORMATS = ["default", "pip", "pip-yaml", "conda-forge"]

def env_split(conda_env, kind="default"):
"""Given a conda_environment dict, split into pip/nonpip versions
def env_split(conda_env, channel_order):
"""Given a conda_environment dict, and a channel order, split into versions for each channel.
Returns:
conda_env: (list)
remaining setup bits of the environment.yml file
channel_dict: (dict)
dict containing the list of dependencies by channel name
conda_env: dict
Python object corresponding to environment.yml"""
# Cheater way to make deep Copies
json_copy = json.dumps(conda_env)
Expand All @@ -17,49 +23,63 @@ def env_split(conda_env, kind="default"):

pipdeps = None
deplist = conda_env.pop('dependencies')
conda_forge_list = []
channel_dict = defaultdict(list)

for k, dep in enumerate(deplist[:]): # Note: copy list, as we mutate it
if isinstance(dep, dict): # nested yaml
if dep.get('pip', None):
pipdeps = ["pip", deplist.pop(k)]
channel_dict['pip'] = deplist.pop(k)
else:
prefix = 'conda-forge::'
if dep.startswith(prefix):
conda_forge_list.append(dep[len(prefix):])
prefix_check = dep.split('::')
if len(prefix_check) > 1:
channel = prefix_check[0]
if not channel in channel_order:
raise Exception(f'the channel {channel} required for {dep} is not specified in a channel-order section of the environment file')
channel_dict[f'{channel}'].append(prefix_check[1])
deplist.remove(dep)

conda_env['dependencies'] = deplist
pip_env['dependencies'] = pipdeps
return conda_env, pip_env, conda_forge_list
channel_dict['defaults'] = deplist
conda_env.pop('channel-order', None)
return conda_env, channel_dict

def get_channel_order(conda_env):
    """
    Given a conda_environment dict, get the channels from the channel order.

    Reads the optional ``channel-order`` entry of ``conda_env``. ``defaults``
    is placed first when it is not already listed, and ``pip`` is always
    appended as the last entry.

    Parameters
    ----------
    conda_env: dict
        Python object corresponding to environment.yml

    Returns
    -------
    list
        A new list of channel names ending in ``'pip'``. ``conda_env`` (and
        the list stored under ``channel-order``) is left unmodified, so the
        function can be called repeatedly on the same dict.
    """
    declared = conda_env.get('channel-order')

    if declared is None:
        declared = []
    elif isinstance(declared, str):
        # A scalar `channel-order: some-channel` is treated as one channel
        # rather than being iterated character by character.
        declared = [declared]

    # Work on a copy: inserting/appending on the caller's own list would leak
    # 'defaults'/'pip' back into the environment dict.
    channel_order = list(declared)

    if 'defaults' not in channel_order:
        channel_order.insert(0, 'defaults')
    channel_order.append('pip')
    return channel_order

def usage():
print(f"""
Usage: split_pip.py [{"|".join(ACCEPTABLE_FORMATS)}] path/to/environment.yml
Usage: split_pip.py path/to/environment.yml
""")
if __name__ == '__main__':
if len(sys.argv) != 3:
usage()
exit(1)

kind = sys.argv[1]
if kind not in ACCEPTABLE_FORMATS:
if len(sys.argv) != 2:
usage()
exit(1)

with open(sys.argv[2], 'r') as yamlfile:
with open(sys.argv[1], 'r') as yamlfile:
conda_env = yaml.safe_load(yamlfile)

cenv, penv, forgelist = env_split(conda_env)
if kind == "pip-yaml":
_ = yaml.dump(penv, sys.stdout, allow_unicode=True, default_flow_style=False)
elif kind == "pip":
print("\n".join(penv["dependencies"].pop(-1)["pip"]))
elif kind == "pip-yaml":
_ = yaml.dump(penv, sys.stdout, allow_unicode=True, default_flow_style=False)
elif kind == "default":
_ = yaml.dump(cenv, sys.stdout, allow_unicode=True, default_flow_style=False)
elif kind == "conda-forge":
print("\n".join(forgelist))
else:
raise Exception(f"Invalid Kind: {kind}")
#check for acceptable formats
channel_order = get_channel_order(conda_env)
with open('.make.channel-order.include', 'w') as f:
f. write(' '.join(channel_order[:-1])) #exclude pip as a channel here

cenv, channel_dict = env_split(conda_env, channel_order)

for kind in channel_order:
if kind == "pip":
filename = '.make.pip-requirements.txt'
with open(filename, 'w') as f:
f.write("\n".join(channel_dict['pip']['pip']))
else:
filename = f'.make.{kind}-environment.txt'
with open(filename, 'w') as f:
f.write("\n".join(channel_dict[kind]))
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import sys
import yaml


if __name__ == "__main__":
    # Test helper: rewrite environment.yml (in the current directory) so that
    # it carries a channel-qualified dependency plus a matching
    # `channel-order` section.
    env_path = "environment.yml"
    extra_dependency = "pytorch::cpuonly"
    ordered_channels = ['defaults', 'pytorch']

    # Load the existing environment specification.
    with open(env_path, "rt", encoding="utf-8") as handle:
        spec = yaml.safe_load(handle)

    # Add the new dependency, then record the channel order.
    spec["dependencies"].append(extra_dependency)
    spec["channel-order"] = ordered_channels

    # Write the modified specification back over the original file.
    with open(env_path, "wt", encoding="utf-8") as handle:
        yaml.safe_dump(spec, handle)

0 comments on commit ad6ead4

Please sign in to comment.