From b75485ca061fa067c8d68fc992a3955224e23ada Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Thu, 24 Oct 2024 10:48:34 +0100 Subject: [PATCH] various fixes + improvements for H01 and MICrONS: - default to "minnie65_public" for "cortex65" - fetch_neurons: allow explicitly setting materialization version - if `materialization="auto"` (default) try finding mat matching roots - make _get_somas work for H01 dataset - fix a bunch of docstrings --- navis/interfaces/cave_utils.py | 309 ++++++++++++++++++++++++++------- navis/interfaces/h01.py | 67 ++++--- navis/interfaces/microns.py | 71 ++++---- 3 files changed, 334 insertions(+), 113 deletions(-) diff --git a/navis/interfaces/cave_utils.py b/navis/interfaces/cave_utils.py index 141a1c78..ad0c1219 100644 --- a/navis/interfaces/cave_utils.py +++ b/navis/interfaces/cave_utils.py @@ -1,8 +1,21 @@ +# This script is part of navis (http://www.github.com/navis-org/navis). +# Copyright (C) 2018 Philipp Schlegel +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + from ..core import MeshNeuron, NeuronList from .. import config, utils import pandas as pd import numpy as np -import warnings from concurrent.futures import ThreadPoolExecutor, as_completed from functools import lru_cache @@ -28,6 +41,8 @@ logger = config.get_logger(__name__) dataset = None +SILENCE_FIND_MAT_VERSION = False + @lru_cache(None) def get_cave_client(datastack="cortex65", server_address=None): @@ -47,6 +62,7 @@ def get_cave_client(datastack="cortex65", server_address=None): return CAVEclient(datastack, server_address) + @lru_cache(None) def _get_cloudvol(url, cache=True): """Get (cached) CloudVolume for given segmentation. @@ -63,7 +79,8 @@ def _get_cloudvol(url, cache=True): url, cache=cache, use_https=True, parallel=10, progress=False, fill_missing=True ) -def _get_somas(root_ids, table="nucleus_detection_v0", datastack="cortex65"): + +def _get_somas(root_ids, client, materialization="auto"): """Fetch somas based on nuclei segmentation for given neuron(s). Since this is a nucleus detection you will find that some neurons do @@ -98,22 +115,36 @@ def _get_somas(root_ids, table="nucleus_detection_v0", datastack="cortex65"): table. 
""" - # Get/Initialize the CAVE client - client = get_cave_client(datastack) + # This property should be set by the dispatching function + table = client.materialize.nucleus_table + + if materialization == "auto": + if root_ids is not None: + materialization = roots_to_mat(root_ids, client, verbose=False) + else: + # Use the most recent materialization + materialization = None filter_in_dict = None if not isinstance(root_ids, type(None)): root_ids = utils.make_iterable(root_ids) filter_in_dict = {"pt_root_id": root_ids} - return client.materialize.query_table(table, filter_in_dict=filter_in_dict) + return client.materialize.query_table( + table, filter_in_dict=filter_in_dict, materialization_version=materialization + ) -def fetch_neurons(x, lod, - with_synapses, - datastack, - parallel, - max_threads, - **kwargs): + +def fetch_neurons( + x, + lod, + with_synapses, + client, + parallel, + max_threads, + materialization="auto", + **kwargs, +): """Fetch neuron meshes. Notes @@ -130,15 +161,17 @@ def fetch_neurons(x, lod, meshes. with_synapses : bool, optional If True will also attach synapses as ``.connectors``. - datastack : "cortex65" | "cortex35" | "layer 2/3" | "h01_c3_flat" | str - Which dataset to query. "cortex65", "cortex35" and "layer 2/3" - are internally mapped to the corresponding sources: for example, - "minnie65_public_vXXX" for "cortex65" where XXX is always the - most recent version). + client : CAVEclient + The CAVEclient with which to interact. parallel : bool If True, will use parallel threads to fetch data. max_threads : int Max number of parallel threads to use. + materialization : "auto" | int + Which materialization version to use to look up somas and synapses + (if applicable). If "auto" (default) will try to find the most + recent version that contains the given root IDs. If an + integer is provided will use that version. **kwargs Keyword arguments are passed through to the initialization of the ``navis.MeshNeurons``. 
@@ -150,43 +183,63 @@ def fetch_neurons(x, lod, """ x = utils.make_iterable(x, force_type=int) - client = get_cave_client(datastack) vol = _get_cloudvol(client.info.segmentation_source()) # this is cached try: - somas = _get_somas(x, datastack=datastack) + somas = _get_somas(x, client=client, materialization=materialization) soma_pos = somas.set_index("pt_root_id").pt_position.to_dict() except BaseException as e: logger.warning("Failed to fetch somas via nucleus segmentation" f"(){e})") soma_pos = {} nl = [] - with ThreadPoolExecutor(max_workers=1 if not parallel else max_threads) as executor: - futures = {} - for id in x: - f = executor.submit(_fetch_single_neuron, - id, - vol=vol, - lod=lod, - client=client, - with_synapses=with_synapses, - source=datastack, - **kwargs - ) - futures[f] = id - - with config.tqdm(desc='Fetching', - total=len(x), - leave=config.pbar_leave, - disable=len(x) == 1 or config.pbar_hide) as pbar: - for f in as_completed(futures): - id = futures[f] - pbar.update(1) - try: - nl.append(f.result()) - except Exception as exc: - print(f'{id} generated an exception:', exc) + if max_threads > 1 and parallel: + with ThreadPoolExecutor(max_workers=max_threads) as executor: + futures = {} + for id in x: + f = executor.submit( + _fetch_single_neuron, + id, + vol=vol, + lod=lod, + client=client, + with_synapses=with_synapses, + materialization=materialization, + **kwargs, + ) + futures[f] = id + + with config.tqdm( + desc="Fetching", + total=len(x), + leave=config.pbar_leave, + disable=len(x) == 1 or config.pbar_hide, + ) as pbar: + for f in as_completed(futures): + id = futures[f] + pbar.update(1) + try: + nl.append(f.result()) + except Exception as exc: + print(f"{id} generated an exception:", exc) + else: + for id in config.tqdm( + x, + desc="Fetching", + leave=config.pbar_leave, + disable=len(x) == 1 or config.pbar_hide, + ): + n = _fetch_single_neuron( + id, + vol=vol, + lod=lod, + client=client, + with_synapses=with_synapses, + materialization=materialization, + **kwargs, + ) + nl.append(n) nl = NeuronList(nl) @@ -194,13 +247,18 @@ def fetch_neurons(x, lod, if n.id in soma_pos: # For VoxelResolution see client.materialize.get_table_metadata('nucleus_detection_v0') # (attached to df as 'table_voxel_resolution') - n.soma_pos = np.array(soma_pos[n.id]) * somas.attrs['table_voxel_resolution'] + n.soma_pos = ( + np.array(soma_pos[n.id]) * somas.attrs["table_voxel_resolution"] + ) else: n.soma_pos = None return nl -def _fetch_single_neuron(id, lod, vol, client, with_synapses=False, **kwargs): + +def _fetch_single_neuron( + id, lod, vol, client, with_synapses=False, materialization="auto", **kwargs +): """Fetch a single neuron.""" # Make sure we only use `lod` if that's actually supported by the source if "MultiLevel" in str(type(vol.mesh)): @@ -210,9 +268,16 @@ def _fetch_single_neuron(id, lod, vol, client, with_synapses=False, **kwargs): n = MeshNeuron(mesh, id=id, units="nm", **kwargs) + if materialization == "auto": + materialization = roots_to_mat(id, client, verbose=False) + if with_synapses: - pre = client.materialize.synapse_query(pre_ids=id) - post = client.materialize.synapse_query(post_ids=id) + pre = client.materialize.synapse_query( + pre_ids=id, materialization_version=materialization + ) + post = client.materialize.synapse_query( + post_ids=id, materialization_version=materialization + ) syn_table = client.materialize.synapse_table syn_info = client.materialize.get_table_metadata(syn_table) @@ -242,7 +307,7 @@ def _fetch_single_neuron(id, lod, vol, client, 
with_synapses=False, **kwargs): return n -def get_voxels(x, mip, bounds, datastack): +def get_voxels(x, mip, bounds, client): """Fetch voxels making a up given root ID. Parameters @@ -255,11 +320,8 @@ def get_voxels(x, mip, bounds, datastack): Bounding box [xmin, xmax, ymin, ymax, zmin, zmax] in voxel space. For example, the voxel resolution for mip 0 segmentation is 8 x 8 x 40 nm. - datastack : "cortex65" | "cortex35" | "layer 2/3" | "h01_c3_flat" | str - Which dataset to query. "cortex65", "cortex35" and "layer 2/3" - are internally mapped to the corresponding sources: for example, - "minnie65_public_vXXX" for "cortex65" where XXX is always the - most recent version). + client : CAVEclient + The CAVEclient with which to interact. Returns ------- @@ -267,18 +329,20 @@ def get_voxels(x, mip, bounds, datastack): In voxel space according to `mip`. """ - client = get_cave_client(datastack) # Need to get the graphene (not the precomputed) version of the data - vol_graphene = cv.CloudVolume(client.chunkedgraph.cloudvolume_path, - use_https=True, progress=False) - url = client.info.get_datastack_info()['segmentation_source'] + vol_graphene = cv.CloudVolume( + client.chunkedgraph.cloudvolume_path, use_https=True, progress=False + ) + url = client.info.get_datastack_info()["segmentation_source"] vol_prec = _get_cloudvol(url) # Get L2 chunks making up this neuron l2_ids = client.chunkedgraph.get_leaves(x, stop_layer=2) # Turn l2_ids into chunk indices - l2_ix = [np.array(vol_graphene.mesh.meta.meta.decode_chunk_position(l)) for l in l2_ids] + l2_ix = [ + np.array(vol_graphene.mesh.meta.meta.decode_chunk_position(l2)) for l2 in l2_ids + ] l2_ix = np.unique(l2_ix, axis=0) # Convert to nm @@ -296,16 +360,18 @@ def get_voxels(x, mip, bounds, datastack): if not isinstance(bounds, type(None)): bounds = np.asarray(bounds) if not bounds.ndim == 1 or len(bounds) != 6: - raise ValueError('`bounds` must be [xmin, xmax, ymin, ymax, zmin, zmax]') + raise ValueError("`bounds` must be [xmin, xmax, ymin, ymax, zmin, zmax]") l2_vxl = l2_vxl[np.all(l2_vxl >= bounds[::2], axis=1)] l2_vxl = l2_vxl[np.all(l2_vxl < bounds[1::2] + ch_size, axis=1)] try: vol_prec.mip = mip - for ch in config.tqdm(l2_vxl, desc='Loading'): - ct = vol_prec[ch[0]:ch[0] + ch_size[0], - ch[1]:ch[1] + ch_size[1], - ch[2]:ch[2] + ch_size[2]][:, :, :, 0] + for ch in config.tqdm(l2_vxl, desc="Loading"): + ct = vol_prec[ + ch[0] : ch[0] + ch_size[0], + ch[1] : ch[1] + ch_size[1], + ch[2] : ch[2] + ch_size[2], + ][:, :, :, 0] this_vxl = np.dstack(np.where(ct == x))[0] this_vxl = this_vxl + ch voxels.append(this_vxl) @@ -348,3 +414,120 @@ def _chunks_to_nm(xyz_ch, vol, voxel_resolution=[4, 4, 40]): * voxel_resolution * mip_scaling ) + + +def roots_to_mat( + ids, + client, + verbose=True, + allow_multiple=False, + raise_missing=True, +): + """Find a materialization version (or live) for given root ID(s). + + Parameters + ---------- + ids : int | iterable + Root ID(s) to check. + client : CAVEclient + The CAVEclient with which to interact. + verbose : bool + Whether to print results of search. + allow_multiple : bool + If True, will track if IDs can be found spread across multiple + materialization versions if there is no single one containing + all. + raise_missing : bool + Only relevant if `allow_multiple=True`. If False, will return + versions even if some IDs could not be found. + + Returns + ------- + version : int | "live" + A single version (including "live") that contains all given + root IDs. 
+    versions :  np.ndarray
+                If no single version was found and `allow_multiple=True` will
+                return a vector of length `len(ids)` with the latest version
+                at which the respective ID can be found.
+                Important: the "live" version will be returned as -1!
+                If `raise_missing=False` and one or more root IDs could not
+                be found in any of the available materialization versions,
+                these IDs will be returned as version 0.
+
+    """
+    ids = utils.make_iterable(ids)
+
+    # For each ID track the most recent valid version
+    latest_valid = np.zeros(len(ids), dtype=np.int32)
+
+    # Get the meta data for the available materialization versions
+    # This is a list of dicts where each dict has a "time_stamp" key
+    vmeta = client.materialize.get_versions_metadata()
+
+    # Sort by "time_stamp"
+    vmeta = sorted(vmeta, key=lambda x: x["time_stamp"], reverse=True)
+
+    # Go over each version (start with the most recent)
+    for i, mat in enumerate(vmeta):
+        ts_m = mat["time_stamp"]
+        version = mat["version"]
+
+        # Check which root IDs were valid at the time
+        is_valid = client.chunkedgraph.is_latest_roots(ids, timestamp=ts_m)
+
+        # Update latest valid versions
+        latest_valid[(latest_valid == 0) & is_valid] = version
+
+        if all(is_valid):
+            if verbose and not SILENCE_FIND_MAT_VERSION:
+                print(f"Using materialization version {version}.")
+            return version
+
+    # If no single materialized version can be found, see if we can get
+    # by with the live materialization
+    is_latest = client.chunkedgraph.is_latest_roots(ids, timestamp=None)
+    latest_valid[(latest_valid == 0) & is_latest] = -1  # track "live" as -1
+    if all(is_latest) and dataset != "public":  # public does not have live
+        if verbose:
+            print("Using live materialization")
+        return "live"
+
+    if allow_multiple and any(latest_valid != 0):
+        if all(latest_valid != 0):
+            if verbose and not SILENCE_FIND_MAT_VERSION:
+                print(
+                    f"Found root IDs spread across {len(np.unique(latest_valid))} "
+                    "materialization versions."
+                )
+            return latest_valid
+
+        msg = (
+            f"Found root IDs spread across {len(np.unique(latest_valid)) - 1} "
+            f"materialization versions but {(latest_valid == 0).sum()} IDs "
+            "do not exist in any of the materialized tables."
+        )
+
+        if not raise_missing:
+            if verbose and not SILENCE_FIND_MAT_VERSION:
+                print(msg)
+            return latest_valid
+        else:
+            raise MaterializationMatchError(msg)
+
+    if dataset not in ("public",):
+        raise MaterializationMatchError(
+            "Given root IDs do not (co-)exist in any of the available "
+            "materialization versions (including live). Try updating "
+            "root IDs and rerun your query."
+        )
+    else:
+        raise MaterializationMatchError(
+            "Given root IDs do not (co-)exist in any of the available "
+            "public materialization versions. Please make sure that "
+            "the root IDs do exist and rerun your query."
+        )
+
+
+class MaterializationMatchError(Exception):
+    pass
diff --git a/navis/interfaces/h01.py b/navis/interfaces/h01.py
index 276f0f58..c81cf8a7 100644
--- a/navis/interfaces/h01.py
+++ b/navis/interfaces/h01.py
@@ -1,19 +1,41 @@
+# This script is part of navis (http://www.github.com/navis-org/navis).
+# Copyright (C) 2018 Philipp Schlegel
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
 from . import cave_utils
 
 DATASTACK = "h01_c3_flat"
 SERVER_ADDRESS = "https://global.brain-wire-test.org"
+NUCLEUS_TABLE = "nucleus"
 
-def get_cave_client():
-    """Get caveclient for H01 dataset.
-    """
-    return cave_utils.get_cave_client(DATASTACK, SERVER_ADDRESS)
 
-def fetch_neurons(x, *, lod=2,
-                  with_synapses=True,
-                  datastack=DATASTACK,
-                  parallel=True,
-                  max_threads=4,
-                  **kwargs):
+def get_cave_client(datastack=DATASTACK):
+    """Get caveclient for H01 dataset."""
+    client = cave_utils.get_cave_client(datastack, SERVER_ADDRESS)
+    client.materialize.nucleus_table = NUCLEUS_TABLE
+    return client
+
+
+def fetch_neurons(
+    x,
+    *,
+    lod=2,
+    with_synapses=True,
+    datastack=DATASTACK,
+    materialization="auto",
+    parallel=True,
+    max_threads=4,
+    **kwargs,
+):
     """Fetch neuron meshes.
 
     Notes
@@ -23,7 +45,7 @@ def fetch_neurons(x, *, lod=2,
     Parameters
     ----------
     x :             str | int | list-like
-                    Segment ID(s). Multiple Ids can be provided as list-like.
+                    Segment ID(s). Multiple IDs can be provided as list-like.
     lod :           int
                     Level of detail. Higher ``lod`` = coarser. This parameter
                     is ignored if the data source does not support multi-level
@@ -31,7 +53,12 @@ def fetch_neurons(x, *, lod=2,
     with_synapses : bool, optional
                     If True will also attach synapses as ``.connectors``.
     datastack :     str
-                    DATASTACK.
+                    Datastack to use. Defaults to "h01_c3_flat".
+    materialization : "auto" | int
+                    Which materialization version to use to look up somas and synapses
+                    (if applicable). If "auto" (default) will try to find the most
+                    recent version that contains the given root IDs. If an
+                    integer is provided will use that version.
     parallel :      bool
                     If True, will use parallel threads to fetch data.
     max_threads :   int
@@ -44,17 +71,21 @@ def fetch_neurons(x, *, lod=2,
     -------
     navis.Neuronlist
                     Containing :class:`navis.MeshNeuron`.
+
     """
+    client = get_cave_client(datastack)
     return cave_utils.fetch_neurons(
         x,
         lod=lod,
         with_synapses=with_synapses,
-        datastack=datastack,
+        client=client,
         parallel=parallel,
         max_threads=max_threads,
-        **kwargs
+        materialization=materialization,
+        **kwargs,
     )
 
+
 def get_voxels(x, mip=0, bounds=None, datastack=DATASTACK):
     """Fetch voxels making a up given root ID.
 
@@ -75,10 +106,6 @@ def get_voxels(x, mip=0, bounds=None, datastack=DATASTACK):
     -------
     voxels :        (N, 3) np.ndarray
                     In voxel space according to `mip`.
+
     """
-    return cave_utils.get_voxels(
-        x,
-        mip=mip,
-        bounds=bounds,
-        datastack=datastack
-    )
\ No newline at end of file
+    return cave_utils.get_voxels(x, mip=mip, bounds=bounds, client=get_cave_client(datastack))
diff --git a/navis/interfaces/microns.py b/navis/interfaces/microns.py
index bb5d6aed..edb83223 100644
--- a/navis/interfaces/microns.py
+++ b/navis/interfaces/microns.py
@@ -8,7 +8,7 @@
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 
 """Interface with MICrONS datasets: https://www.microns-explorer.org/."""
@@ -35,6 +35,9 @@
 except BaseException:
     raise
 
+NUCLEUS_TABLE = "nucleus_detection_v0"
+
+
 @lru_cache(None)
 def _translate_datastack(datastack):
     """Translate datastack to source."""
@@ -44,6 +47,10 @@ def _translate_datastack(datastack):
         return datastack
     elif datastack in ("cortex65", "minnie65"):
         # Find the latest cortex65 datastack
+        # "minnie65_public" is apparently the preferred datastack
+        if "minnie65_public" in ds:
+            return "minnie65_public"
+        # If for some reason that stack is not available, just take the latest
         return sorted([d for d in ds if "minnie65_public" in d and "sandbox" not in d])[
             -1
         ]
@@ -54,9 +61,7 @@ def _translate_datastack(datastack):
         ]
     elif datastack == "layer 2/3":
         # The "pinky_sandbox" seems to be the latest layer 2/3 datastack
-        return sorted([d for d in ds if "pinky" in d])[
-            -1
-        ]
+        return sorted([d for d in ds if "pinky" in d])[-1]
 
     raise ValueError(f"Datastack '{datastack}' not found.")
 
@@ -93,8 +98,7 @@ def get_cave_client(datastack="cortex65"):
     datastack :     "cortex65" | "cortex35" | "layer 2/3" | str
                     Which dataset to query. "cortex65", "cortex35" and "layer 2/3"
                     are internally mapped to the corresponding sources: for example,
-                    "minnie65_public_vXXX" for "cortex65" where XXX is always the
-                    most recent version).
+                    "minnie65_public" for "cortex65".
 
     """
     if not CAVEclient:
@@ -102,7 +106,9 @@ def get_cave_client(datastack="cortex65"):
 
     # Try mapping, else pass-through
     datastack = _translate_datastack(datastack)
-    return cave_utils.get_cave_client(datastack)
+    client = cave_utils.get_cave_client(datastack)
+    client.materialize.nucleus_table = NUCLEUS_TABLE
+    return client
 
 
 @lru_cache(None)
@@ -114,8 +120,7 @@ def list_annotation_tables(datastack="cortex65"):
     datastack :     "cortex65" | "cortex35" | "layer 2/3" | str
                     Which dataset to query. "cortex65", "cortex35" and "layer 2/3"
                     are internally mapped to the corresponding sources: for example,
-                    "minnie65_public_vXXX" for "cortex65" where XXX is always the
-                    most recent version).
+                    "minnie65_public" for "cortex65".
 
     Returns
     -------
@@ -126,12 +131,17 @@ def list_annotation_tables(datastack="cortex65"):
     return get_cave_client(datastack).materialize.get_tables()
 
 
-def fetch_neurons(x, *, lod=2,
-                  with_synapses=True,
-                  datastack='cortex65',
-                  parallel=True,
-                  max_threads=4,
-                  **kwargs):
+def fetch_neurons(
+    x,
+    *,
+    lod=2,
+    with_synapses=True,
+    datastack="cortex65",
+    materialization="auto",
+    parallel=True,
+    max_threads=4,
+    **kwargs,
+):
     """Fetch neuron meshes.
 
     Notes
@@ -151,8 +161,12 @@ def fetch_neurons(x, *, lod=2,
     datastack :     "cortex65" | "cortex35" | "layer 2/3" | str
                     Which dataset to query. "cortex65", "cortex35" and "layer 2/3"
                     are internally mapped to the corresponding sources: for example,
-                    "minnie65_public_vXXX" for "cortex65" where XXX is always the
-                    most recent version).
+                    "minnie65_public" for "cortex65".
+    materialization : "auto" | int
+                    Which materialization version to use to look up somas and synapses
+                    (if applicable). If "auto" (default) will try to find the most
+                    recent version that contains the given root IDs. If an
+                    integer is provided will use that version.
     parallel :      bool
                     If True, will use parallel threads to fetch data.
     max_threads :   int
@@ -167,27 +181,28 @@ def fetch_neurons(x, *, lod=2,
                     Containing :class:`navis.MeshNeuron`.
 
     """
-
+    client = get_cave_client(_translate_datastack(datastack))
     return cave_utils.fetch_neurons(
         x,
         lod=lod,
         with_synapses=with_synapses,
-        datastack=_translate_datastack(datastack),
+        client=client,
         parallel=parallel,
         max_threads=max_threads,
-        **kwargs
+        materialization=materialization,
+        **kwargs,
     )
 
 
-def get_voxels(x, mip=0, bounds=None, datastack='h01_c3_flat'):
-    """Fetch voxels making a up given root ID.
+def get_voxels(x, mip=0, bounds=None, datastack="cortex65"):
+    """Fetch voxels making up a given root ID.
 
     Parameters
     ----------
     x :             int
                     A single root ID.
     mip :           int
-                    Scale at which to fetch voxels.
+                    Scale at which to fetch voxels. Lower = higher resolution.
     bounds :        list, optional
                     Bounding box [xmin, xmax, ymin, ymax, zmin, zmax] in voxel
                     space. For example, the voxel resolution for mip 0
@@ -195,8 +210,7 @@ def get_voxels(x, mip=0, bounds=None, datastack='h01_c3_flat'):
     datastack :     "cortex65" | "cortex35" | "layer 2/3" | str
                     Which dataset to query. "cortex65", "cortex35" and "layer 2/3"
                     are internally mapped to the corresponding sources: for example,
-                    "minnie65_public_vXXX" for "cortex65" where XXX is always the
-                    most recent version).
+                    "minnie65_public" for "cortex65".
 
     Returns
     -------
@@ -205,8 +219,5 @@ def get_voxels(x, mip=0, bounds=None, datastack='h01_c3_flat'):
 
     """
     return cave_utils.get_voxels(
-        x,
-        mip=mip,
-        bounds=bounds,
-        datastack=_translate_datastack(datastack)
-    )
\ No newline at end of file
+        x, mip=mip, bounds=bounds, client=get_cave_client(datastack)
+    )