diff --git a/setup.cfg b/setup.cfg index d1c7494..c6c1195 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,9 +49,9 @@ package_dir = # For more information, check out https://semver.org/. install_requires = importlib-metadata; python_version<"3.8" - dolomite-base + dolomite-base>=0.2.0-alpha5 h5py - delayedarray>=0.3.2 + delayedarray>=0.3.3 numpy filebackedarray @@ -71,6 +71,7 @@ testing = pytest pytest-cov scipy + dask [options.entry_points] # Add here console scripts like: diff --git a/src/dolomite_matrix/DelayedMask.py b/src/dolomite_matrix/DelayedMask.py new file mode 100644 index 0000000..8a94f8e --- /dev/null +++ b/src/dolomite_matrix/DelayedMask.py @@ -0,0 +1,136 @@ +from typing import Tuple, Optional, Sequence +import delayedarray +import numpy + +class DelayedMask(delayedarray.DelayedOp): + """ + Delayed mask to replace the missing value placeholder with a NumPy masked array. + """ + + def __init__(self, seed, placeholder, dtype: Optional[numpy.dtype] = None): + """ + Args: + seed: + Any object that satisfies the seed contract, + see :py:class:`~delayedarray.DelayedArray.DelayedArray` for details. + + placeholder: + Placeholder value for defining masked values, of the same type + as ``seed.dtype`` (or coercible into that type). All values + equal to the placeholder are considered to be missing. + + dtype: + Desired type of the masked output, defaults to ``seed.dtype``. + """ + self._seed = seed + + if numpy.issubdtype(seed.dtype, numpy.str_) and isinstance(placeholder, bytes): + self._placeholder = numpy.str_(placeholder.decode("UTF8")) + else: + self._placeholder = seed.dtype.type(placeholder) + + if dtype is None: + dtype = seed.dtype + self._dtype = dtype + + @property + def shape(self) -> Tuple[int, ...]: + """ + Returns: + Tuple of integers specifying the extent of each dimension of this + object. This is the same as the ``seed`` object. + """ + return self._seed.shape + + @property + def dtype(self) -> numpy.dtype: + """ + Returns: + NumPy type for the contents after masking. + """ + return self._dtype + + @property + def seed(self): + """ + Returns: + The seed object. + """ + return self._seed + + @property + def placeholder(self): + """ + Returns: + The placeholder value. + """ + return self._placeholder + + +def _create_mask(x: numpy.ndarray, placeholder): + if numpy.issubdtype(placeholder.dtype, numpy.floating) and numpy.isnan(placeholder): + return numpy.isnan(x) + else: + return (x == placeholder) + + +@delayedarray.extract_dense_array.register +def extract_dense_array_DelayedMask(x: DelayedMask, subset: Optional[Tuple[Sequence[int], ...]] = None): + """See :py:meth:`~delayedarray.extract_dense_array.extract_dense_array`.""" + out = delayedarray.extract_dense_array(x._seed, subset) + mask = _create_mask(out, x._placeholder) # do this before type coercion, as the placeholder is assumed to be of the same underlying seed type. + out = out.astype(x._dtype, copy=False) + if mask.any(): + out = numpy.ma.MaskedArray(out, mask=mask) + return out + + +def _mask_SparseNdarray(contents, placeholder, dtype): + if not isinstance(contents, list): + indices, values = contents + mask = _create_mask(values, placeholder) # do this before type coercion, again. 
+ values = values.astype(dtype, copy=False) + if mask.any(): + values = numpy.ma.MaskedArray(values, mask=mask) + return indices, values + else: + output = [] + for val in contents: + if val is None: + output.append(val) + else: + output.append(_mask_SparseNdarray(val, placeholder, dtype)) + return output + + +@delayedarray.extract_sparse_array.register +def extract_sparse_array_DelayedMask(x: DelayedMask, subset: Optional[Tuple[Sequence[int], ...]] = None): + """See :py:meth:`~delayedarray.extract_sparse_array.extract_sparse_array`.""" + out = delayedarray.extract_sparse_array(x._seed, subset) + contents = out.contents + if contents is not None: + contents = _mask_SparseNdarray(contents, x._placeholder, x._dtype) + return delayedarray.SparseNdarray(x.shape, contents, dtype=x._dtype, index_dtype=out.index_dtype, check=False) + + +@delayedarray.create_dask_array.register +def create_dask_array_DelayedMask(x: DelayedMask): + """See :py:meth:`~delayedarray.create_dask_array.create_dask_array`.""" + target = delayedarray.create_dask_array(x._seed) + mask = (target == x._placeholder) + target = target.astype(x._dtype) + import dask.array + return dask.array.ma.masked_array(target, mask=mask) + + +@delayedarray.chunk_shape.register +def chunk_shape_DelayedMask(x: DelayedMask): + """See :py:meth:`~delayedarray.chunk_shape.chunk_shape`.""" + return delayedarray.chunk_shape(x._seed) + + +@delayedarray.is_sparse.register +def is_sparse_DelayedMask(x: DelayedMask): + """See :py:meth:`~delayedarray.is_sparse.is_sparse`.""" + return delayedarray.is_sparse(x._seed) + diff --git a/src/dolomite_matrix/ReloadedArray.py b/src/dolomite_matrix/ReloadedArray.py new file mode 100644 index 0000000..7c99225 --- /dev/null +++ b/src/dolomite_matrix/ReloadedArray.py @@ -0,0 +1,124 @@ +import delayedarray +from dolomite_base import save_object +import os +import shutil + +from .WrapperArraySeed import WrapperArraySeed +from .save_compressed_sparse_matrix import _save_compressed_sparse_matrix +from .save_dense_array import _save_dense_array + + +class ReloadedArraySeed(WrapperArraySeed): + """ + Seed for the :py:class:`~ReloadedArray` class. This is a subclass + of :py:class:`~dolomite_matrix.WrapperArraySeed.WrapperArraySeed`. + """ + + def __init__(self, seed, path: str): + """ + Args: + seed: The contents of the reloaded array. + path: Path to the directory containing the on-disk representation. + """ + super(ReloadedArraySeed, self).__init__(seed) + self._path = path + + @property + def path(self) -> str: + """ + Returns: + Path to the directory containing the on-disk representation. + """ + return self._path + + +class ReloadedArray(delayedarray.DelayedArray): + """ + An array that was reloaded from disk by the + :py:func:`~dolomite_base.read_object.read_object` function, and remembers + the path from which it was loaded. This class allows methods to refer to + the existing on-disk representation by inspecting the path. For example, + :py:func:`~dolomite_base.save_object.save_object` can just copy/link to the + existing files instead of repeating the saving process. + """ + + def __init__(self, seed, path: str): + """ + To construct a ``ReloadedArray`` from an existing + :py:class:`~ReloadedArraySeed`, use :py:meth:`~delayedarray.wrap.wrap` + instead. + + Args: + seed: The contents of the reloaded array. + path: Path to the directory containing the on-disk representation. 
+ """ + if not isinstance(seed, ReloadedArraySeed): + seed = ReloadedArraySeed(seed, path) + super(ReloadedArray, self).__init__(seed) + + @property + def path(self) -> str: + """ + Returns: + Path to the directory containing the on-disk representation. + """ + return self.seed._path + + +@delayedarray.wrap.register +def wrap_ReloadedArraySeed(x: ReloadedArraySeed) -> ReloadedArray: + """See :py:func:`~delayedarray.wrap.wrap`.""" + return ReloadedArray(x) + + +@save_object.register +def save_object_ReloadedArray(x: ReloadedArray, path: str, reloaded_array_reuse_mode: str = "link", **kwargs): + """ + Method for saving :py:class:`~ReloadedArray.ReloadedArray` objects to disk, + see :py:meth:`~dolomite_base.save_object.save_object` for details. + + Args: + x: Object to be saved. + + path: Path to a directory to save ``x``. + + reloaded_array_reuse_mode: + How the files in ``x.path`` should be re-used when populating + ``path``. This can be ``"link"``, to create a hard link to each + file; ``"symlink"``, to create a symbolic link to each file; + ``"copy"``, to create a copy of each file; or ``"none"``, to + perform a fresh save of ``x`` without relying on ``x.path``. + + kwargs: Further arguments, ignored. + + Returns: + ``x`` is saved to ``path``. + """ + if reloaded_array_reuse_mode == "none": + if delayedarray.is_sparse(x): + return _save_compressed_sparse_matrix(x, path, **kwargs) + else: + return _save_dense_array(x, path, **kwargs) + + if reloaded_array_reuse_mode == "link": + def FUN(src, dest): + try: + os.link(src, dest) + except: + shutil.copyfile(src, dest) + elif reloaded_array_reuse_mode == "symlink": + def FUN(src, dest): + try: + os.symlink(src, dest) + except: + shutil.copyfile(src, dest) + elif reloaded_array_reuse_mode == "copy": + FUN = shutil.copyfile + else: + raise ValueError("invalid reuse mode '" + reloaded_array_reuse_mode + "'") + + for root, dirs, files in os.walk(x.path): + newpath = os.path.join(path, os.path.relpath(root, x.path)) + os.makedirs(newpath) + for f in files: + FUN(os.path.join(root, f), os.path.join(newpath, f)) diff --git a/src/dolomite_matrix/WrapperArraySeed.py b/src/dolomite_matrix/WrapperArraySeed.py new file mode 100644 index 0000000..0066d37 --- /dev/null +++ b/src/dolomite_matrix/WrapperArraySeed.py @@ -0,0 +1,73 @@ +from typing import Tuple, Sequence, Optional +import numpy +import delayedarray + + +class WrapperArraySeed: + """ + Wrapper for a DelayedArray seed, which forwards all of the required + operations to the seed object. This is expected to be used as a base for + concrete subclasses that attach more provenance-tracking information - see + :py:class:`~dolomite_base.ReloadedArray.ReloadedArray` for an example. + """ + + def __init__(self, seed): + """ + Args: + seed: The underlying seed instance to be wrapped. + """ + self._seed = seed + + @property + def seed(self): + """ + Returns: + The underlying seed instance. + """ + return self._seed + + @property + def shape(self) -> Tuple[int, ...]: + """ + Returns: + The shape of the seed. + """ + return self._seed.shape + + @property + def dtype(self) -> numpy.dtype: + """ + Returns: + The type of the seed. 
+ """ + return self._seed.dtype + + +@delayedarray.is_sparse.register +def is_sparse_WrapperArraySeed(x: WrapperArraySeed) -> bool: + """See :py:func:`~delayedarray.is_sparse.is_sparse` for details.""" + return delayedarray.is_sparse(x._seed) + + +@delayedarray.chunk_shape.register +def chunk_shape_WrapperArraySeed(x: WrapperArraySeed) -> Tuple[int, ...]: + """See :py:func:`~delayedarray.chunk_shape.chunk_shape` for details.""" + return delayedarray.chunk_shape(x._seed) + + +@delayedarray.extract_dense_array.register +def extract_dense_array_WrapperArraySeed(x: WrapperArraySeed, subset: Optional[Tuple[Sequence[int], ...]] = None) -> numpy.ndarray: + """See :py:func:`~delayedarray.extract_dense_array.extract_dense_array` for details.""" + return delayedarray.extract_dense_array(x._seed, subset) + + +@delayedarray.extract_sparse_array.register +def extract_sparse_array_WrapperArraySeed(x: WrapperArraySeed, subset: Optional[Tuple[Sequence[int], ...]] = None) -> delayedarray.SparseNdarray: + """See :py:func:`~delayedarray.extract_sparse_array.extract_sparse_array` for details.""" + return delayedarray.extract_sparse_array(x._seed, subset) + + +@delayedarray.create_dask_array.register +def create_dask_array_WrapperArraySeed(x: WrapperArraySeed): + """See :py:func:`~delayedarray.create_dask_array.create_dask_array` for details.""" + return delayedarray.create_dask_array(x._seed) diff --git a/src/dolomite_matrix/__init__.py b/src/dolomite_matrix/__init__.py index 6ce14bc..fa06f3e 100644 --- a/src/dolomite_matrix/__init__.py +++ b/src/dolomite_matrix/__init__.py @@ -16,10 +16,13 @@ del version, PackageNotFoundError -from .choose_dense_chunk_sizes import choose_dense_chunk_sizes -from .stage_ndarray import stage_ndarray -from .stage_DelayedArray import stage_DelayedArray -from .load_hdf5_dense_array import load_hdf5_dense_array -from .write_sparse_matrix import write_sparse_matrix -from .stage_sparse import * -from .load_hdf5_sparse_matrix import load_hdf5_sparse_matrix +from .choose_chunk_dimensions import choose_chunk_dimensions +from .save_dense_array import save_dense_array_from_ndarray +from .read_dense_array import read_dense_array +from .save_compressed_sparse_matrix import * +from .read_compressed_sparse_matrix import read_compressed_sparse_matrix +from .save_delayed_array import save_delayed_array + +from .DelayedMask import DelayedMask +from .WrapperArraySeed import WrapperArraySeed +from .ReloadedArray import ReloadedArray, ReloadedArraySeed diff --git a/src/dolomite_matrix/_optimize_storage.py b/src/dolomite_matrix/_optimize_storage.py new file mode 100644 index 0000000..68bc01b --- /dev/null +++ b/src/dolomite_matrix/_optimize_storage.py @@ -0,0 +1,624 @@ +from delayedarray import SparseNdarray, is_sparse, apply_over_blocks +from typing import Callable, Any, Optional, Union, Tuple +from functools import singledispatch, reduce +from collections import namedtuple +from dataclasses import dataclass +import numpy +import dolomite_base as dl +import h5py + + +has_scipy = False +try: + import scipy.sparse + has_scipy = True +except: + pass + + +def _aggregate_any(collated: list, name: str): + for y in collated: + val = getattr(y, name) + if val is not None and val: + return True + return False + + +def _aggregate_min(collated: list, name: str): + mval = None + for y in collated: + val = getattr(y, name) + if val is not None: + if mval is None or mval > val: + mval = val + return mval + + +def _aggregate_max(collated: list, name: str): + mval = None + for y in collated: + val = getattr(y, 
name)
+        if val is not None:
+            if mval is None or mval < val:
+                mval = val
+    return mval
+
+
+def _aggregate_sum(collated: list, name: str):
+    mval = 0
+    for y in collated:
+        val = getattr(y, name)
+        if val is not None:
+            mval += val
+    return mval
+
+
+def _blockwise_any(x: numpy.ndarray, condition: Callable):
+    y = x.ravel()
+    step = 100000
+    limit = len(y)
+    for i in range(0, limit, step):
+        if condition(y[i : min(limit, i + step)]).any():
+            return True
+    return False
+
+
+def _collect_from_Sparse2darray(contents, fun: Callable, dtype: numpy.dtype):
+    if contents is None:
+        attrs = fun(numpy.array([], dtype=dtype))
+        attrs.non_zero = 0
+        return [attrs]
+
+    output = []
+    for i, node in enumerate(contents):
+        if node is None:
+            val = numpy.array([], dtype=dtype)
+        else:
+            val = node[1]
+        attrs = fun(val)
+        attrs.non_zero = len(val)
+        output.append(attrs)
+
+    return output
+
+
+_OptimizedStorageParameters = namedtuple("_OptimizedStorageParameters", ["type", "placeholder", "non_zero"])
+
+
+def _unique_values_from_ndarray(position: Tuple, contents: numpy.ndarray) -> set:
+    if not numpy.ma.is_masked(contents):
+        return set(contents)
+    output = set()
+    for y in contents:
+        if not numpy.ma.is_masked(y):
+            output.add(y)
+    return output
+
+
+def _unique_values_from_Sparse2darray(position: Tuple, contents: SparseNdarray) -> set:
+    output = set()
+    if contents is not None:
+        for i, node in enumerate(contents):
+            if node is not None:
+                output |= _unique_values_from_ndarray(position, node[1])
+    return output
+
+
+def _unique_values(x) -> set:
+    if is_sparse(x):
+        uniq_sets = apply_over_blocks(x, _unique_values_from_Sparse2darray, allow_sparse=True)
+    else:
+        uniq_sets = apply_over_blocks(x, _unique_values_from_ndarray)
+    return reduce(lambda a, b : a | b, uniq_sets)
+
+
+###################################################
+###################################################
+
+
+@singledispatch
+def collect_integer_attributes(x: Any):
+    if is_sparse(x):
+        collated = apply_over_blocks(x, lambda pos, block : _collect_integer_attributes_from_Sparse2darray(block), allow_sparse=True)
+    else:
+        collated = apply_over_blocks(x, lambda pos, block : _collect_integer_attributes_from_ndarray(block))
+    return _combine_integer_attributes(collated)
+
+
+@dataclass
+class _IntegerAttributes:
+    minimum: Optional[int]
+    maximum: Optional[int]
+    missing: bool
+    non_zero: int = 0
+
+
+def _simple_integer_collector(x: numpy.ndarray) -> _IntegerAttributes:
+    if x.size == 0:
+        return _IntegerAttributes(minimum = None, maximum = None, missing = False)
+
+    missing = False
+    if numpy.ma.is_masked(x):
+        if x.mask.all():
+            return _IntegerAttributes(minimum = None, maximum = None, missing = True)
+        if x.mask.any():
+            missing = True
+
+    return _IntegerAttributes(minimum=x.min(), maximum=x.max(), missing=missing)
+
+
+def _combine_integer_attributes(x: list[_IntegerAttributes]) -> _IntegerAttributes:
+    return _IntegerAttributes(
+        minimum=_aggregate_min(x, "minimum"),
+        maximum=_aggregate_max(x, "maximum"),
+        missing=_aggregate_any(x, "missing"),
+        non_zero=_aggregate_sum(x, "non_zero"),
+    )
+
+
+@collect_integer_attributes.register
+def _collect_integer_attributes_from_ndarray(x: numpy.ndarray) -> _IntegerAttributes:
+    return _simple_integer_collector(x)
+
+
+@collect_integer_attributes.register
+def _collect_integer_attributes_from_Sparse2darray(x: SparseNdarray) -> _IntegerAttributes:
+    collected = _collect_from_Sparse2darray(x.contents, _simple_integer_collector, x.dtype)
+    return _combine_integer_attributes(collected)
+
+
+if has_scipy:
@collect_integer_attributes.register + def _collect_integer_attributes_from_scipy_csc(x: scipy.sparse.csc_matrix): + output = _simple_integer_collector(x.data) + output.non_zero = int(x.data.shape[0]) + return output + + + @collect_integer_attributes.register + def _collect_integer_attributes_from_scipy_csr(x: scipy.sparse.csr_matrix): + output = _simple_integer_collector(x.data) + output.non_zero = int(x.data.shape[0]) + return output + + + @collect_integer_attributes.register + def _collect_integer_attributes_from_scipy_coo(x: scipy.sparse.coo_matrix): + output = _simple_integer_collector(x.data) + output.non_zero = int(x.data.shape[0]) + return output + + +def optimize_integer_storage(x) -> _OptimizedStorageParameters: + attr = collect_integer_attributes(x) + lower = attr.minimum + upper = attr.maximum + has_missing = attr.missing + + if has_missing: + # If it's None, that means that there are only missing values in + # 'x', otherwise there should have been at least one finite value + # available. In any case, it means we can just do whatever we want so + # we'll just use the smallest type. + if lower is None: + return _OptimizedStorageParameters(type="i1", placeholder=-2**7, non_zero=attr.non_zero) + + if lower < 0: + if lower > -2**7 and upper < 2**7: + return _OptimizedStorageParameters(type="i1", placeholder=-2**7, non_zero=attr.non_zero) + elif lower > -2**15 and upper < 2**15: + return _OptimizedStorageParameters(type="i2", placeholder=-2**15, non_zero=attr.non_zero) + elif lower > -2**31 and upper < 2**31: + return _OptimizedStorageParameters(type="i4", placeholder=-2**31, non_zero=attr.non_zero) + else: + if upper < 2**8 - 1: + return _OptimizedStorageParameters(type="u1", placeholder=2**8-1, non_zero=attr.non_zero) + elif upper < 2**16 - 1: + return _OptimizedStorageParameters(type="u2", placeholder=2**16-1, non_zero=attr.non_zero) + elif upper < 2**31 - 1: # Yes, this is deliberate, as integer storage maxes out at 32-bit signed integers. + return _OptimizedStorageParameters(type="i4", placeholder=2**31-1, non_zero=attr.non_zero) + + return _OptimizedStorageParameters(type="f8", placeholder=numpy.NaN, non_zero=attr.non_zero) + + else: + # If it's infinite, that means that 'x' is of length zero, otherwise + # there should have been at least one finite value available. Here, + # the type doesn't matter, so we'll just use the smallest. + if lower is None: + return _OptimizedStorageParameters(type="i1", placeholder=None, non_zero=attr.non_zero) + + if lower < 0: + if lower >= -2**7 and upper < 2**7: + return _OptimizedStorageParameters(type="i1", placeholder=None, non_zero=attr.non_zero) + elif lower >= -2**15 and upper < 2**15: + return _OptimizedStorageParameters(type="i2", placeholder=None, non_zero=attr.non_zero) + elif lower >= -2**31 and upper < 2**31: + return _OptimizedStorageParameters(type="i4", placeholder=None, non_zero=attr.non_zero) + else: + if upper < 2**8: + return _OptimizedStorageParameters(type="u1", placeholder=None, non_zero=attr.non_zero) + elif upper < 2**16: + return _OptimizedStorageParameters(type="u2", placeholder=None, non_zero=attr.non_zero) + elif upper < 2**31: # Yes, this is deliberate, as integer storage maxes out at 32-bit signed integers. 
+ return _OptimizedStorageParameters(type="i4", placeholder=None, non_zero=attr.non_zero) + + return _OptimizedStorageParameters(type="f8", placeholder=None, non_zero=attr.non_zero) + + +################################################### +################################################### + + +@dataclass +class _FloatAttributes: + minimum: Optional[int] + maximum: Optional[int] + missing: bool + non_integer: bool + has_nan: bool + has_positive_inf: bool + has_negative_inf: bool + has_zero: Optional[bool] + has_lowest: Optional[bool] + has_highest: Optional[bool] + non_zero: int = 0 + + +@singledispatch +def collect_float_attributes(x: Any, no_missing: bool) -> _FloatAttributes: + if is_sparse(x): + collated = apply_over_blocks(x, lambda pos, block : _collect_float_attributes_from_Sparse2darray(block, no_missing), allow_sparse=True) + else: + collated = apply_over_blocks(x, lambda pos, block : _collect_float_attributes_from_ndarray(block, no_missing)) + return _combine_float_attributes(collated) + + +def _simple_float_collector(x: numpy.ndarray, no_missing: bool) -> _FloatAttributes: + output = _FloatAttributes( + minimum = None, + maximum = None, + missing = False, + non_integer = False, + has_nan = False, + has_positive_inf = False, + has_negative_inf = False, + has_zero = None, + has_lowest = None, + has_highest = None + ) + + if x.size == 0: + return output + + if not no_missing: + if numpy.ma.is_masked(x): + if x.mask.all(): + output.missing = True + return output + if x.mask.any(): + output.missing = True + + # While these are technically only used if there are missing values, we + # still need them to obtain 'non_integer', so we just compute them. + output.has_nan = _blockwise_any(x, numpy.isnan) + output.has_positive_inf = numpy.inf in x + output.has_negative_inf = -numpy.inf in x + + if output.has_nan or output.has_positive_inf or output.has_negative_inf: + output.non_integer = True + else: + output.non_integer = _blockwise_any(x, lambda b : (b % 1 != 0)) + + # Minimum and maximum are only used if all floats contain integers. + if not output.non_integer: + output.minimum = x.min() + output.maximum = x.max() + + # Highest/lowest are only used when there might be missing values. 
+ if not no_missing: + fstats = numpy.finfo(x.dtype) + output.has_lowest = fstats.min in x + output.has_highest = fstats.max in x + output.has_zero = 0 in x + + return output + + +@collect_float_attributes.register +def _collect_float_attributes_from_ndarray(x: numpy.ndarray, no_missing: bool) -> _FloatAttributes: + return _simple_float_collector(x, no_missing) + + +@collect_float_attributes.register +def _collect_float_attributes_from_Sparse2darray(x: SparseNdarray, no_missing: bool) -> _FloatAttributes: + collected = _collect_from_Sparse2darray(x.contents, lambda block : _simple_float_collector(block, no_missing), x.dtype) + return _combine_float_attributes(collected) + + +if has_scipy: + @collect_float_attributes.register + def _collect_float_attributes_from_scipy_csc(x: scipy.sparse.csc_matrix, no_missing: bool): + output = _simple_float_collector(x.data, no_missing) + output.non_zero = int(x.data.shape[0]) + return output + + + @collect_float_attributes.register + def _collect_float_attributes_from_scipy_csr(x: scipy.sparse.csr_matrix, no_missing: bool): + output = _simple_float_collector(x.data, no_missing) + output.non_zero = int(x.data.shape[0]) + return output + + + @collect_float_attributes.register + def _collect_float_attributes_from_scipy_coo(x: scipy.sparse.coo_matrix, no_missing: bool): + output = _simple_float_collector(x.data, no_missing) + output.non_zero = int(x.data.shape[0]) + return output + + +def _combine_float_attributes(x: list[_FloatAttributes]) -> _FloatAttributes: + return _FloatAttributes( + minimum=_aggregate_min(x, "minimum"), + maximum=_aggregate_max(x, "maximum"), + non_integer=_aggregate_any(x, "non_integer"), + missing=_aggregate_any(x, "missing"), + has_nan=_aggregate_any(x, "has_nan"), + has_positive_inf=_aggregate_any(x, "has_positive_inf"), + has_negative_inf=_aggregate_any(x, "has_negative_inf"), + has_lowest=_aggregate_any(x, "has_lowest"), + has_highest=_aggregate_any(x, "has_highest"), + has_zero=_aggregate_any(x, "has_zero"), + non_zero=_aggregate_sum(x, "non_zero"), + ) + + +def optimize_float_storage(x) -> _OptimizedStorageParameters: + attr = collect_float_attributes(x, isinstance(x, numpy.ndarray) and not numpy.ma.is_masked(x)) + + if attr.missing: + if not attr.non_integer: + lower = attr.minimum + upper = attr.maximum + + # See logic in optimize_integer_storage(). 
+ if lower is None: + return _OptimizedStorageParameters(type="i1", placeholder=-2**7, non_zero=attr.non_zero) + + if lower < 0: + if lower > -2**7 and upper < 2**7: + return _OptimizedStorageParameters(type="i1", placeholder=-2**7, non_zero=attr.non_zero) + elif lower > -2**15 and upper < 2**15: + return _OptimizedStorageParameters(type="i2", placeholder=-2**15, non_zero=attr.non_zero) + elif lower > -2**31 and upper < 2**31: + return _OptimizedStorageParameters(type="i4", placeholder=-2**31, non_zero=attr.non_zero) + else: + if upper < 2**8 - 1: + return _OptimizedStorageParameters(type="u1", placeholder=2**8-1, non_zero=attr.non_zero) + elif upper < 2**16 - 1: + return _OptimizedStorageParameters(type="u2", placeholder=2**16-1, non_zero=attr.non_zero) + elif upper < 2**32 - 1: + return _OptimizedStorageParameters(type="u4", placeholder=2**32-1, non_zero=attr.non_zero) + + placeholder = None + if not attr.has_nan: + placeholder = numpy.NaN + elif not attr.has_positive_inf: + placeholder = numpy.inf + elif not attr.has_negative_inf: + placeholder = -numpy.inf + elif not attr.has_lowest: + placeholder = numpy.finfo(x.dtype).min + elif not attr.has_highest: + placeholder = numpy.finfo(x.dtype).max + elif not attr.has_zero: + placeholder = 0 + + # Fallback that just goes through and pulls out all unique values. + # This does involve a coercion to 64-bit floats, though; that's + # just how 'choose_missing_float_placeholder' works currently. + if placeholder is None: + uniq = _unique_values(x) + placeholder = dl.choose_missing_float_placeholder(uniq) + return _OptimizedStorageParameters(type="f8", placeholder=placeholder, non_zero=attr.non_zero) + + if x.dtype == numpy.float32: + return _OptimizedStorageParameters(type="f4", placeholder=placeholder, non_zero=attr.non_zero) + else: + return _OptimizedStorageParameters(type="f8", placeholder=placeholder, non_zero=attr.non_zero) + + else: + if not attr.non_integer: + lower = attr.minimum + upper = attr.maximum + + # See logic in optimize_integer_storage(). 
+            if lower is None:
+                return _OptimizedStorageParameters(type="i1", placeholder=None, non_zero=attr.non_zero)
+
+            if lower < 0:
+                if lower >= -2**7 and upper < 2**7:
+                    return _OptimizedStorageParameters(type="i1", placeholder=None, non_zero=attr.non_zero)
+                elif lower >= -2**15 and upper < 2**15:
+                    return _OptimizedStorageParameters(type="i2", placeholder=None, non_zero=attr.non_zero)
+                elif lower >= -2**31 and upper < 2**31:
+                    return _OptimizedStorageParameters(type="i4", placeholder=None, non_zero=attr.non_zero)
+            else:
+                if upper < 2**8:
+                    return _OptimizedStorageParameters(type="u1", placeholder=None, non_zero=attr.non_zero)
+                elif upper < 2**16:
+                    return _OptimizedStorageParameters(type="u2", placeholder=None, non_zero=attr.non_zero)
+                elif upper < 2**32:
+                    return _OptimizedStorageParameters(type="u4", placeholder=None, non_zero=attr.non_zero)
+
+        if x.dtype == numpy.float32:
+            return _OptimizedStorageParameters(type="f4", placeholder=None, non_zero=attr.non_zero)
+        else:
+            return _OptimizedStorageParameters(type="f8", placeholder=None, non_zero=attr.non_zero)
+
+
+###################################################
+###################################################
+
+
+@dataclass
+class _StringAttributes:
+    has_na1: bool
+    has_na2: bool
+    missing: bool
+    max_len: int
+
+
+def _simple_string_collector(x: numpy.ndarray) -> _StringAttributes:
+    if x.size == 0:
+        return _StringAttributes(
+            has_na1 = False,
+            has_na2 = False,
+            missing = False,
+            max_len = 0,
+        )
+
+    missing = False
+    if numpy.ma.is_masked(x):
+        if x.mask.all():
+            return _StringAttributes(
+                has_na1=False,
+                has_na2=False,
+                missing=True,
+                max_len=0,
+            )
+        if x.mask.any():
+            missing = True
+
+    max_len = 0
+    if missing:
+        for y in x.ravel():
+            if not numpy.ma.is_masked(y):
+                candidate = len(y.encode("UTF8"))
+                if max_len < candidate:
+                    max_len = candidate
+    else:
+        max_len = max(len(y.encode("UTF8")) for y in x.ravel())
+
+    return _StringAttributes(
+        has_na1=x.dtype.type("NA") in x,
+        has_na2=x.dtype.type("NA_") in x,
+        missing=missing,
+        max_len=max_len,
+    )
+
+
+@singledispatch
+def collect_string_attributes(x: Any) -> _StringAttributes:
+    collected = apply_over_blocks(x, lambda pos, block : _collect_string_attributes_from_ndarray(block))
+    return _combine_string_attributes(collected)
+
+
+def _combine_string_attributes(x: list[_StringAttributes]) -> _StringAttributes:
+    return _StringAttributes(
+        has_na1 = _aggregate_any(x, "has_na1"),
+        has_na2 = _aggregate_any(x, "has_na2"),
+        missing = _aggregate_any(x, "missing"),
+        max_len = _aggregate_max(x, "max_len"),
+    )
+
+
+@collect_string_attributes.register
+def _collect_string_attributes_from_ndarray(x: numpy.ndarray) -> _StringAttributes:
+    return _simple_string_collector(x)
+
+
+def optimize_string_storage(x) -> _OptimizedStorageParameters:
+    attr = collect_string_attributes(x)
+    attr.max_len = max(1, attr.max_len)
+
+    placeholder = None
+    if attr.missing:
+        if not attr.has_na1:
+            placeholder = "NA"
+        elif not attr.has_na2:
+            placeholder = "NA_"
+        else:
+            uniq = _unique_values(x)
+            placeholder = dl.choose_missing_string_placeholder(uniq)
+        attr.max_len = max(len(placeholder.encode("UTF8")), attr.max_len)
+
+    strtype = h5py.string_dtype(encoding = "utf8", length = attr.max_len)
+    return _OptimizedStorageParameters(type = strtype, placeholder = placeholder, non_zero = 0)
+
+
+###################################################
+###################################################
+
+
+@dataclass
+class _BooleanAttributes:
+    missing: bool
+    non_zero: int = 0
+
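For illustration (not part of the patch), the type-narrowing rules above can be exercised directly; this sketch assumes the private module path `dolomite_matrix._optimize_storage` stays as defined in this diff:

    import numpy
    from dolomite_matrix._optimize_storage import optimize_integer_storage, optimize_float_storage

    # Unmasked values in [-100, 100] fit into a signed byte; no placeholder is needed.
    x = numpy.array([-100, 0, 100])
    print(optimize_integer_storage(x))  # type="i1", placeholder=None

    # Masking an element reserves the most negative 8-bit value as the placeholder.
    m = numpy.ma.MaskedArray(x, mask=[False, True, False])
    print(optimize_integer_storage(m))  # type="i1", placeholder=-128

    # Floats that are all integral are demoted to integer storage.
    f = numpy.array([1.0, 2.0, 3.0])
    print(optimize_float_storage(f))  # type="u1" (non-negative integral values), placeholder=None
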
+@singledispatch +def collect_boolean_attributes(x: Any) -> _BooleanAttributes: + if is_sparse(x): + collated = apply_over_blocks(x, lambda pos, block : _collect_boolean_attributes_from_Sparse2darray(block), allow_sparse=True) + else: + collated = apply_over_blocks(x, lambda pos, block : _collect_boolean_attributes_from_ndarray(block)) + return _combine_boolean_attributes(collated) + + +@collect_boolean_attributes.register +def _collect_boolean_attributes_from_ndarray(x: numpy.ndarray) -> _BooleanAttributes: + return _simple_boolean_collector(x) + + +@collect_boolean_attributes.register +def _collect_boolean_attributes_from_Sparse2darray(x: SparseNdarray) -> _BooleanAttributes: + collected = _collect_from_Sparse2darray(x.contents, _simple_boolean_collector, x.dtype) + return _combine_boolean_attributes(collected) + + +def _simple_boolean_collector(x: numpy.ndarray) -> _BooleanAttributes: + missing = False + if x.size: + if numpy.ma.is_masked(x): + if x.mask.any(): + missing = True + return _BooleanAttributes(non_zero = 0, missing = missing) + + +def _combine_boolean_attributes(x: list[_BooleanAttributes]) -> _BooleanAttributes: + return _BooleanAttributes( + missing = _aggregate_any(x, "missing"), + non_zero = _aggregate_sum(x, "non_zero") + ) + + +if has_scipy: + @collect_boolean_attributes.register + def _collect_boolean_attributes_from_scipy_csc(x: scipy.sparse.csc_matrix): + output = _simple_boolean_collector(x.data) + output.non_zero = int(x.data.shape[0]) + return output + + + @collect_boolean_attributes.register + def _collect_boolean_attributes_from_scipy_csr(x: scipy.sparse.csr_matrix): + output = _simple_boolean_collector(x.data) + output.non_zero = int(x.data.shape[0]) + return output + + + @collect_boolean_attributes.register + def _collect_boolean_attributes_from_scipy_coo(x: scipy.sparse.coo_matrix): + output = _simple_boolean_collector(x.data) + output.non_zero = int(x.data.shape[0]) + return output + + +def optimize_boolean_storage(x) -> _OptimizedStorageParameters: + attr = collect_boolean_attributes(x) + if attr.missing: + return _OptimizedStorageParameters(type="i1", placeholder=-1, non_zero=attr.non_zero) + else: + return _OptimizedStorageParameters(type="i1", placeholder=None, non_zero=attr.non_zero) diff --git a/src/dolomite_matrix/_utils.py b/src/dolomite_matrix/_utils.py index fbeaa34..ea1441e 100644 --- a/src/dolomite_matrix/_utils.py +++ b/src/dolomite_matrix/_utils.py @@ -1,48 +1,11 @@ -from numpy import issubdtype, integer, floating, bool_, uint8, prod, ceil -from h5py import File -from typing import Tuple -from delayedarray import DelayedArray, chunk_shape - - -def _translate_array_type(dtype): - if issubdtype(dtype, integer): - return "integer" - if issubdtype(dtype, floating): - return "number" - if dtype == bool_: - return "boolean" - raise NotImplementedError("staging of '" + str(type(dtype)) + "' arrays not yet supported") - - -def _choose_file_dtype(dtype): - if dtype == bool_: - return uint8 - return dtype - - -def _open_writeable_hdf5_handle(path: str, cache_size: int, num_slots: int = 1000003): - # using a prime for the number of slots to avoid collisions in the cache. - return File(path, "w", rdcc_nbytes = cache_size, rdcc_nslots = num_slots) - - -def _choose_block_shape(x: DelayedArray, block_size: int) -> Tuple[int, ...]: - # Block shapes are calculated by scaling up the chunks (from last to first, - # i.e., the fastest changing to the slowest) until the block size is exceeded. 
-    full_shape = x.shape
-    ndim = len(full_shape)
-
-    block_shape = list(chunk_shape(x))
-    block_elements = int(block_size / x.dtype.itemsize)
-
-    for i in range(ndim - 1, -1, -1):
-        current_elements = prod(block_shape) # just recompute it, avoid potential overflow issues.
-        if current_elements >= block_elements:
-            break
-        scaling = int(block_elements / current_elements)
-        if scaling == 1:
-            break
-        block_shape[i] = min(full_shape[i], scaling * block_shape[i])
-
-    return (*block_shape,)
+import numpy
+
+def sanitize_for_writing(x, placeholder):
+    if not numpy.ma.is_masked(x):
+        return x
+    if not x.mask.any():
+        return x.data  # nothing is actually masked, so the underlying data can be used directly.
+    copy = x.data.copy()
+    copy[x.mask] = placeholder  # replace masked entries with the chosen placeholder.
+    return copy
diff --git a/src/dolomite_matrix/choose_dense_chunk_sizes.py b/src/dolomite_matrix/choose_chunk_dimensions.py
similarity index 85%
rename from src/dolomite_matrix/choose_dense_chunk_sizes.py
rename to src/dolomite_matrix/choose_chunk_dimensions.py
index bb5d6e6..dbf19ea 100644
--- a/src/dolomite_matrix/choose_dense_chunk_sizes.py
+++ b/src/dolomite_matrix/choose_chunk_dimensions.py
@@ -1,8 +1,9 @@
-from typing import Tuple
+from typing import Tuple, Any
 
 
-def choose_dense_chunk_sizes(shape: Tuple[int, ...], size: int, min_extent: int = 100, memory: int = 1e7) -> Tuple[int, ...]:
-    """Choose some chunk sizes to use for a dense HDF5 dataset. For each
+def choose_chunk_dimensions(shape: Tuple[int, ...], size: int, min_extent: int = 100, memory: int = 1e7) -> Tuple[int, ...]:
+    """
+    Choose chunk dimensions to use for a dense HDF5 dataset. For each
     dimension, we consider a slice of the array that consists of the full
     extent of all other dimensions. We want this slice to occupy less than
     ``memory`` in memory, and we resize the slice along the current dimension
diff --git a/src/dolomite_matrix/load_hdf5_dense_array.py b/src/dolomite_matrix/load_hdf5_dense_array.py
deleted file mode 100644
index 2dd395c..0000000
--- a/src/dolomite_matrix/load_hdf5_dense_array.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from typing import Any
-from numpy import bool_
-from filebackedarray import Hdf5DenseArray
-from dolomite_base import acquire_file
-
-
-def load_hdf5_dense_array(meta: dict[str, Any], project, **kwargs) -> Hdf5DenseArray:
-    """
-    Load a HDF5-backed dense array. In general, this function should be
-    called via :py:meth:`~dolomite_base.load_object.load_object`.
-
-    Args:
-        meta: Metadata for this HDF5 array.
-
-        project: Value specifying the project of interest. This is most
-            typically a string containing a file path to a staging directory
-            but may also be an application-specific object that works with
-            :py:meth:`~dolomite_base.acquire_file.acquire_file`.
-
-        kwargs: Further arguments, ignored.
-
-    Returns:
-        A HDF5-backed dense array.
-    """
-    fpath = acquire_file(project, meta["path"])
-    name = meta["hdf5_dense_array"]["dataset"]
-
-    dtype = None
-    if meta["array"]["type"] == "boolean":
-        dtype = bool_
-
-    return Hdf5DenseArray(fpath, name, dtype=dtype, native_order=False)
diff --git a/src/dolomite_matrix/load_hdf5_sparse_matrix.py b/src/dolomite_matrix/load_hdf5_sparse_matrix.py
deleted file mode 100644
index 99b4dae..0000000
--- a/src/dolomite_matrix/load_hdf5_sparse_matrix.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from typing import Any
-from numpy import bool_
-from filebackedarray import Hdf5CompressedSparseMatrix
-from dolomite_base import acquire_file
-
-
-def load_hdf5_sparse_matrix(meta: dict[str, Any], project, **kwargs) -> Hdf5CompressedSparseMatrix:
-    """
-    Load a HDF5-backed sparse matrix.
In general, this function should be - called via :py:meth:`~dolomite_base.load_object.load_object`. - - Args: - meta: Metadata for this HDF5 array. - - project: Value specifying the project of interest. This is most - typically a string containing a file path to a staging directory - but may also be an application-specific object that works with - :py:meth:`~dolomite_base.acquire_file.acquire_file`. - - kwargs: Further arguments, ignored. - - Returns: - A HDF5-backed sparse matrix. - """ - fpath = acquire_file(project, meta["path"]) - name = meta["hdf5_sparse_matrix"]["group"] - - dtype = None - if meta["array"]["type"] == "boolean": - dtype = bool_ - - return Hdf5CompressedSparseMatrix(fpath, name, shape=(*meta["array"]["dimensions"],), by_column=True, dtype=dtype) diff --git a/src/dolomite_matrix/read_compressed_sparse_matrix.py b/src/dolomite_matrix/read_compressed_sparse_matrix.py new file mode 100644 index 0000000..d8bab24 --- /dev/null +++ b/src/dolomite_matrix/read_compressed_sparse_matrix.py @@ -0,0 +1,54 @@ +import numpy +import os +import h5py +from delayedarray import DelayedArray +from filebackedarray import Hdf5CompressedSparseMatrixSeed + +from .DelayedMask import DelayedMask +from .ReloadedArray import ReloadedArray + + +def read_compressed_sparse_matrix(path: str, **kwargs) -> DelayedArray: + """ + Read a compressed sparse matrix from its on-disk representation. In + general, this function should not be called directly but instead be + dispatched via :py:meth:`~dolomite_base.read_object.read_object`. + + Args: + path: Path to the directory containing the object. + + kwargs: Further arguments, ignored. + + Returns: + A HDF5-backed compressed sparse matrix. + """ + fpath = os.path.join(path, "matrix.h5") + name = "compressed_sparse_matrix" + + with h5py.File(fpath, "r") as handle: + ghandle = handle[name] + dhandle = ghandle["data"] + + tt = ghandle.attrs["type"] + dtype = None + if tt == "boolean": + dtype = numpy.dtype("bool") + elif tt == "float": + if not numpy.issubdtype(dhandle.dtype, numpy.floating): + dtype = numpy.dtype("float64") + + layout = ghandle.attrs["layout"] + shape = (*[int(y) for y in ghandle["shape"]],) + + placeholder = None + if "missing-value-placeholder" in dhandle.attrs: + placeholder = dhandle.attrs["missing-value-placeholder"] + + bycol = (layout == "CSC") + if placeholder is None: + seed = Hdf5CompressedSparseMatrixSeed(fpath, name, shape=shape, by_column = bycol, dtype = dtype) + else: + core = Hdf5CompressedSparseMatrixSeed(fpath, name, shape=shape, by_column = bycol) + seed = DelayedMask(core, placeholder=placeholder, dtype=dtype) + + return ReloadedArray(seed, path) diff --git a/src/dolomite_matrix/read_dense_array.py b/src/dolomite_matrix/read_dense_array.py new file mode 100644 index 0000000..68b47cd --- /dev/null +++ b/src/dolomite_matrix/read_dense_array.py @@ -0,0 +1,59 @@ +import numpy +import os +import h5py +from delayedarray import DelayedArray +from filebackedarray import Hdf5DenseArraySeed + +from .DelayedMask import DelayedMask +from .ReloadedArray import ReloadedArray + + +def read_dense_array(path: str, **kwargs) -> DelayedArray: + """ + Read a dense array from its on-disk representation. In general, this + function should not be called directly but instead be dispatched via + :py:meth:`~dolomite_base.read_object.read_object`. + + Args: + path: Path to the directory containing the object. + + kwargs: Further arguments, ignored. + + Returns: + A HDF5-backed dense array. 
+ """ + fpath = os.path.join(path, "array.h5") + name = "dense_array" + + with h5py.File(fpath, "r") as handle: + ghandle = handle[name] + dhandle = ghandle["data"] + + tt = ghandle.attrs["type"] + dtype = None + if tt == "boolean": + dtype = numpy.dtype("bool") + elif tt == "string": + dtype_name = "U" + str(dhandle.dtype.itemsize) + dtype = numpy.dtype(dtype_name) + elif tt == "float": + if not numpy.issubdtype(dhandle.dtype, numpy.floating): + dtype = numpy.dtype("float64") + + transposed = False + if "transposed" in ghandle: + transposed = (ghandle["transposed"][()] != 0) + + placeholder = None + if "missing-value-placeholder" in dhandle.attrs: + placeholder = dhandle.attrs["missing-value-placeholder"] + + dname = name + "/data" + is_native = not transposed + if placeholder is None: + seed = Hdf5DenseArraySeed(fpath, dname, dtype=dtype, native_order=is_native) + else: + core = Hdf5DenseArraySeed(fpath, dname, native_order=is_native) + seed = DelayedMask(core, placeholder=placeholder, dtype=dtype) + + return ReloadedArray(seed, path) diff --git a/src/dolomite_matrix/save_compressed_sparse_matrix.py b/src/dolomite_matrix/save_compressed_sparse_matrix.py new file mode 100644 index 0000000..2a0b01a --- /dev/null +++ b/src/dolomite_matrix/save_compressed_sparse_matrix.py @@ -0,0 +1,327 @@ +from typing import Any +from functools import singledispatch +from dolomite_base import save_object, validate_saves +from delayedarray import SparseNdarray, chunk_shape +import os +import h5py +import numpy +import delayedarray + +from . import _utils as ut +from . import _optimize_storage as optim + + +has_scipy = False +try: + import scipy.sparse + has_scipy = True +except: + pass + + +def _choose_index_type(n: int) -> str: + if n < 2**8: + return "u1" + elif n < 2**16: + return "u2" + elif n < 2**32: + return "u4" + else: + return "u8" + + +############################################### +############################################### + + +@singledispatch +def _h5_write_sparse_matrix(x: Any, handle, details, compressed_sparse_matrix_buffer_size, compressed_sparse_matrix_chunk_size): + chunks = chunk_shape(x) + chunks_per_col = x.shape[0] / chunks[0] + chunks_per_row = x.shape[1] / chunks[1] + + # If we have to extract fewer chunks per column, we iterate by column to + # create a CSC matrix. Otherwise we make a CSR matrix. 
+    if chunks_per_col < chunks_per_row:
+        primary = 1
+        secondary = 0
+        handle.attrs["layout"] = "CSC"
+    else:
+        primary = 0
+        secondary = 1
+        handle.attrs["layout"] = "CSR"
+
+    compressed_sparse_matrix_chunk_size = min(compressed_sparse_matrix_chunk_size, details.non_zero)
+    dhandle = handle.create_dataset("data", shape = details.non_zero, dtype = details.type, compression = "gzip", chunks = compressed_sparse_matrix_chunk_size)
+    if details.placeholder is not None:
+        dhandle.attrs.create("missing-value-placeholder", data = details.placeholder, dtype = details.type)
+
+    itype = _choose_index_type(x.shape[secondary])
+    ihandle = handle.create_dataset("indices", shape = details.non_zero, dtype = itype, compression = "gzip", chunks = compressed_sparse_matrix_chunk_size)
+    indptrs = numpy.zeros(x.shape[primary] + 1, dtype = numpy.uint64)
+    counter = 0
+
+    block_size = delayedarray.choose_block_size_for_1d_iteration(x, dimension=primary, memory=compressed_sparse_matrix_buffer_size)
+    limit = x.shape[primary]
+    subset = [None] * 2
+    subset[secondary] = range(x.shape[secondary])
+
+    for start in range(0, limit, block_size):
+        end = min(limit, start + block_size)
+        subset[primary] = range(start, end)
+        block = delayedarray.extract_sparse_array(x, (*subset,))
+
+        if block.contents is not None:
+            # Sparse2darrays are always CSC, so if we need to save it as CSR,
+            # we transpose it before we extract the contents.
+            if primary == 0:
+                block = block.T
+
+            original = counter
+            icollected = []
+            dcollected = []
+
+            for i, b in enumerate(block.contents):
+                if b is not None:
+                    counter += len(b[0])
+                    icollected.append(b[0])
+                    dcollected.append(ut.sanitize_for_writing(b[1], details.placeholder))
+                indptrs[start + i + 1] = counter
+
+            # Collecting everything in memory for a single write operation, to avoid
+            # potential issues with writing/reloading partial chunks.
+            ihandle[original : counter] = numpy.concatenate(icollected)
+            dhandle[original : counter] = numpy.concatenate(dcollected)
+
+    handle.create_dataset("indptr", data=indptrs, dtype="u8", compression="gzip", chunks=True)
+
+
+if has_scipy:
+    def _write_compressed_sparse_matrix(x: Any, handle, details, compressed_sparse_matrix_buffer_size, compressed_sparse_matrix_chunk_size, by_column):
+        if by_column:
+            primary = 1
+            secondary = 0
+            handle.attrs["layout"] = "CSC"
+        else:
+            primary = 0
+            secondary = 1
+            handle.attrs["layout"] = "CSR"
+
+        compressed_sparse_matrix_chunk_size = min(compressed_sparse_matrix_chunk_size, details.non_zero)
+        itype = _choose_index_type(x.shape[secondary])
+        handle.create_dataset("indices", data = x.indices, dtype = itype, compression = "gzip", chunks = compressed_sparse_matrix_chunk_size)
+        handle.create_dataset("indptr", data = x.indptr, dtype = "u8", compression = "gzip", chunks = True)
+
+        if not numpy.ma.is_masked(x.data):
+            handle.create_dataset("data", data = x.data, dtype = details.type, compression = "gzip", chunks = compressed_sparse_matrix_chunk_size)
+        elif not x.data.mask.any():
+            handle.create_dataset("data", data = x.data.data, dtype = details.type, compression = "gzip", chunks = compressed_sparse_matrix_chunk_size)
+        else:
+            dhandle = handle.create_dataset("data", shape = details.non_zero, dtype = details.type, compression="gzip", chunks = compressed_sparse_matrix_chunk_size)
+            if details.placeholder is not None:
+                dhandle.attrs.create("missing-value-placeholder", data = details.placeholder, dtype = details.type)
+
+            step = max(1, int(compressed_sparse_matrix_buffer_size / compressed_sparse_matrix_chunk_size)) * compressed_sparse_matrix_chunk_size
+            for i in range(0, details.non_zero, step):
+                end = min(details.non_zero, i + step)
+                block = x.data[i : end] # might be a view, so sanitization (and possible copying) is necessary.
+ dhandle[i : end] = ut.sanitize_for_writing(block, details.placeholder) + + + @_h5_write_sparse_matrix.register + def _h5_write_sparse_matrix_from_csc_matrix(x: scipy.sparse.csc_matrix, handle, details, compressed_sparse_matrix_buffer_size, compressed_sparse_matrix_chunk_size): + _write_compressed_sparse_matrix( + x, + handle, + details, + compressed_sparse_matrix_buffer_size = compressed_sparse_matrix_buffer_size, + compressed_sparse_matrix_chunk_size = compressed_sparse_matrix_chunk_size, + by_column = True + ) + + + @_h5_write_sparse_matrix.register + def _h5_write_sparse_matrix_from_csr_matrix(x: scipy.sparse.csr_matrix, handle, details, compressed_sparse_matrix_buffer_size, compressed_sparse_matrix_chunk_size): + _write_compressed_sparse_matrix( + x, + handle, + details, + compressed_sparse_matrix_buffer_size = compressed_sparse_matrix_buffer_size, + compressed_sparse_matrix_chunk_size = compressed_sparse_matrix_chunk_size, + by_column = False + ) + + +############################################### +############################################### + + +def _save_compressed_sparse_matrix(x: Any, path: str, compressed_sparse_matrix_chunk_size: int = 10000, compressed_sparse_matrix_buffer_size: int = 1e8, **kwargs): + os.mkdir(path) + if len(x.shape) != 2: + raise ValueError("only 2-dimensional sparse arrays are currently supported") + + with h5py.File(os.path.join(path, "matrix.h5"), "w") as handle: + ghandle = handle.create_group("compressed_sparse_matrix") + + if numpy.issubdtype(x.dtype, numpy.integer): + tt = "integer" + opts = optim.optimize_integer_storage(x) + elif numpy.issubdtype(x.dtype, numpy.floating): + tt = "number" + opts = optim.optimize_float_storage(x) + elif x.dtype == numpy.bool_: + tt = "boolean" + opts = optim.optimize_boolean_storage(x) + else: + raise NotImplementedError("cannot save sparse matrix of type '" + x.dtype.name + "'") + + ghandle.attrs["type"] = tt + ghandle.create_dataset("shape", data = x.shape, dtype = "u8") + _h5_write_sparse_matrix( + x, + handle = ghandle, + details = opts, + compressed_sparse_matrix_buffer_size = compressed_sparse_matrix_buffer_size, + compressed_sparse_matrix_chunk_size = compressed_sparse_matrix_chunk_size + ) + + with open(os.path.join(path, "OBJECT"), "w") as handle: + handle.write('{ "type": "compressed_sparse_matrix", "compressed_sparse_matrix": { "version": "1.0" } }') + + +@save_object.register +@validate_saves +def save_compresssed_sparse_matrix_from_Sparse2darray(x: SparseNdarray, path: str, compressed_sparse_matrix_chunk_size: int = 10000, compressed_sparse_matrix_buffer_size: int = 1e8, **kwargs): + """ + Method for saving a :py:class:`~delayedarray.SparseNdarray.SparseNdarray` + to disk, see :py:meth:`~dolomite_base.save_object.save_object` for details. + + Args: + x: Object to be saved. + + path: Path to a directory to save ``x``. + + compressed_sparse_matrix_chunk_size + Chunk size for the data and indices. Larger values improve compression + at the potential cost of reducing random access efficiency. + + compressed_sparse_matrix_buffer_size: + Size of the buffer in bytes, for blockwise processing and writing + to file. Larger values improve speed at the cost of memory. + + kwargs: Further arguments, ignored. + + Returns: + ``x`` is saved to ``path``. 
+ """ + _save_compressed_sparse_matrix( + x, + path, + compressed_sparse_matrix_chunk_size = compressed_sparse_matrix_chunk_size, + compressed_sparse_matrix_buffer_size = compressed_sparse_matrix_buffer_size, + **kwargs + ) + + +if has_scipy: + @save_object.register + @validate_saves + def save_compresssed_sparse_matrix_from_scipy_csc_matrix(x: scipy.sparse.csc_matrix, path: str, compressed_sparse_matrix_chunk_size: int = 10000, compressed_sparse_matrix_buffer_size: int = 1e8, **kwargs): + """ + Method for saving :py:class:`~scipy.sparse.csc_matrix` objects to disk, + see :py:meth:`~dolomite_base.stage_object.stage_object` for details. + + Args: + x: Matrix to be saved. + + path: Path to a directory to save ``x``. + + compressed_sparse_matrix_chunk_size + Chunk size for the data and indices. Larger values improve compression + at the potential cost of reducing random access efficiency. + + compressed_sparse_matrix_cache_size: + Size of the buffer in bytes, for blockwise processing and writing + to file. Larger values improve speed at the cost of memory. + + kwargs: Further arguments, ignored. + + Returns: + ``x`` is saved to ``path``. + """ + return _save_compressed_sparse_matrix( + x, + path, + compressed_sparse_matrix_chunk_size = compressed_sparse_matrix_chunk_size, + compressed_sparse_matrix_buffer_size = compressed_sparse_matrix_buffer_size, + **kwargs + ) + + + @save_object.register + @validate_saves + def save_compresssed_sparse_matrix_from_scipy_csr_matrix(x: scipy.sparse.csr_matrix, path: str, compressed_sparse_matrix_chunk_size: int = 10000, compressed_sparse_matrix_buffer_size: int = 1e8, **kwargs): + """ + Method for saving :py:class:`~scipy.sparse.csr_matrix` objects to disk, + see :py:meth:`~dolomite_base.stage_object.stage_object` for details. + + Args: + x: Matrix to be saved. + + path: Path to a directory to save ``x``. + + compressed_sparse_matrix_chunk_size + Chunk size for the data and indices. Larger values improve compression + at the potential cost of reducing random access efficiency. + + compressed_sparse_matrix_cache_size: + Size of the buffer in bytes, for blockwise processing and writing + to file. Larger values improve speed at the cost of memory. + + kwargs: Further arguments, ignored. + + Returns: + ``x`` is saved to ``path``. + """ + return _save_compressed_sparse_matrix( + x, + path, + compressed_sparse_matrix_chunk_size = compressed_sparse_matrix_chunk_size, + compressed_sparse_matrix_buffer_size = compressed_sparse_matrix_buffer_size, + **kwargs + ) + + + @save_object.register + @validate_saves + def save_compresssed_sparse_matrix_from_scipy_coo_matrix(x: scipy.sparse.coo_matrix, path: str, compressed_sparse_matrix_chunk_size: int = 10000, compressed_sparse_matrix_buffer_size: int = 1e8, **kwargs): + """ + Method for saving :py:class:`~scipy.sparse.coo_matrix` objects to disk, + see :py:meth:`~dolomite_base.stage_object.stage_object` for details. + + Args: + x: Matrix to be saved. + + path: Path to a directory to save ``x``. + + compressed_sparse_matrix_chunk_size + Chunk size for the data and indices. Larger values improve compression + at the potential cost of reducing random access efficiency. + + compressed_sparse_matrix_cache_size: + Size of the buffer in bytes, for blockwise processing and writing + to file. Larger values improve speed at the cost of memory. + + kwargs: Further arguments, ignored. + + Returns: + ``x`` is saved to ``path``. 
+ """ + return _save_compressed_sparse_matrix( + x, + path, + compressed_sparse_matrix_chunk_size = compressed_sparse_matrix_chunk_size, + compressed_sparse_matrix_buffer_size = compressed_sparse_matrix_buffer_size, + **kwargs + ) diff --git a/src/dolomite_matrix/save_delayed_array.py b/src/dolomite_matrix/save_delayed_array.py new file mode 100644 index 0000000..70fec5b --- /dev/null +++ b/src/dolomite_matrix/save_delayed_array.py @@ -0,0 +1,48 @@ +from typing import Tuple, Optional, Any, Union +from dolomite_base import save_object, validate_saves +import delayedarray +import numpy +import os + +from .save_compressed_sparse_matrix import _save_compressed_sparse_matrix +from .save_dense_array import _save_dense_array + + +@save_object.register +@validate_saves +def save_delayed_array(x: delayedarray.DelayedArray, path: str, delayed_array_preserve_operations: bool = False, **kwargs): + """ + Method to save :py:class:`~delayedarray.DelayedArray.DelayedArray` objects + to disk, see :py:meth:`~dolomite_base.save_object.save_object` for details. + + If the array is pristine, we attempt to use the ``save_object`` method of + the seed. If ``delayed_array_preserve_operations = False``, we save the + ``DelayedArray`` as a dense array or a compressed sparse matrix. + + Args: + x: Object to be saved. + + path: Path to a directory to save ``x``. + + delayed_array_preserve_operations: + Whether to preserve delayed operations via the **chihaya** specification. + Currently not supported. + + kwargs: + Further arguments, passed to the ``save_object`` methods for dense + arrays and compressed sparse matrices. + + Returns: + ``x`` is saved to ``path``. + """ + if delayedarray.is_pristine(x): + candidate = save_object.dispatch(type(x.seed)) + if save_object.dispatch(object) != candidate: + return candidate(x.seed, path, delayed_array_preserve_operations=delayed_array_preserve_operations, **kwargs) + + if not delayed_array_preserve_operations: + if delayedarray.is_sparse(x): + return _save_compressed_sparse_matrix(x, path, **kwargs) + return _save_dense_array(x, path, **kwargs) + + raise NotImplementedError("no support yet for delayed operations, this will be coming soon") diff --git a/src/dolomite_matrix/save_dense_array.py b/src/dolomite_matrix/save_dense_array.py new file mode 100644 index 0000000..78e9648 --- /dev/null +++ b/src/dolomite_matrix/save_dense_array.py @@ -0,0 +1,183 @@ +from typing import Tuple, Optional, Any, Dict +import numpy +from dolomite_base import save_object, validate_saves +import delayedarray +import os +import h5py + +from .choose_chunk_dimensions import choose_chunk_dimensions +from . import _optimize_storage as optim +from . import _utils as ut + + +################################################### +################################################### + + +# We use a mock class with the properties of the HDF5 dataset. This allows us +# to use choose_block_shape_for_iteration to pick dimensions that align with +# the HDF5 chunks; these may or may not be suitable for the input array, but +# we'll take the chance that the input array is already in memory. 
+class _DenseArrayOutputMock: + def __init__(self, shape: Tuple, dtype: numpy.dtype, chunks: Tuple): + self.shape = shape + self.dtype = dtype + self.chunks = chunks + + +@delayedarray.chunk_shape.register +def _chunk_shape_DenseArrayOutputMock(x: _DenseArrayOutputMock): + return x.chunks + + +def _blockwise_write_to_hdf5(dhandle: h5py.Dataset, chunk_shape: Tuple, x: Any, placeholder: Any, is_string: bool, memory: int): + mock = _DenseArrayOutputMock(x.shape, x.dtype, chunk_shape) + block_shape = delayedarray.choose_block_shape_for_iteration(mock, memory=memory) + if placeholder is not None: + placeholder = x.dtype.type(placeholder) + + def _blockwise_dense_writer(pos: Tuple, block): + block = ut.sanitize_for_writing(block, placeholder) + + # h5py doesn't want to convert from numpy's Unicode type to bytes + # automatically, and fails: so fine, we'll do it ourselves. + if is_string: + block = block.astype(dhandle.dtype, copy=False) + + # Block processing is inherently Fortran-order based (i.e., first + # dimension is assumed to change the fastest), and the blocks + # themselves are also in F-contiguous layout (i.e., column-major). By + # comparison HDF5 uses C order. To avoid any rearrangement of data + # by h5py, we save it as a transposed array for efficiency. + coords = [slice(start, end) for start, end in reversed(pos)] + dhandle[(*coords,)] = block.T + + delayedarray.apply_over_blocks(x, _blockwise_dense_writer, block_shape = block_shape) + return + + +################################################### +################################################### + + +def _save_dense_array( + x: numpy.ndarray, + path: str, + dense_array_chunk_dimensions: Optional[Tuple[int, ...]] = None, + dense_array_chunk_args: Dict = {}, + dense_array_buffer_size: int = 1e8, + **kwargs +): + os.mkdir(path) + + # Coming up with a decent chunk size. + if dense_array_chunk_dimensions is None: + dense_array_chunk_dimensions = choose_chunk_dimensions(x.shape, x.dtype.itemsize, **dense_array_chunk_args) + else: + capped = [] + for i, d in enumerate(x.shape): + capped.append(min(d, dense_array_chunk_dimensions[i])) + dense_array_chunk_dimensions = (*capped,) + + # Choosing the smallest data type that we can use. + tt = None + blockwise = False + if numpy.issubdtype(x.dtype, numpy.integer): + tt = "integer" + opts = optim.optimize_integer_storage(x) + elif numpy.issubdtype(x.dtype, numpy.floating): + tt = "number" + opts = optim.optimize_float_storage(x) + elif x.dtype == numpy.bool_: + tt = "boolean" + opts = optim.optimize_boolean_storage(x) + elif numpy.issubdtype(x.dtype, numpy.str_): + tt = "string" + opts = optim.optimize_string_storage(x) + blockwise = True + else: + raise NotImplementedError("cannot save dense array of type '" + x.dtype.name + "'") + + if opts.placeholder is not None: + blockwise = True + if not isinstance(x, numpy.ndarray): + blockwise = True + + fpath = os.path.join(path, "array.h5") + with h5py.File(fpath, "w") as handle: + ghandle = handle.create_group("dense_array") + ghandle.attrs["type"] = tt + + if not blockwise: + # Saving it in transposed form if it's in Fortran order (i.e., first dimensions are fastest). + # This avoids the need for any data reorganization inside h5py itself. 
+ if x.flags.f_contiguous: + x = x.T + dense_array_chunk_dimensions = (*reversed(dense_array_chunk_dimensions),) + ghandle.create_dataset("transposed", data=1, dtype="i1") + else: + ghandle.create_dataset("transposed", data=0, dtype="i1") + dhandle = ghandle.create_dataset("data", data=x, chunks=dense_array_chunk_dimensions, dtype=opts.type, compression="gzip") + else: + # Block processing of a dataset is always Fortran order, but HDF5 uses C order. + # So, we save the blocks in transposed form for efficiency. + ghandle.create_dataset("transposed", data=1, dtype="i1") + dhandle = ghandle.create_dataset("data", shape=(*reversed(x.shape),), chunks=(*reversed(dense_array_chunk_dimensions),), dtype=opts.type, compression="gzip") + _blockwise_write_to_hdf5(dhandle, chunk_shape=dense_array_chunk_dimensions, x=x, placeholder=opts.placeholder, is_string=(tt == "string"), memory=dense_array_buffer_size) + if opts.placeholder is not None: + dhandle.attrs.create("missing-value-placeholder", data=opts.placeholder, dtype=opts.type) + + with open(os.path.join(path, "OBJECT"), "w") as handle: + handle.write('{ "type": "dense_array", "dense_array": { "version": "1.0" } }') + + +################################################### +################################################### + + +@save_object.register +@validate_saves +def save_dense_array_from_ndarray( + x: numpy.ndarray, + path: str, + dense_array_chunk_dimensions: Optional[Tuple[int, ...]] = None, + dense_array_chunk_args: Dict = {}, + dense_array_buffer_size: int = 1e8, + **kwargs +): + """ + Method for saving :py:class:`~numpy.ndarray` objects to disk, see + :py:meth:`~dolomite_base.save_object.save_object` for details. + + Args: + x: Object to be saved. + + path: Path to a directory to save ``x``. + + dense_array_chunk_dimensions: + Chunk dimensions for the HDF5 dataset. Larger values improve + compression at the potential cost of reducing random access + efficiency. If not provided, we choose some chunk sizes with + :py:meth:`~dolomite_matrix.choose_chunk_dimensions.choose_chunk_dimensions`. + + dense_array_chunk_args: + Arguments to pass to ``choose_chunk_dimensions`` if + ``dense_array_chunk_dimensions`` is not provided. + + dense_array_buffer_size: + Size of the buffer in bytes, for blockwise processing and writing + to file. Larger values improve speed at the cost of memory. + + kwargs: Further arguments, ignored. + + Returns: + ``x`` is saved to ``path``. 
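+
+    Example:
+        A minimal sketch of a round trip; the directory name is arbitrary:
+
+        .. code-block:: python
+
+            import numpy
+            from dolomite_base import save_object, read_object
+
+            x = numpy.random.rand(100, 200)
+            save_object(x, "my_dense_array")
+            roundtrip = read_object("my_dense_array")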
+ """ + _save_dense_array( + x, + path=path, + dense_array_chunk_dimensions=dense_array_chunk_dimensions, + dense_array_chunk_args = dense_array_chunk_args, + dense_array_buffer_size = dense_array_buffer_size, + **kwargs + ) diff --git a/src/dolomite_matrix/stage_DelayedArray.py b/src/dolomite_matrix/stage_DelayedArray.py deleted file mode 100644 index e40854d..0000000 --- a/src/dolomite_matrix/stage_DelayedArray.py +++ /dev/null @@ -1,180 +0,0 @@ -from typing import Tuple, Optional, Any, Union -from delayedarray import DelayedArray, chunk_shape, is_sparse, extract_dense_array, extract_sparse_array, is_pristine -from numpy import ceil, prod -from dolomite_base import stage_object -import os - -from .choose_dense_chunk_sizes import choose_dense_chunk_sizes -from ._utils import _choose_file_dtype, _translate_array_type, _open_writeable_hdf5_handle, _choose_block_shape -from .stage_sparse import _stage_sparse_matrix - - -def _stage_DelayedArray_dense( - x: DelayedArray, - dir: str, - path: str, - is_child: bool = False, - chunks: Optional[Tuple[int, ...]] = None, - cache_size: int = 1e8, - block_size: int = 1e8, - **kwargs -) -> dict[str, Any]: - os.mkdir(os.path.join(dir, path)) - newpath = path + "/array.h5" - - # Coming up with a decent chunk size. - if chunks is None: - chunks = choose_dense_chunk_sizes(x.shape, x.dtype.itemsize) - else: - capped = [] - for i, d in enumerate(x.shape): - capped.append(min(d, chunks[i])) - chunks = (*capped,) - - # Transposing it so that we save it in the right order. - t = x.T - chunks = (*list(reversed(chunks)),) - - # Saving the matrix in a blockwise fashion. We progress along HDF5's - # fastest changing dimension (i.e., the last one), and we shift along the - # other dimensions once we need to wrap around. - full_shape = t.shape - ndim = len(full_shape) - block_shape = _choose_block_shape(t, block_size) - - fpath = os.path.join(dir, newpath) - with _open_writeable_hdf5_handle(fpath, cache_size) as fhandle: - dset = fhandle.create_dataset("data", shape=t.shape, chunks=chunks, dtype=_choose_file_dtype(t.dtype), compression="gzip") - - num_chunks = [] - subset_as_slices = [] - for i, s in enumerate(full_shape): - b = block_shape[i] - num_chunks.append(int(ceil(s / b))) - subset_as_slices.append(slice(0, b)) - - starts = [0] * len(num_chunks) - counter = [0] * len(num_chunks) - subset_as_ranges = [None] * len(num_chunks) - - running = True - while running: - for i, sl in enumerate(subset_as_slices): - subset_as_ranges[i] = range(*(sl.indices(full_shape[i]))) - curblock = extract_dense_array(t, subset_as_ranges) - dset[(*subset_as_slices,)] = curblock - - for i in range(ndim - 1, -1, -1): - starts[i] += 1 - block_extent = block_shape[i] - if starts[i] < num_chunks[i]: - new_start = starts[i] * block_extent - subset_as_slices[i] = slice(new_start, min(new_start + block_extent, full_shape[i])) - break - if i == 0: - running = False - break - starts[i] = 0 - subset_as_slices[i] = slice(0, block_extent) - - return { - "$schema": "hdf5_dense_array/v1.json", - "path": newpath, - "is_child": is_child, - "array": { - "type": _translate_array_type(x.dtype), - "dimensions": list(x.shape), - }, - "hdf5_dense_array": { - "dataset": "data", - } - } - - -@stage_object.register -def stage_DelayedArray( - x: DelayedArray, - dir: str, - path: str, - is_child: bool = False, - chunks: Optional[Union[Tuple[int, ...], int]] = None, - cache_size: int = 1e8, - block_size: int = 1e8, - **kwargs -) -> dict[str, Any]: - """Method for saving 
:py:class:`~delaydearray.DelayedArray.DelayedArray` - objects to file, see :py:meth:`~dolomite_base.stage_object.stage_object` - for details. - - Args: - x: Array to be saved. - - dir: Staging directory. - - path: Relative path inside ``dir`` to save the object. - - is_child: Is ``x`` a child of another object? - - chunks: - For dense ``x``, a tuple of chunk dimensions. If None, we - automatically choose some chunk sizes with - :py:meth:`~dolomite_matrix.choose_dense_chunk_sizes.choose_dense_chunk_sizes`. - - For sparse ``x``, an integer specifying the chunk dimension in - :py:meth:`~dolomite_matrix.write_sparse_matrix.write_sparse_matrix`. - If None, a suitable chunk size is chosen. - - cache_size: - Size of the HDF5 cache size, in bytes. Larger values improve speed - at the cost of memory. - - block_size: - Size of each block in bytes. Staging is performed by iterating over - ``x``, extracting one block at a time, and saving it to the HDF5 - file. Larger values improve speed at the cost of memory. - - kwargs: - Further arguments, ignored for dense ``x``. For sparse ``x``, these - are passed to - :py:meth:`~dolomite_matrix.write_sparse_matrix.write_sparse_matrix`. - - Returns: - Metadata that can be edited by calling methods and then saved with - :py:meth:`~dolomite_base.write_metadata.write_metadata`. - """ - # Seeing if we can call specialized method for the seed in pristine objects. - if is_pristine(x) and isinstance(x, DelayedArray): - candidate = stage_object.dispatch(type(x.seed)) - if stage_object.dispatch(object) != candidate: - return candidate( - x.seed, - dir=dir, - path=path, - is_child=is_child, - chunks=chunks, - cache_size=cache_size, - block_size=block_size, - **kwargs - ) - - if is_sparse(x): - return _stage_sparse_matrix( - x, - dir=dir, - path=path, - is_child=is_child, - chunks=chunks, - block_size=block_size, - **kwargs, - ) - else: - return _stage_DelayedArray_dense( - x, - dir=dir, - path=path, - is_child=is_child, - chunks=chunks, - cache_size=cache_size, - block_size=block_size, - **kwargs - ) diff --git a/src/dolomite_matrix/stage_ndarray.py b/src/dolomite_matrix/stage_ndarray.py deleted file mode 100644 index 41e54b5..0000000 --- a/src/dolomite_matrix/stage_ndarray.py +++ /dev/null @@ -1,77 +0,0 @@ -from typing import Tuple, Optional, Any -from numpy import ndarray, bool_, uint8 -from dolomite_base import stage_object -import os - -from .choose_dense_chunk_sizes import choose_dense_chunk_sizes -from ._utils import _translate_array_type, _open_writeable_hdf5_handle, _choose_file_dtype - - -@stage_object.register -def stage_ndarray( - x: ndarray, - dir: str, - path: str, - is_child: bool = False, - chunks: Optional[Tuple[int, ...]] = None, - cache_size: int = 1e8, - **kwargs -) -> dict[str, Any]: - """Method for saving :py:class:`~numpy.ndarray` objects to file, see - :py:meth:`~dolomite_base.stage_object.stage_object` for details. - - Args: - x: Array to be saved. - - dir: Staging directory. - - path: Relative path inside ``dir`` to save the object. - - is_child: Is ``x`` a child of another object? - - chunks: - Chunk dimensions. If not provided, we choose some chunk sizes with - :py:meth:`~dolomite_matrix.choose_dense_chunk_sizes.choose_dense_chunk_sizes`. - - cache_size: - Size of the HDF5 cache size, in bytes. Larger values improve speed - at the cost of memory. - - kwargs: Further arguments, ignored. - - Returns: - Metadata that can be edited by calling methods and then saved with - :py:meth:`~dolomite_base.write_metadata.write_metadata`. 
- """ - os.mkdir(os.path.join(dir, path)) - newpath = path + "/array.h5" - - # Coming up with a decent chunk size. - if chunks is None: - chunks = choose_dense_chunk_sizes(x.shape, x.dtype.itemsize) - else: - capped = [] - for i, d in enumerate(x.shape): - capped.append(min(d, chunks[i])) - chunks = (*capped,) - - # Transposing it so that we save it in the right order. - t = x.T - chunks = (*list(reversed(chunks)),) - - fpath = os.path.join(dir, newpath) - with _open_writeable_hdf5_handle(fpath, cache_size) as fhandle: - fhandle.create_dataset("data", data=t, chunks=chunks, dtype=_choose_file_dtype(t.dtype), compression="gzip") - - return { - "$schema": "hdf5_dense_array/v1.json", - "path": newpath, - "is_child": is_child, - "array": { - "type": _translate_array_type(x.dtype), - "dimensions": list(x.shape), - }, - "hdf5_dense_array": { - "dataset": "data", - } - } diff --git a/src/dolomite_matrix/stage_sparse.py b/src/dolomite_matrix/stage_sparse.py deleted file mode 100644 index 5701468..0000000 --- a/src/dolomite_matrix/stage_sparse.py +++ /dev/null @@ -1,111 +0,0 @@ -from dolomite_base import stage_object -import os - -from .write_sparse_matrix import write_sparse_matrix -from ._utils import _translate_array_type - - -has_scipy = False -try: - import scipy.sparse - has_scipy = True -except: - pass - - -def _stage_sparse_matrix(x, dir: str, path: str, is_child: bool, **kwargs): - os.mkdir(os.path.join(dir, path)) - newpath = path + "/matrix.h5" - fullpath = os.path.join(dir, newpath) - write_sparse_matrix(x, fullpath, "data", **kwargs) - return { - "$schema": "hdf5_sparse_matrix/v1.json", - "path": newpath, - "array": { - "type": _translate_array_type(x.dtype), - "dimensions": list(x.shape), - }, - "hdf5_sparse_matrix": { - "format": "tenx_matrix", - "group": "data" - } - } - - -if has_scipy: - @stage_object.register - def stage_scipy_csc_matrix(x: scipy.sparse.csc_matrix, dir: str, path: str, is_child: bool = False, **kwargs): - """Method for saving :py:class:`~scipy.sparse.csc_matrix` objects to - file, see :py:meth:`~dolomite_base.stage_object.stage_object` for - details. - - Args: - x: Matrix to be saved. - - dir: Staging directory. - - path: Relative path inside ``dir`` to save the object. - - is_child: Is ``x`` a child of another object? - - kwargs: - Further arguments to pass to - :py:meth:`~dolomite_matrix.write_sparse_matrix.write_sparse_matrix`. - - Returns: - Metadata that can be edited by calling methods and then saved with - :py:meth:`~dolomite_base.write_metadata.write_metadata`. - """ - return _stage_sparse_matrix(x, dir, path, is_child, **kwargs) - - - @stage_object.register - def stage_scipy_csr_matrix(x: scipy.sparse.csr_matrix, dir: str, path: str, is_child: bool = False, **kwargs): - """Method for saving :py:class:`~scipy.sparse.csr_matrix` objects to - file, see :py:meth:`~dolomite_base.stage_object.stage_object` for - details. - - Args: - x: Matrix to be saved. - - dir: Staging directory. - - path: Relative path inside ``dir`` to save the object. - - is_child: Is ``x`` a child of another object? - - kwargs: - Further arguments to pass to - :py:meth:`~dolomite_matrix.write_sparse_matrix.write_sparse_matrix`. - - Returns: - Metadata that can be edited by calling methods and then saved with - :py:meth:`~dolomite_base.write_metadata.write_metadata`. 
- """ - return _stage_sparse_matrix(x, dir, path, is_child, **kwargs) - - - @stage_object.register - def stage_scipy_coo_matrix(x: scipy.sparse.coo_matrix, dir: str, path: str, is_child: bool = False, **kwargs): - """Method for saving :py:class:`~scipy.sparse.coo_matrix` objects to - file, see :py:meth:`~dolomite_base.stage_object.stage_object` for - details. - - Args: - x: Matrix to be saved. - - dir: Staging directory. - - path: Relative path inside ``dir`` to save the object. - - is_child: Is ``x`` a child of another object? - - kwargs: - Further arguments to pass to - :py:meth:`~dolomite_matrix.write_sparse_matrix.write_sparse_matrix`. - - Returns: - Metadata that can be edited by calling methods and then saved with - :py:meth:`~dolomite_base.write_metadata.write_metadata`. - """ - return _stage_sparse_matrix(x, dir, path, is_child, **kwargs) diff --git a/src/dolomite_matrix/write_sparse_matrix.py b/src/dolomite_matrix/write_sparse_matrix.py deleted file mode 100644 index 662ff3b..0000000 --- a/src/dolomite_matrix/write_sparse_matrix.py +++ /dev/null @@ -1,333 +0,0 @@ -from functools import singledispatch -from typing import Any, Optional -from collections import namedtuple -from delayedarray import wrap, extract_sparse_array, DelayedArray, SparseNdarray, Combine, guess_iteration_block_size -from h5py import File -import numpy - -from ._utils import _choose_block_shape - - -def _choose_smallest_uint(value): - if value < 2**8: - return numpy.uint8 - elif value < 2**16: - return numpy.uint16 - elif value < 2**32: - return numpy.uint32 - else: - return numpy.uint64 - - -def write_sparse_matrix(x, path: str, name: str, chunks: Optional[int] = None, guess_integer: bool = True, block_size: int = 1e8): - """Write a sparse matrix into a HDF5 file in a compressed-sparse column - format. This creates a HDF5 group containing the ``data``, ``indices`` and - ``indptr`` datasets, containing the corresponding components of the usual - compressed sparse representation. The shape of the matrix is stored in the - ``shape`` dataset inside the same group. - - Args: - x: Any sparse matrix. - - path: Path to the file in which to save ``x``. - - name: Name of the group in the file. - - chunks: - Size of the 1-dimensional chunks for the data and index datasets. - If None, defaults to 10000. - - guess_integer: - Whether to guess a compact integer type for the data. This can reduce - disk usage at the cost of a second pass through the data. - - block_size: - Size of the blocks to use for processing ``x``. Larger values - increase speed at the cost of memory efficiency. - - Returns: - A HDF5 file is created at ``path`` with the sparse data stored in the - ``name`` group. - """ - with File(path, "w") as handle: - ghandle = handle.create_group(name) - ghandle.create_dataset(name="shape", data=numpy.array(x.shape, dtype=numpy.uint64)) - - # Choosing a compact type for the indices. - index_dtype = _choose_smallest_uint(x.shape[0]) - - deets = _extract_details(x, block_size=block_size) - - # Choosing a compact type for the data. 
- if not guess_integer: - dtype = _choose_file_dtype(x.dtype) - else: - if deets.non_integer: - if x.dtype == numpy.float32: - dtype = numpy.float32 - else: - dtype = numpy.float64 - elif deets.minimum < 0: - if deets.minimum >= -2**7 and deets.maximum < 2**7: - dtype = numpy.int8 - elif deets.minimum >= -2**15 and deets.maximum < 2**15: - dtype = numpy.int16 - elif deets.minimum >= -2**31 and deets.maximum < 2**31: - dtype = numpy.int32 - else: - dtype = numpy.int64 - else: - dtype = _choose_smallest_uint(deets.maximum) - - # Doing the dump. - if chunks is None: - chunks = 10000 - ihandle = ghandle.create_dataset(name="indices", shape=deets.count, dtype=index_dtype, chunks=chunks, compression="gzip") - dhandle = ghandle.create_dataset(name="data", shape=deets.count, dtype=dtype, chunks=chunks, compression="gzip") - saved = _dump_sparse_matrix(x, ihandle, dhandle, block_size=block_size, offset=0) - - pointers = numpy.zeros(x.shape[1] + 1, dtype=numpy.uint64) - for i, s in enumerate(saved): - pointers[i + 1] += pointers[i] + s - ghandle.create_dataset(name="indptr", data=pointers, compression="gzip") - - -has_scipy = False -try: - import scipy.sparse - has_scipy = True -except: - pass - - -#################################################### -#################################################### - - -_SparseMatrixDetails = namedtuple("_SparseMatrixDetails", [ "minimum", "maximum", "non_integer", "count" ]) - - -def _extract_details_by_block(x, block_size: int) -> _SparseMatrixDetails: - full_shape = x.shape - block_shape = _choose_block_shape(x, block_size) - - num_row_blocks = int(numpy.ceil(full_shape[0] / block_shape[0])) - num_col_blocks = int(numpy.ceil(full_shape[1] / block_shape[1])) - - minimum = None - maximum = None - count = 0 - non_integer = False - - for c in range(num_col_blocks): - col_start = c * block_shape[1] - col_range = range(col_start, min(col_start + block_shape[1], full_shape[1])) - - for r in range(num_row_blocks): - row_start = c * block_shape[0] - row_range = range(row_start, min(row_start + block_shape[0], full_shape[0])) - - block = extract_sparse_array(x, (row_range, col_range)) - deets = _extract_details(block) - - if minimum is None or deets.minimum < minimum: - minimum = deets.minimum - if maximum is None or deets.maximum > maximum: - maximum = deets.maximum - if not non_integer: - non_integer = deets.non_integer - count += deets.count - - return _SparseMatrixDetails( - minimum = minimum, - maximum = maximum, - non_integer = non_integer, - count = count, - ) - - -@singledispatch -def _extract_details(x: Any, block_size: int, **kwargs) -> _SparseMatrixDetails: - return _extract_details_by_block(x, block_size) - - -def _has_non_integer(data: numpy.ndarray): - if numpy.issubdtype(data.dtype, numpy.floating): - for y in data: - if not y.is_integer(): - return True - return False - - -@_extract_details.register -def _extract_details_SparseNdarray(x: SparseNdarray, **kwargs) -> _SparseMatrixDetails: - minimum = None - maximum = None - count = 0 - non_integer = False - - if x.contents is not None: - for idx, val in x.contents: - candidate_min = min(val) - if minimum is None or candidate_min < minimum: - minimum = candidate_min - - candidate_max = max(val) - if maximum is None or candidate_max > maximum: - maximum = candidate_max - - if not non_integer: - non_integer = _has_non_integer(val) - - count += len(val) - else: - minimum = 0 - maximum = 0 - - return _SparseMatrixDetails( - minimum = minimum, - maximum = maximum, - non_integer = non_integer, - count = count, 
- ) - - - -@_extract_details.register -def _extract_details_Combine(x: Combine, **kwargs) -> _SparseMatrixDetails: - minimum = None - maximum = None - count = 0 - non_integer = False - - for s in x.seeds: - deets = _extract_details(s, **kwargs) - if minimum is None or deets.minimum < minimum: - minimum = deets.minimum - if maximum is None or deets.maximum > maximum: - maximum = deets.maximum - if not non_integer: - non_integer = deets.non_integer - count += deets.count - - return _SparseMatrixDetails( - minimum = minimum, - maximum = maximum, - non_integer = non_integer, - count = count, - ) - - -@_extract_details.register -def _extract_details_DelayedArray(x: DelayedArray, block_size: int, **kwargs) -> _SparseMatrixDetails: - candidate = _extract_details.dispatch(type(x.seed)) - if _extract_details.dispatch(object) != candidate: - return candidate(x.seed, block_size = block_size, **kwargs) - else: - return _extract_details_by_block(x, block_size) - - -if has_scipy: - def _extract_from_compressed_data(data: numpy.ndarray): - return _SparseMatrixDetails( - minimum = min(data), - maximum = max(data), - non_integer = _has_non_integer(data), - count = len(data), - ) - - - @_extract_details.register - def _extract_details_scipy_csc(x: scipy.sparse.csc_matrix, **kwargs): - return _extract_from_compressed_data(x.data) - - - @_extract_details.register - def _extract_details_scipy_csr(x: scipy.sparse.csr_matrix, **kwargs): - return _extract_from_compressed_data(x.data) - - - @_extract_details.register - def _extract_details_scipy_coo(x: scipy.sparse.coo_matrix, **kwargs): - return _extract_from_compressed_data(x.data) - - -#################################################### -#################################################### - -def _dump_sparse_matrix_as_SparseNdarray(x, indices_handle, data_handle, offset: int): - saved = numpy.zeros(x.shape[1], dtype=numpy.uint64) - if x.contents is not None: - for s, con in enumerate(x.contents): - if con is not None: - idx, val = con - indices_handle[offset:offset + len(idx)] = idx - data_handle[offset:offset + len(val)] = val - offset += len(idx) - saved[s] = len(idx) - return saved - - -def _dump_sparse_matrix_by_block(x, indices_handle, data_handle, offset: int, block_size: int): - block_size = guess_iteration_block_size(x, 1, block_size) - start = 0 - row_range = range(x.shape[0]) - all_saved = [numpy.zeros(0, dtype=numpy.uint64)] - - while start < x.shape[1]: - end = min(start + block_size, x.shape[1]) - col_range = range(start, end) - block = extract_sparse_array(x, (row_range, col_range)) - saved = _dump_sparse_matrix_as_SparseNdarray(block, indices_handle, data_handle, offset=offset) - offset += saved.sum() - all_saved.append(saved) - start = end - - return numpy.concatenate(all_saved) - - -@singledispatch -def _dump_sparse_matrix(x: Any, indices_handle, data_handle, offset: int, block_size: int, **kwargs) -> numpy.ndarray: - return _dump_sparse_matrix_by_block(x, indices_handle, data_handle, offset=offset, block_size=block_size) - - -@_dump_sparse_matrix.register -def _dump_sparse_matrix_SparseNdarray(x: SparseNdarray, indices_handle, data_handle, offset: int, block_size: int, **kwargs): - return _dump_sparse_matrix_by_block(x, indices_handle, data_handle, offset=offset, block_size=block_size) - - -@_dump_sparse_matrix.register -def _dump_sparse_matrix_Combine(x: Combine, indices_handle, data_handle, offset: int, block_size: int, **kwargs): - if x.along != 1: - return _dump_sparse_matrix_by_block(x, indices_handle, data_handle, offset=offset, 
block_size=block_size) - - all_saved = [numpy.zeros(0, dtype=numpy.uint64)] - offset = 0 - for s in x.seeds: - saved = _dump_sparse_matrix(s, indices_handle, data_handle, offset=offset, block_size=block_size, **kwargs) - all_saved.append(saved) - offset += int(saved.sum()) - - return numpy.concatenate(all_saved) - - -@_dump_sparse_matrix.register -def _dump_sparse_matrix_DelayedArray(x: DelayedArray, indices_handle, data_handle, offset: int, block_size: int, **kwargs): - candidate = _dump_sparse_matrix.dispatch(type(x.seed)) - if _dump_sparse_matrix.dispatch(object) != candidate: - return candidate(x.seed, indices_handle, data_handle, offset=offset, block_size = block_size, **kwargs) - else: - return _dump_sparse_matrix_by_block(x, indices_handle, data_handle, offset=offset, block_size=block_size) - - -if has_scipy: - # All other representations can undergo block processing. - @_dump_sparse_matrix.register - def _dump_sparse_matrix_scipy_csc(x: scipy.sparse.csc_matrix, indices_handle, data_handle, offset: int, block_size: int, **kwargs): - n = len(x.data) - indices_handle[offset:offset + n] = x.indices - data_handle[offset:offset + n] = x.data - saved = numpy.zeros(x.shape[1], dtype=numpy.uint64) - for i in range(x.shape[1]): - saved[i] = x.indptr[i+1] - x.indptr[i] - return saved diff --git a/tests/test_DelayedMask.py b/tests/test_DelayedMask.py new file mode 100644 index 0000000..a275074 --- /dev/null +++ b/tests/test_DelayedMask.py @@ -0,0 +1,115 @@ +import dolomite_matrix as dm +import delayedarray +import numpy + + +def test_DelayedMask_dense(): + y = numpy.array([[1,2,3],[4,5,6]]) + + m = dm.DelayedMask(y, 1) + assert m.dtype == y.dtype + assert m.shape == y.shape + assert m.placeholder == 1 + assert not delayedarray.is_sparse(m) + assert delayedarray.chunk_shape(m) == (1, 3) + + block = delayedarray.extract_dense_array(m) + assert numpy.ma.is_masked(block) + assert numpy.ma.is_masked(block[0,0]) + assert block[1,2] == 6 + + # A no-op block. + block = delayedarray.extract_dense_array(m, (range(0, 2), range(1, 3))) + assert not numpy.ma.is_masked(block) + + +def test_DelayedMask_dense_NaN(): + y = numpy.array([[1,2,numpy.NaN],[4,5,6]]) + m = dm.DelayedMask(y, numpy.NaN) + block = delayedarray.extract_dense_array(m) + assert numpy.ma.is_masked(block) + assert numpy.ma.is_masked(block[0,2]) + assert block[0,0] == 1 + + +def test_DelayedMask_strings(): + # Check that the placeholders work with different types of strings being compared. + y = numpy.array(["A", "B", "C"]) + m = dm.DelayedMask(y, "C") + block = delayedarray.extract_dense_array(m) + assert numpy.ma.is_masked(block) + assert numpy.ma.is_masked(block[2]) + assert block[0] == "A" + + m = dm.DelayedMask(y, "ASDASD") + block = delayedarray.extract_dense_array(m) + assert not numpy.ma.is_masked(block) + assert m.placeholder == "ASDASD" + + m = dm.DelayedMask(y, b"B") + block = delayedarray.extract_dense_array(m) + assert numpy.ma.is_masked(block) + assert numpy.ma.is_masked(block[1]) + assert m.placeholder == "B" + + m = dm.DelayedMask(y, numpy.bytes_("C")) + block = delayedarray.extract_dense_array(m) + assert numpy.ma.is_masked(block) + assert m.placeholder == "C" + + # Trying with byte strings now. 
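+    # (DelayedMask coerces the placeholder to the seed's dtype, so the str
+    # placeholders below are encoded to bytes, mirroring the decoding above.)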
+ y = numpy.array([b"A", b"B", b"C"]) + m = dm.DelayedMask(y, b"A") + block = delayedarray.extract_dense_array(m) + assert numpy.ma.is_masked(block) + assert numpy.ma.is_masked(block[0]) + assert block[2] == b"C" + + m = dm.DelayedMask(y, "ASDASD") + block = delayedarray.extract_dense_array(m) + assert not numpy.ma.is_masked(block) + assert m.placeholder == b"ASDASD" + + m = dm.DelayedMask(y, numpy.str_("B")) + block = delayedarray.extract_dense_array(m) + assert numpy.ma.is_masked(block) + assert m.placeholder == b"B" + + +def test_DelayedMask_dense_type(): + y = numpy.array([[1,2,10000],[4,5,6]]) + m = dm.DelayedMask(y, 10000, dtype=numpy.dtype(numpy.int8)) + + block = delayedarray.extract_dense_array(m) + assert block.dtype == numpy.int8 + assert numpy.ma.is_masked(block) + assert numpy.ma.is_masked(block[0,2]) + assert block[0,0] == 1 + + +def test_DelayedMask_sparse(): + contents = [None, (numpy.array([1, 8]), numpy.array([2, 10])), None, None, (numpy.array([0, 3, 9]), numpy.array([3, 4, 6]))] + y = delayedarray.SparseNdarray((10, 5), contents) + + m = dm.DelayedMask(y, 4) + assert m.dtype == y.dtype + assert m.shape == y.shape + assert m.placeholder == 4 + assert delayedarray.is_sparse(m) + assert delayedarray.chunk_shape(m) == (10, 1) + + block = delayedarray.extract_sparse_array(m) + assert block.dtype == y.dtype + assert numpy.ma.is_masked(block.contents[4][1]) + #assert numpy.ma.is_masked(block[3,4]) # TODO: fix SparseNdarray subsetting to support masked arrays. + assert block[1,1] == 2 + + +def test_DelayedMask_dense_dask(): + y = numpy.array([[1,2,10000],[4,5,6]]) + m = dm.DelayedMask(y, 10000, dtype=numpy.dtype(numpy.int8)) + da = delayedarray.create_dask_array(m).compute() + assert numpy.ma.is_masked(da) + assert numpy.ma.is_masked(da[0,2]) + assert da[1,2] == 6 + diff --git a/tests/test_ReloadedArray.py b/tests/test_ReloadedArray.py new file mode 100644 index 0000000..8a30e63 --- /dev/null +++ b/tests/test_ReloadedArray.py @@ -0,0 +1,78 @@ +import numpy +from dolomite_base import save_object +import dolomite_matrix as dm +import os +import scipy.sparse +import filebackedarray +import delayedarray +from tempfile import mkdtemp + + +def test_ReloadedArray_basic(): + y = numpy.random.rand(100, 200) + dir = os.path.join(mkdtemp(), "foobar") + save_object(y, dir) + roundtrip = dm.read_dense_array(dir) + + assert roundtrip.shape == y.shape + assert numpy.issubdtype(roundtrip.dtype, numpy.floating) + assert isinstance(roundtrip, dm.ReloadedArray) + assert roundtrip.path == dir + assert isinstance(roundtrip.seed.seed, filebackedarray.Hdf5DenseArraySeed) + + assert (delayedarray.extract_dense_array(roundtrip) == y).all() + + # Checking re-saves. + dir2 = os.path.join(mkdtemp(), "foobar2") + save_object(roundtrip, dir2) + assert os.path.samefile(os.path.join(dir, "OBJECT"), os.path.join(dir2, "OBJECT")) + assert os.path.samefile(os.path.join(dir, "array.h5"), os.path.join(dir2, "array.h5")) + + dir2 = os.path.join(mkdtemp(), "foobar2") + save_object(roundtrip, dir2, reloaded_array_reuse_mode="symlink") + assert os.readlink(os.path.join(dir2, "OBJECT")) == os.path.join(dir, "OBJECT") + assert os.readlink(os.path.join(dir2, "array.h5")) == os.path.join(dir, "array.h5") + + # Copies along the tracer. 
+ with open(os.path.join(dir, "_TRACER"), "w") as handle: + handle.write("YAY") + dir2 = os.path.join(mkdtemp(), "foobar2") + save_object(roundtrip, dir2, reloaded_array_reuse_mode="copy") + assert os.stat(os.path.join(dir2, "OBJECT")).st_size == os.stat(os.path.join(dir, "OBJECT")).st_size + assert os.stat(os.path.join(dir2, "array.h5")).st_size == os.stat(os.path.join(dir, "array.h5")).st_size + assert os.stat(os.path.join(dir2, "_TRACER")).st_size == os.stat(os.path.join(dir, "_TRACER")).st_size + + # New save ignores the tracer. + dir2 = os.path.join(mkdtemp(), "foobar2") + save_object(roundtrip, dir2, reloaded_array_reuse_mode="none") + assert os.path.exists(os.path.join(dir2, "OBJECT")) + assert os.path.exists(os.path.join(dir2, "array.h5")) + assert not os.path.exists(os.path.join(dir2, "_TRACER")) + + +def test_ReloadedArray_sparse(): + y = (scipy.sparse.random(1000, 200, 0.1) > 0.5).tocsc() + dir = os.path.join(mkdtemp(),"foobar") + save_object(y, dir) + roundtrip = dm.read_compressed_sparse_matrix(dir) + + assert roundtrip.shape == y.shape + assert roundtrip.dtype == numpy.bool_ + assert isinstance(roundtrip, dm.ReloadedArray) + assert roundtrip.path == dir + assert isinstance(roundtrip.seed.seed, filebackedarray.Hdf5CompressedSparseMatrixSeed) + + as_sparse = delayedarray.extract_sparse_array(roundtrip) + assert isinstance(as_sparse, delayedarray.SparseNdarray) + assert (numpy.array(as_sparse) == y.toarray()).all() + + # Checking re-saves. + dir2 = os.path.join(mkdtemp(), "foobar2") + save_object(roundtrip, dir2) + assert os.path.samefile(os.path.join(dir, "OBJECT"), os.path.join(dir2, "OBJECT")) + assert os.path.samefile(os.path.join(dir, "matrix.h5"), os.path.join(dir2, "matrix.h5")) + + dir2 = os.path.join(mkdtemp(), "foobar2") + save_object(roundtrip, dir2, reloaded_array_reuse_mode="none") + assert os.path.exists(os.path.join(dir2, "OBJECT")) + assert os.path.exists(os.path.join(dir2, "matrix.h5")) diff --git a/tests/test_choose_chunk_dimensions.py b/tests/test_choose_chunk_dimensions.py new file mode 100644 index 0000000..2395bb7 --- /dev/null +++ b/tests/test_choose_chunk_dimensions.py @@ -0,0 +1,16 @@ +from dolomite_matrix import choose_chunk_dimensions + + +def test_choose_chunk_dimensions(): + assert choose_chunk_dimensions((1000, 100), 4, min_extent = 0, memory=8000) == (20, 2) + assert choose_chunk_dimensions((1000, 100), 4, min_extent = 10, memory=8000) == (20, 10) + assert choose_chunk_dimensions((1000, 100), 4, min_extent = 10, memory=80000) == (200, 20) + assert choose_chunk_dimensions((1000, 100), 4, min_extent = 0, memory=800000) == (1000, 100) + assert choose_chunk_dimensions((1000, 100), 8, min_extent = 0, memory=80000) == (100, 10) + assert choose_chunk_dimensions((1000, 100), 1, min_extent = 0, memory=1000) == (10, 1) + + +def test_choose_chunk_dimensions_3d(): + assert choose_chunk_dimensions((1000, 100, 10), 4, min_extent = 0, memory=400000) == (100, 10, 1) + assert choose_chunk_dimensions((1000, 100, 10), 1, min_extent = 0, memory=400000) == (400, 40, 4) + assert choose_chunk_dimensions((1000, 100, 10), 1, min_extent = 0, memory=1e8) == (1000, 100, 10) diff --git a/tests/test_choose_dense_chunk_sizes.py b/tests/test_choose_dense_chunk_sizes.py deleted file mode 100644 index 9460e12..0000000 --- a/tests/test_choose_dense_chunk_sizes.py +++ /dev/null @@ -1,16 +0,0 @@ -from dolomite_matrix import choose_dense_chunk_sizes - - -def test_choose_dense_chunk_sizes(): - assert choose_dense_chunk_sizes((1000, 100), 4, min_extent = 0, memory=8000) == (20, 2) - 
assert choose_dense_chunk_sizes((1000, 100), 4, min_extent = 10, memory=8000) == (20, 10) - assert choose_dense_chunk_sizes((1000, 100), 4, min_extent = 10, memory=80000) == (200, 20) - assert choose_dense_chunk_sizes((1000, 100), 4, min_extent = 0, memory=800000) == (1000, 100) - assert choose_dense_chunk_sizes((1000, 100), 8, min_extent = 0, memory=80000) == (100, 10) - assert choose_dense_chunk_sizes((1000, 100), 1, min_extent = 0, memory=1000) == (10, 1) - - -def test_choose_dense_chunk_sizes_3d(): - assert choose_dense_chunk_sizes((1000, 100, 10), 4, min_extent = 0, memory=400000) == (100, 10, 1) - assert choose_dense_chunk_sizes((1000, 100, 10), 1, min_extent = 0, memory=400000) == (400, 40, 4) - assert choose_dense_chunk_sizes((1000, 100, 10), 1, min_extent = 0, memory=1e8) == (1000, 100, 10) diff --git a/tests/test_compressed_sparse_matrix.py b/tests/test_compressed_sparse_matrix.py new file mode 100644 index 0000000..778f337 --- /dev/null +++ b/tests/test_compressed_sparse_matrix.py @@ -0,0 +1,88 @@ +import scipy.sparse +import dolomite_base as dl +import dolomite_matrix as dm +from tempfile import mkdtemp +import numpy +import delayedarray +import filebackedarray +import os + + +def test_compressed_sparse_matrix_csc(): + y = scipy.sparse.random(1000, 200, 0.1).tocsc() + dir = os.path.join(mkdtemp(),"foobar") + dl.save_object(y, dir) + + roundtrip = dm.read_compressed_sparse_matrix(dir) + assert roundtrip.shape == y.shape + assert roundtrip.dtype == y.dtype + assert isinstance(roundtrip, dm.ReloadedArray) + assert isinstance(roundtrip.seed.seed, filebackedarray.Hdf5CompressedSparseMatrixSeed) + assert (numpy.array(roundtrip) == y.toarray()).all() + + +def test_compressed_sparse_matrix_csr(): + y = scipy.sparse.random(1000, 200, 0.1) + y = y.tocsr() + dir = os.path.join(mkdtemp(),"foobar") + dl.save_object(y, dir) + + roundtrip = dm.read_compressed_sparse_matrix(dir) + assert roundtrip.shape == y.shape + assert numpy.issubdtype(roundtrip.dtype, numpy.floating) + assert (numpy.array(roundtrip) == y.toarray()).all() + + +def test_compressed_sparse_matrix_coo(): + y = scipy.sparse.random(1000, 200, 0.1) + y = y.tocoo() + dir = os.path.join(mkdtemp(),"foobar") + dl.save_object(y, dir) + + roundtrip = dm.read_compressed_sparse_matrix(dir) + assert roundtrip.shape == y.shape + assert numpy.issubdtype(roundtrip.dtype, numpy.floating) + assert (numpy.array(roundtrip) == y.toarray()).all() + + +def test_compressed_sparse_matrix_SparseNdarray(): + y = delayedarray.SparseNdarray( + (10, 5), + [ + None, + (numpy.array([0, 8]), numpy.array([1, 20])), + None, + (numpy.array([2, 9]), numpy.array([0, 5000])), + None + ] + ) + dir = os.path.join(mkdtemp(),"foobar") + dl.save_object(y, dir) + + roundtrip = dm.read_compressed_sparse_matrix(dir) + assert roundtrip.shape == y.shape + assert numpy.issubdtype(roundtrip.dtype, numpy.integer) + assert (numpy.array(roundtrip) == numpy.array(y)).all() + + +def test_compressed_sparse_matrix_integer(): + y = (scipy.sparse.random(1000, 200, 0.1) * 10).tocsc() + y = y.astype(numpy.int32) + dir = os.path.join(mkdtemp(),"foobar") + dl.save_object(y, dir) + + roundtrip = dm.read_compressed_sparse_matrix(dir) + assert roundtrip.shape == y.shape + assert numpy.issubdtype(roundtrip.dtype, numpy.integer) + assert (numpy.array(roundtrip) == y.toarray()).all() + + +def test_compressed_sparse_matrix_boolean(): + y = (scipy.sparse.random(1000, 200, 0.1) > 0).tocsc() + dir = os.path.join(mkdtemp(),"foobar") + dl.save_object(y, dir) + + roundtrip = 
dm.read_compressed_sparse_matrix(dir) + assert roundtrip.shape == y.shape + assert roundtrip.dtype == y.dtype + assert (numpy.array(roundtrip) == y.toarray()).all() diff --git a/tests/test_delayed_array.py b/tests/test_delayed_array.py new file mode 100644 index 0000000..bfef9da --- /dev/null +++ b/tests/test_delayed_array.py @@ -0,0 +1,129 @@ +import numpy +import dolomite_base as dl +import dolomite_matrix as dm +import delayedarray as da +import os +import h5py +import filebackedarray +from tempfile import mkdtemp +import scipy.sparse + + +def test_save_delayed_array_simple(): + x = numpy.random.rand(100, 200) + y = da.wrap(x) + 1 + assert isinstance(y, da.DelayedArray) + + dir = os.path.join(mkdtemp(), "foobar") + dl.save_object(y, dir) + + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == y.shape + assert roundtrip.dtype == y.dtype + assert isinstance(roundtrip, dm.ReloadedArray) + assert isinstance(roundtrip.seed.seed, filebackedarray.Hdf5DenseArraySeed) + assert (numpy.array(roundtrip) == x + 1).all() + + +def test_save_delayed_array_booleans(): + x1 = numpy.random.rand(100, 200) > 0 + x2 = numpy.random.rand(100, 200) > 0 + y = numpy.logical_and(da.wrap(x1), da.wrap(x2)) + assert isinstance(y, da.DelayedArray) + + dir = os.path.join(mkdtemp(), "foobar") + dl.save_object(y, dir) + + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == y.shape + assert roundtrip.dtype == numpy.bool_ + assert (numpy.array(roundtrip) == numpy.logical_and(x1, x2)).all() + + +######################################################## +######################################################## + + +class _ChunkyBoi: + def __init__(self, core, chunks): + self._core = core + self._chunks = chunks + + @property + def dtype(self): + return self._core.dtype + + @property + def shape(self): + return self._core.shape + +@da.extract_dense_array.register +def extract_dense_array_ChunkyBoi(x: _ChunkyBoi, subsets): + return da.extract_dense_array(x._core, subsets) + +@da.chunk_shape.register +def chunk_shape_ChunkyBoi(x: _ChunkyBoi): + return x._chunks + + +def test_delayed_array_custom_chunks(): + # Chunky boi (I) + x = numpy.random.rand(100, 20, 30) + y = da.wrap(_ChunkyBoi(x, (10, 10, 10))) + dir = os.path.join(mkdtemp(), "foobar") + dl.save_object(y, dir, dense_array_buffer_size=20 * 5000, dense_array_chunk_dimensions=(5, 20, 5)) + + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == y.shape + assert roundtrip.dtype == y.dtype + assert (numpy.array(roundtrip) == x).all() + + # Chunky boi (II) + y = da.wrap(_ChunkyBoi(x, (1, 1, x.shape[2]))) + dir = os.path.join(mkdtemp(), "foobar2") + dl.save_object(y, dir, dense_array_buffer_size=20 * 5000, dense_array_chunk_dimensions=(5, 1, 10)) + + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == y.shape + assert roundtrip.dtype == y.dtype + assert (numpy.array(roundtrip) == x).all() + + +def test_delayed_array_low_block_size_C_contiguous(): + x = numpy.random.rand(100, 200) + y = da.wrap(x) + 1 + dir = os.path.join(mkdtemp(), "foobar") + dl.save_object(y, dir, dense_array_buffer_size=8 * 1000) + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == y.shape + assert roundtrip.dtype == y.dtype + assert (numpy.array(roundtrip) == x + 1).all() + + +def test_delayed_array_low_block_size_F_contiguous(): + x = numpy.asfortranarray(numpy.random.rand(100, 200)) + y = da.wrap(x) + 1 + dir = os.path.join(mkdtemp(), "foobar") + dl.save_object(y, dir, dense_array_buffer_size=8 * 1000) + roundtrip = dm.read_dense_array(dir) + 
assert roundtrip.shape == y.shape + assert roundtrip.dtype == y.dtype + assert (numpy.array(roundtrip) == x + 1).all() + + +######################################################## +######################################################## + + +def test_delayed_array_sparse(): + x = scipy.sparse.random(1000, 200, 0.1).tocsc() + y = da.wrap(x) * 10 + + dir = os.path.join(mkdtemp(), "foobar") + dl.save_object(y, dir) + roundtrip = dm.read_compressed_sparse_matrix(dir) + assert roundtrip.shape == y.shape + assert roundtrip.dtype == y.dtype + assert isinstance(roundtrip, dm.ReloadedArray) + assert isinstance(roundtrip.seed.seed, filebackedarray.Hdf5CompressedSparseMatrixSeed) + assert (numpy.array(roundtrip) == x.toarray() * 10).all() diff --git a/tests/test_dense_array.py b/tests/test_dense_array.py new file mode 100644 index 0000000..2f9cf53 --- /dev/null +++ b/tests/test_dense_array.py @@ -0,0 +1,101 @@ +import numpy +import random +from dolomite_base import save_object +import dolomite_matrix as dm +import os +import h5py +import filebackedarray +import delayedarray +from tempfile import mkdtemp + + +def test_dense_array_number(): + y = numpy.random.rand(100, 200) + dir = os.path.join(mkdtemp(), "foobar") + save_object(y, dir) + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == y.shape + assert numpy.issubdtype(roundtrip.dtype, numpy.floating) + assert isinstance(roundtrip, dm.ReloadedArray) + assert isinstance(roundtrip.seed.seed, filebackedarray.Hdf5DenseArraySeed) + assert (numpy.array(roundtrip) == y).all() + + +def test_dense_array_integer(): + y = numpy.random.rand(150, 250) * 10 + y = y.astype(numpy.int32) + dir = os.path.join(mkdtemp(), "foobar") + save_object(y, dir) + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == y.shape + assert numpy.issubdtype(roundtrip.dtype, numpy.integer) + assert (numpy.array(roundtrip) == y).all() + + +def test_dense_array_boolean(): + y = numpy.random.rand(99, 75) > 0 + dir = os.path.join(mkdtemp(), "foobar") + save_object(y, dir) + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == y.shape + assert roundtrip.dtype == numpy.bool_ + assert (numpy.array(roundtrip) == y).all() + + +def test_dense_array_string(): + y = numpy.array(["Sumire", "Kanon", "Chisato", "Ren", "Keke"]) + dir = os.path.join(mkdtemp(), "foobar") + save_object(y, dir) + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == y.shape + assert numpy.issubdtype(roundtrip.dtype, numpy.str_) + assert (numpy.array(roundtrip) == y).all() + + +######################################################## +######################################################## + + +#def test_dense_array_number_mask(): +# y0 = numpy.random.rand(100, 200) +# mask = y0 > 0.9 +# y = numpy.ma.MaskedArray(y0, mask=mask) +# +# dir = os.path.join(mkdtemp(), "foobar") +# save_object(y, dir) +# roundtrip = dm.read_dense_array(dir) +# assert roundtrip.shape == y.shape +# assert numpy.issubdtype(roundtrip.dtype, numpy.floating) +# +# dump = delayedarray.extract_dense_array(roundtrip) +# assert (dump.mask == mask).all() +# assert (dump == y).all() + + +######################################################## +######################################################## + + +def test_dense_array_F_contiguous(): + x = numpy.asfortranarray(numpy.random.rand(100, 200)) + dir = os.path.join(mkdtemp(), "foobar") + save_object(x, dir) + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == x.shape + assert roundtrip.dtype == x.dtype + assert (numpy.array(roundtrip) == 
x).all() + + +def test_dense_array_block_size(): + x = numpy.ndarray([100, 200], dtype="U1") + choices = "ABCDE" + for i in range(x.shape[0]): + for j in range(x.shape[1]): + x[i,j] = random.choice(choices) + + dir = os.path.join(mkdtemp(), "foobar") + save_object(x, dir, dense_array_buffer_size= x.dtype.itemsize * 1000) + roundtrip = dm.read_dense_array(dir) + assert roundtrip.shape == x.shape + assert roundtrip.dtype == x.dtype + assert (numpy.array(roundtrip) == x).all() diff --git a/tests/test_optimize_storage.py b/tests/test_optimize_storage.py new file mode 100644 index 0000000..a2e978b --- /dev/null +++ b/tests/test_optimize_storage.py @@ -0,0 +1,660 @@ +import dolomite_matrix._optimize_storage as optim +import numpy +import delayedarray + + +################################################### +################################################### + + +def test_optimize_integer_storage_dense(): + # Unsigned integers + y = numpy.array([1,2,3]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "u1" + assert opt.placeholder is None + + y = numpy.array([1,2,300]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "u2" + assert opt.placeholder is None + + y = numpy.array([1,2,300000]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i4" + assert opt.placeholder is None + + y = numpy.array([1,2,3000000000]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "f8" + assert opt.placeholder is None + + # Signed integers + y = numpy.array([-1,2,3]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + y = numpy.array([-1,2,200]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i2" + assert opt.placeholder is None + + y = numpy.array([-1,2,200000]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i4" + assert opt.placeholder is None + + y = numpy.array([-1,2,-20000000000]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "f8" + assert opt.placeholder is None + + # Empty + y = numpy.array([], dtype=numpy.int32) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + +def test_optimize_integer_storage_dense_MaskedArray(): + # Unsigned integers + y = numpy.ma.MaskedArray(numpy.array([1,2,3]), mask=[True, False, False]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "u1" + assert opt.placeholder == 2**8 - 1 + + y = numpy.ma.MaskedArray(numpy.array([1,2,300]), mask=[True, False, False]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "u2" + assert opt.placeholder == 2**16 - 1 + + y = numpy.ma.MaskedArray(numpy.array([1,2,3000000]), mask=[True, False, False]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i4" + assert opt.placeholder == 2**31 - 1 + + y = numpy.ma.MaskedArray(numpy.array([1,2,3000000000]), mask=[True, False, False]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "f8" + assert numpy.isnan(opt.placeholder) + + # Signed integers + y = numpy.ma.MaskedArray(numpy.array([-1,2,3]), mask=[False, True, False]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i1" + assert opt.placeholder == -2**7 + + y = numpy.ma.MaskedArray(numpy.array([-1,2,200]), mask=[False, True, False]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i2" + assert opt.placeholder == -2**15 + + y = numpy.ma.MaskedArray(numpy.array([-1,2,200000]), mask=[False, True, False]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i4" 
+ assert opt.placeholder == -2**31 + + y = numpy.ma.MaskedArray(numpy.array([-1,2,200000000000]), mask=[False, True, False]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "f8" + assert numpy.isnan(opt.placeholder) + + # Masked large values. + y = numpy.ma.MaskedArray(numpy.array([1000,2,3]), mask=[True, False, False]) + opt = optim.optimize_integer_storage(y) + assert opt.type == "u1" + assert opt.placeholder == 2**8 - 1 + + # Masked but no op. + y = numpy.ma.MaskedArray(numpy.array([1000,2,3]), mask=False) + opt = optim.optimize_integer_storage(y) + assert opt.type == "u2" + assert opt.placeholder is None + + # Fully masked. + y = numpy.ma.MaskedArray([1,2,3], mask=True) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i1" + assert opt.placeholder == -128 + + +def test_optimize_integer_storage_Sparse2darray(): + y = delayedarray.SparseNdarray([10, 5], None, dtype=numpy.int32, index_dtype=numpy.int8) + opt = optim.optimize_integer_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + y = delayedarray.SparseNdarray( + [10, 5], + [ + None, + (numpy.array([0, 8]), numpy.array([1, 20])), + None, + (numpy.array([2, 9]), numpy.array([0, 5000])), + None + ] + ) + + opt = optim.optimize_integer_storage(y) + assert opt.type == "u2" + assert opt.placeholder is None + assert opt.non_zero == 4 + + y = delayedarray.SparseNdarray( + [10, 5], + [ + None, + (numpy.array([0, 8]), numpy.ma.MaskedArray(numpy.array([1, 20]), mask=True)), + None, + (numpy.array([1, 7, 9]), numpy.ma.MaskedArray(numpy.array([-1, -1000, 500000]), mask=False)), + None + ] + ) + + opt = optim.optimize_integer_storage(y) + assert opt.type == "i4" + assert opt.placeholder == -2**31 + assert opt.non_zero == 5 + + +def test_optimize_integer_storage_scipy(): + import scipy + y = scipy.sparse.coo_matrix( + ( + [1,-200,3,-4,500], + ( + [1,2,3,4,5], + [1,2,3,4,5] + ) + ), + [10, 10] + ) + + opt = optim.optimize_integer_storage(y) + assert opt.type == "i2" + assert opt.placeholder is None + assert opt.non_zero == 5 + + opt = optim.optimize_integer_storage(y.tocsc()) + assert opt.type == "i2" + assert opt.placeholder is None + assert opt.non_zero == 5 + + opt = optim.optimize_integer_storage(y.tocsr()) + assert opt.type == "i2" + assert opt.placeholder is None + assert opt.non_zero == 5 + + +def test_optimize_integer_storage_Any(): + y = delayedarray.DelayedArray(numpy.array([[1,2,3],[4,5,6]])) + opt = optim.optimize_integer_storage(y * 200000) + assert opt.type == "i4" + assert opt.placeholder is None + + y = delayedarray.SparseNdarray( + [10, 5], + [ + None, + (numpy.array([0, 8]), numpy.array([1, 20])), + None, + (numpy.array([2, 9]), numpy.array([0, 5000])), + None + ] + ) + y = delayedarray.DelayedArray(y) + opt = optim.optimize_integer_storage(y * 2) + assert opt.type == "u2" + assert opt.placeholder is None + + +################################################### +################################################### + + +def test_optimize_float_storage_dense(): + # Unsigned integers. 
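+    # (Float data that only holds integer values is stored with the smallest
+    # sufficient integer type.)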
+ y = numpy.array([1.0,2,3]) + opt = optim.optimize_float_storage(y) + assert opt.type == "u1" + assert opt.placeholder is None + + y = numpy.array([1.0,2,300]) + opt = optim.optimize_float_storage(y) + assert opt.type == "u2" + assert opt.placeholder is None + + y = numpy.array([1.0,2,300000]) + opt = optim.optimize_float_storage(y) + assert opt.type == "u4" + assert opt.placeholder is None + + y = numpy.array([1.0,2,30000000000]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder is None + + # Signed integers. + y = numpy.array([-1.0,2,3]) + opt = optim.optimize_float_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + y = numpy.array([-1.0,2,200]) + opt = optim.optimize_float_storage(y) + assert opt.type == "i2" + assert opt.placeholder is None + + y = numpy.array([-1.0,2,200000]) + opt = optim.optimize_float_storage(y) + assert opt.type == "i4" + assert opt.placeholder is None + + y = numpy.array([-1.0,2,-20000000000]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder is None + + # Empty + y = numpy.array([], dtype=numpy.float64) + opt = optim.optimize_float_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + # Actual floating point values. + y = numpy.array([-1.5,2,3]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder is None + + y = numpy.array([numpy.NaN,2,3]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder is None + + y = numpy.array([numpy.inf,2,3]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder is None + + # 32-bit floating point values. + y = numpy.array([-1.5,2,3], dtype=numpy.float32) + opt = optim.optimize_float_storage(y) + assert opt.type == "f4" + assert opt.placeholder is None + + +def test_optimize_float_storage_dense_MaskedArray(): + # Unsigned floats + y = numpy.ma.MaskedArray(numpy.array([1.0,2,3]), mask=[True, False, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "u1" + assert opt.placeholder == 2**8 - 1 + + y = numpy.ma.MaskedArray(numpy.array([1.0,2,300]), mask=[True, False, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "u2" + assert opt.placeholder == 2**16 - 1 + + y = numpy.ma.MaskedArray(numpy.array([1.0,2,3000000]), mask=[True, False, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "u4" + assert opt.placeholder == 2**32 - 1 + + y = numpy.ma.MaskedArray(numpy.array([1.0,2,30000000000]), mask=[True, False, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert numpy.isnan(opt.placeholder) + + # Signed floats + y = numpy.ma.MaskedArray(numpy.array([-1.0,2,3]), mask=[False, True, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "i1" + assert opt.placeholder == -2**7 + + y = numpy.ma.MaskedArray(numpy.array([-1.0,2,200]), mask=[False, True, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "i2" + assert opt.placeholder == -2**15 + + y = numpy.ma.MaskedArray(numpy.array([-1.0,2,200000]), mask=[False, True, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "i4" + assert opt.placeholder == -2**31 + + y = numpy.ma.MaskedArray(numpy.array([-1.0,2,200000000000]), mask=[False, True, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert numpy.isnan(opt.placeholder) + + # Masked large values. 
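+    # (Masked entries are excluded when choosing the storage range, so the
+    # masked 1000.0 does not force a wider type.)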
+ y = numpy.ma.MaskedArray(numpy.array([1000.0,2,3]), mask=[True, False, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "u1" + assert opt.placeholder == 2**8 - 1 + + # Masked but no op. + y = numpy.ma.MaskedArray(numpy.array([1000.0,2,3]), mask=False) + opt = optim.optimize_float_storage(y) + assert opt.type == "u2" + assert opt.placeholder is None + + # Fully masked. + y = numpy.ma.MaskedArray([1.0,2,3], mask=True) + opt = optim.optimize_float_storage(y) + assert opt.type == "i1" + assert opt.placeholder == -128 + + # Actual floating point values. + y = numpy.ma.MaskedArray([-1.5,2,3], mask=[False, True, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert numpy.isnan(opt.placeholder) + + y = numpy.ma.MaskedArray([numpy.NaN,2,3], mask=[False, True, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder == numpy.inf + + y = numpy.ma.MaskedArray([numpy.NaN,2,numpy.inf], mask=[False, True, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder == -numpy.inf + + fstats = numpy.finfo(numpy.float64) + y = numpy.ma.MaskedArray([numpy.NaN, 2, numpy.inf, -numpy.inf], mask=[False, True, False, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder == fstats.min + + y = numpy.ma.MaskedArray([numpy.NaN, 2, numpy.inf, -numpy.inf, fstats.min], mask=[False, True, False, False, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder == fstats.max + + y = numpy.ma.MaskedArray([numpy.NaN, 2, numpy.inf, -numpy.inf, fstats.min, fstats.max], mask=[False, True, False, False, False, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder == 0 + + y = numpy.ma.MaskedArray([numpy.NaN, 2, numpy.inf, -numpy.inf, fstats.min, fstats.max, 0], mask=[False, True, False, False, False, False, False]) + opt = optim.optimize_float_storage(y) + assert opt.type == "f8" + assert opt.placeholder == fstats.min / 2 + + # 32-bit floating point values. 
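+    # (float32 inputs keep their "f4" storage type, with NaN as the placeholder.)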
+    y = numpy.ma.MaskedArray([-1.5,2.2,3], mask=[True, False, False], dtype=numpy.float32)
+    opt = optim.optimize_float_storage(y)
+    assert opt.type == "f4"
+    assert numpy.isnan(opt.placeholder)
+
+
+def test_optimize_float_storage_Sparse2darray():
+    y = delayedarray.SparseNdarray([10, 5], None, dtype=numpy.float32, index_dtype=numpy.int8)
+    opt = optim.optimize_float_storage(y)
+    assert opt.type == "i1"
+    assert opt.placeholder is None
+
+    y = delayedarray.SparseNdarray(
+        [10, 5],
+        [
+            None,
+            (numpy.array([0, 8]), numpy.array([1.0, 20])),
+            None,
+            (numpy.array([2, 9]), numpy.array([0, 5000.5])),
+            None
+        ]
+    )
+
+    opt = optim.optimize_float_storage(y)
+    assert opt.type == "f8"
+    assert opt.placeholder is None
+    assert opt.non_zero == 4
+
+    y = delayedarray.SparseNdarray(
+        [10, 5],
+        [
+            None,
+            (numpy.array([0, 8]), numpy.ma.MaskedArray(numpy.array([1, 2.0]), mask=True)),
+            None,
+            (numpy.array([1, 7, 9]), numpy.ma.MaskedArray(numpy.array([-1.0, -1000, 500000]), mask=False)),
+            None
+        ]
+    )
+
+    opt = optim.optimize_float_storage(y)
+    assert opt.type == "i4"
+    assert opt.placeholder == -2**31
+    assert opt.non_zero == 5
+
+
+def test_optimize_float_storage_scipy():
+    import scipy
+    y = scipy.sparse.coo_matrix(
+        (
+            [1.0,-200.0,3,-4,500],
+            (
+                [1,2,3,4,5],
+                [1,2,3,4,5]
+            )
+        ),
+        [10, 10]
+    )
+    assert y.dtype == numpy.float64
+
+    opt = optim.optimize_float_storage(y)
+    assert opt.type == "i2"
+    assert opt.placeholder is None
+    assert opt.non_zero == 5
+
+    opt = optim.optimize_float_storage(y.tocsc())
+    assert opt.type == "i2"
+    assert opt.placeholder is None
+    assert opt.non_zero == 5
+
+    opt = optim.optimize_float_storage(y.tocsr())
+    assert opt.type == "i2"
+    assert opt.placeholder is None
+    assert opt.non_zero == 5
+
+
+def test_optimize_float_storage_Any():
+    y = delayedarray.DelayedArray(numpy.array([[1,2,3],[4,5,6]]))
+    y = y * 20000.000
+    assert y.dtype == numpy.float64
+
+    opt = optim.optimize_float_storage(y)
+    assert opt.type == "u4"
+    assert opt.placeholder is None
+
+    y = delayedarray.SparseNdarray(
+        [10, 5],
+        [
+            None,
+            (numpy.array([0, 8]), numpy.array([1, 2.0])),
+            None,
+            (numpy.array([2, 9]), numpy.array([0, 500.0])),
+            None
+        ]
+    )
+    y = delayedarray.DelayedArray(y)
+    opt = optim.optimize_float_storage(y * 2)
+    assert opt.type == "u2"
+    assert opt.placeholder is None
+
+
+###################################################
+###################################################
+
+
+def test_optimize_string_storage_dense():
+    y = numpy.array(["A","BB","CCC"])
+    opt = optim.optimize_string_storage(y)
+    assert opt.type == "S3"
+    assert opt.placeholder is None
+
+    # Empty
+    y = numpy.array([], dtype=numpy.str_)
+    opt = optim.optimize_string_storage(y)
+    assert opt.type == "S1"
+    assert opt.placeholder is None
+
+
+def test_optimize_string_storage_dense_MaskedArray():
+    y = numpy.ma.MaskedArray(["A","BB","CCC"], mask=[True,False,False])
+    opt = optim.optimize_string_storage(y)
+    assert opt.type == "S3"
+    assert opt.placeholder == "NA"
+
+    y = numpy.ma.MaskedArray(["A","NA","CCC"], mask=[True,False,False])
+    opt = optim.optimize_string_storage(y)
+    assert opt.type == "S3"
+    assert opt.placeholder == "NA_"
+
+    y = numpy.ma.MaskedArray(["A","NA","NA_","CCC"], mask=[True,False,False,False])
+    opt = optim.optimize_string_storage(y)
+    assert opt.type == "S4"
+    assert opt.placeholder == "NA__"
+
+    # Masked again; the placeholder defaults to "NA", extended with underscores
+    # until it no longer collides with any observed string.
+ y = numpy.ma.MaskedArray(["A","BB","CCC"], mask=[True,False,False]) + opt = optim.optimize_string_storage(y) + assert opt.type == "S3" + assert opt.placeholder == "NA" + + +def test_optimize_string_storage_Any(): + y = delayedarray.DelayedArray(numpy.array([["A","BB","CCC"],["DDDD","EEEEE","FFFFFF"]])) + opt = optim.optimize_string_storage(y) + assert opt.type == "S6" + assert opt.placeholder is None + + +################################################### +################################################### + + +def test_optimize_boolean_storage_dense(): + y = numpy.array([True,False,True]) + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + # Empty + y = numpy.array([], dtype=numpy.bool_) + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + +def test_optimize_boolean_storage_dense_MaskedArray(): + # Unsigned booleans + y = numpy.ma.MaskedArray(numpy.array([True,False,True]), mask=[True, False, False]) + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder == -1 + + # Masked but no op. + y = numpy.ma.MaskedArray(numpy.array([True,False,True]), mask=False) + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + # Fully masked. + y = numpy.ma.MaskedArray([True,False,False], mask=True) + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder == -1 + + +def test_optimize_boolean_storage_Sparse2darray(): + y = delayedarray.SparseNdarray([10, 5], None, dtype=numpy.bool_, index_dtype=numpy.int8) + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + y = delayedarray.SparseNdarray( + [10, 5], + [ + None, + (numpy.array([0, 8]), numpy.array([True, False])), + None, + (numpy.array([2, 9]), numpy.array([False, True])), + None + ] + ) + + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + assert opt.non_zero == 4 + + y = delayedarray.SparseNdarray( + [10, 5], + [ + None, + (numpy.array([0, 8]), numpy.ma.MaskedArray(numpy.array([True, False]), mask=True)), + None, + (numpy.array([1, 7, 9]), numpy.ma.MaskedArray(numpy.array([False, False, True]), mask=False)), + None + ] + ) + + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder == -1 + assert opt.non_zero == 5 + + +def test_optimize_boolean_storage_scipy(): + import scipy + y = scipy.sparse.coo_matrix( + ( + [True,False,True,False,True], + ( + [1,2,3,4,5], + [1,2,3,4,5] + ) + ), + [10, 10] + ) + + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + assert opt.non_zero == 5 + + opt = optim.optimize_boolean_storage(y.tocsc()) + assert opt.type == "i1" + assert opt.placeholder is None + assert opt.non_zero == 5 + + opt = optim.optimize_boolean_storage(y.tocsr()) + assert opt.type == "i1" + assert opt.placeholder is None + assert opt.non_zero == 5 + + +def test_optimize_boolean_storage_Any(): + y = delayedarray.DelayedArray(numpy.array([[True,False,True],[False,True,False]])) + opt = optim.optimize_boolean_storage(y) + assert opt.type == "i1" + assert opt.placeholder is None + + y = delayedarray.SparseNdarray( + [10, 5], + [ + None, + (numpy.array([0, 8]), numpy.array([True, False])), + None, + (numpy.array([2, 9]), numpy.array([False, True])), + None + ] + ) + y = delayedarray.DelayedArray(y) + opt = optim.optimize_boolean_storage(y) + assert 
opt.type == "i1" + assert opt.placeholder is None + + diff --git a/tests/test_stage_DelayedArray.py b/tests/test_stage_DelayedArray.py deleted file mode 100644 index 2878de8..0000000 --- a/tests/test_stage_DelayedArray.py +++ /dev/null @@ -1,178 +0,0 @@ -import numpy -from dolomite_base import stage_object, write_metadata, load_object -from delayedarray import wrap -import delayedarray as da -import dolomite_matrix -from dolomite_matrix._utils import _choose_block_shape -import os -import h5py -import filebackedarray -from tempfile import mkdtemp -import scipy.sparse - - -def test_stage_DelayedArray_simple(): - x = numpy.random.rand(100, 200) - y = wrap(x) + 1 - assert isinstance(y, da.DelayedArray) - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar") - write_metadata(meta, dir=dir) - assert meta["array"]["type"] == "number" - - fpath = os.path.join(dir, meta["path"]) - handle = h5py.File(fpath, "r") - dset = handle[meta["hdf5_dense_array"]["dataset"]] - - copy = dset[:].T - assert copy.dtype == y.dtype - assert (copy == x + 1).all() - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5DenseArray) - assert (numpy.array(roundtrip) == x + 1).all() - - -def test_stage_DelayedArray_booleans(): - x1 = numpy.random.rand(100, 200) > 0 - x2 = numpy.random.rand(100, 200) > 0 - y = numpy.logical_and(wrap(x1), wrap(x2)) - assert isinstance(y, da.DelayedArray) - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar") - write_metadata(meta, dir=dir) - assert meta["array"]["type"] == "boolean" - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == numpy.bool_ - assert isinstance(roundtrip, filebackedarray.Hdf5DenseArray) - assert (numpy.array(roundtrip) == numpy.logical_and(x1, x2)).all() - - -######################################################## -######################################################## - - -class _ChunkyBoi: - def __init__(self, core, chunks): - self._core = core - self._chunks = chunks - - @property - def dtype(self): - return self._core.dtype - - @property - def shape(self): - return self._core.shape - -@da.extract_dense_array.register -def extract_dense_array_ChunkyBoi(x: _ChunkyBoi, subsets): - return da.extract_dense_array(x._core, subsets) - -@da.chunk_shape.register -def chunk_shape_ChunkyBoi(x: _ChunkyBoi): - return x._chunks - - -######################################################## -######################################################## - - -def test_stage_DelayedArray_choose_block_shape(): - y = _ChunkyBoi(numpy.random.rand(100, 200), (10, 10)) - assert _choose_block_shape(y, 2000 * 8) == (10, 200) - assert _choose_block_shape(y, 5000 * 8) == (20, 200) - - y = _ChunkyBoi(numpy.random.rand(100, 200), (100, 10)) - assert _choose_block_shape(y, 2000 * 8) == (100, 20) - assert _choose_block_shape(y, 5000 * 8) == (100, 50) - - y = _ChunkyBoi(numpy.random.rand(100, 200, 300), (10, 10, 10)) - assert _choose_block_shape(y, 2000 * 8) == (10, 10, 20) - - y = _ChunkyBoi(numpy.random.rand(100, 200, 300), (1, 1, 300)) - assert _choose_block_shape(y, 5000 * 8) == (1, 16, 300) - - -def test_stage_DelayedArray_low_block_size_C_contiguous(): - x = numpy.random.rand(100, 200) - y = wrap(x) + 1 - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar", block_size=8000) - - fpath = os.path.join(dir, meta["path"]) - handle = h5py.File(fpath, "r") - dset = 
handle[meta["hdf5_dense_array"]["dataset"]] - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5DenseArray) - assert (numpy.array(roundtrip) == x + 1).all() - - -def test_stage_DelayedArray_low_block_size_F_contiguous(): - x = numpy.asfortranarray(numpy.random.rand(100, 200)) - y = wrap(x) + 1 - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar", block_size=8000) - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5DenseArray) - assert (numpy.array(roundtrip) == x + 1).all() - - -def test_stage_DelayedArray_custom_chunks(): - # Chunky boi (I) - x = numpy.random.rand(100, 200, 300) - - y = wrap(_ChunkyBoi(x, (10, 10, 10))) - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar", block_size=8 * 5000) - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5DenseArray) - assert (numpy.array(roundtrip) == x).all() - - # Chunky boi (II) - y = wrap(_ChunkyBoi(x, (1, 1, x.shape[2]))) - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar", block_size=8 * 5000) - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5DenseArray) - assert (numpy.array(roundtrip) == x).all() - - -######################################################## -######################################################## - - -def test_stage_DelayedArray_sparse(): - x = scipy.sparse.random(1000, 200, 0.1).tocsc() - y = wrap(x) * 10 - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar") - write_metadata(meta, dir=dir) - assert meta["array"]["type"] == "number" - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5CompressedSparseMatrix) - assert (numpy.array(roundtrip) == x.toarray() * 10).all() diff --git a/tests/test_stage_ndarray.py b/tests/test_stage_ndarray.py deleted file mode 100644 index 80c150a..0000000 --- a/tests/test_stage_ndarray.py +++ /dev/null @@ -1,77 +0,0 @@ -import numpy -from dolomite_base import stage_object, write_metadata, load_object -import dolomite_matrix -import os -import h5py -import filebackedarray -from tempfile import mkdtemp - - -def test_stage_ndarray_number(): - y = numpy.random.rand(100, 200) - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar") - write_metadata(meta, dir=dir) - assert meta["array"]["type"] == "number" - - fpath = os.path.join(dir, meta["path"]) - handle = h5py.File(fpath, "r") - dset = handle[meta["hdf5_dense_array"]["dataset"]] - - assert dset.shape[0] == 200 # transposed, as expected. 
- assert dset.shape[1] == 100 - assert dset.dtype == numpy.float64 - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5DenseArray) - assert (numpy.array(roundtrip) == y).all() - - -def test_stage_ndarray_integer(): - y = numpy.random.rand(150, 250) * 10 - y = y.astype(numpy.int32) - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar") - write_metadata(meta, dir=dir) - assert meta["array"]["type"] == "integer" - - fpath = os.path.join(dir, meta["path"]) - handle = h5py.File(fpath, "r") - dset = handle[meta["hdf5_dense_array"]["dataset"]] - - assert dset.shape[0] == 250 # transposed, as expected. - assert dset.shape[1] == 150 - assert dset.dtype == numpy.int32 - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5DenseArray) - assert (numpy.array(roundtrip) == y).all() - - -def test_stage_ndarray_boolean(): - y = numpy.random.rand(99, 75) > 0 - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar") - write_metadata(meta, dir=dir) - assert meta["array"]["type"] == "boolean" - - fpath = os.path.join(dir, meta["path"]) - handle = h5py.File(fpath, "r") - dset = handle[meta["hdf5_dense_array"]["dataset"]] - - assert dset.shape[0] == 75 # transposed, as expected. - assert dset.shape[1] == 99 - assert dset.dtype == numpy.uint8 - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5DenseArray) - assert (numpy.array(roundtrip) == y).all() diff --git a/tests/test_stage_sparse.py b/tests/test_stage_sparse.py deleted file mode 100644 index 67fc709..0000000 --- a/tests/test_stage_sparse.py +++ /dev/null @@ -1,54 +0,0 @@ -import scipy.sparse -from dolomite_base import stage_object, write_metadata, load_object -import dolomite_matrix -from tempfile import mkdtemp -import filebackedarray -import numpy - - -def test_stage_scipy_csc(): - y = scipy.sparse.random(1000, 200, 0.1).tocsc() - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar") - write_metadata(meta, dir=dir) - assert meta["array"]["type"] == "number" - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5CompressedSparseMatrix) - assert (numpy.array(roundtrip) == y.toarray()).all() - - -def test_stage_scipy_csr(): - y = scipy.sparse.random(1000, 200, 0.1) * 10 - y = y.astype(numpy.int32) - y = y.tocsr() - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar") - write_metadata(meta, dir=dir) - assert meta["array"]["type"] == "integer" - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert numpy.issubdtype(roundtrip.dtype, numpy.integer) - assert isinstance(roundtrip, filebackedarray.Hdf5CompressedSparseMatrix) - assert (numpy.array(roundtrip) == y.toarray()).all() - - -def test_stage_ndarray_boolean(): - y = scipy.sparse.random(1000, 200, 0.1) > 0 - y = y.tocoo() - - dir = mkdtemp() - meta = stage_object(y, dir=dir, path="foobar") - write_metadata(meta, dir=dir) - assert meta["array"]["type"] == "boolean" - - roundtrip = load_object(meta, dir) - assert roundtrip.shape == y.shape - assert roundtrip.dtype == y.dtype - assert isinstance(roundtrip, filebackedarray.Hdf5CompressedSparseMatrix) - assert (numpy.array(roundtrip) == y.toarray()).all() 
diff --git a/tests/test_write_sparse_matrix.py b/tests/test_write_sparse_matrix.py deleted file mode 100644 index c010094..0000000 --- a/tests/test_write_sparse_matrix.py +++ /dev/null @@ -1,134 +0,0 @@ -import scipy.sparse -import dolomite_matrix -from tempfile import mkstemp -from filebackedarray import Hdf5CompressedSparseMatrix -from delayedarray import wrap, Combine -import delayedarray -import h5py -import numpy - - -def test_write_sparse_matrix_scipy_csc(): - out = scipy.sparse.random(1000, 200, 0.1).tocsc() - _, fpath = mkstemp(suffix=".h5") - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - - with h5py.File(fpath, "r") as handle: - dset = handle["foo/data"] - assert dset.dtype == numpy.float64 - iset = handle["foo/indices"] - assert iset.dtype == numpy.uint16 - assert list(handle["foo/shape"][:]) == [1000, 200] - - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1000, 200), by_column=True) - assert reloaded.dtype == numpy.float64 - assert (numpy.array(reloaded) == out.toarray()).all() - - -def test_write_sparse_matrix_scipy_other(): - out = scipy.sparse.random(1000, 200, 0.1).tocoo() - _, fpath = mkstemp(suffix=".h5") - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - - with h5py.File(fpath, "r") as handle: - dset = handle["foo/data"] - assert dset.dtype == numpy.float64 - iset = handle["foo/indices"] - assert iset.dtype == numpy.uint16 - assert list(handle["foo/shape"][:]) == [1000, 200] - - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1000, 200), by_column=True) - assert reloaded.dtype == numpy.float64 - assert (numpy.array(reloaded) == out.toarray()).all() - - -def test_write_sparse_matrix_DelayedArray(): - raw = scipy.sparse.random(1000, 200, 0.1).tocsc() - out = wrap(raw) * 10 - _, fpath = mkstemp(suffix=".h5") - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - - with h5py.File(fpath, "r") as handle: - dset = handle["foo/data"] - assert dset.dtype == numpy.float64 - iset = handle["foo/indices"] - assert iset.dtype == numpy.uint16 - assert list(handle["foo/shape"][:]) == [1000, 200] - - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1000, 200), by_column=True) - assert reloaded.dtype == numpy.float64 - assert (numpy.array(reloaded) == raw.toarray() * 10).all() - - # Trying again with a pristine object. - out = wrap(raw) - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1000, 200), by_column=True) - assert reloaded.dtype == numpy.float64 - assert (numpy.array(reloaded) == raw.toarray()).all() - - -def test_write_sparse_matrix_Combine(): - raw1 = scipy.sparse.random(1000, 200, 0.1).tocsc() - raw2 = scipy.sparse.random(1000, 100, 0.1).tocsc() - out = numpy.concatenate([wrap(raw1), wrap(raw2)], axis=1) - assert isinstance(out.seed, Combine) - - _, fpath = mkstemp(suffix=".h5") - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1000, 300), by_column=True) - assert reloaded.dtype == numpy.float64 - assert (numpy.array(reloaded) == numpy.concatenate([raw1.toarray(), raw2.toarray()], axis=1)).all() - - # Now combining it in the other dimension. 
- raw2b = scipy.sparse.random(150, 200, 0.1).tocsc() - out = numpy.concatenate([wrap(raw1), wrap(raw2b)], axis=0) - - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1150, 200), by_column=True) - assert reloaded.dtype == numpy.float64 - assert (numpy.array(reloaded) == numpy.concatenate([raw1.toarray(), raw2b.toarray()], axis=0)).all() - - -def test_write_sparse_matrix_type_guess(): - raw = scipy.sparse.random(1000, 200, 0.1) - raw.data *= (-1)**(numpy.random.rand(len(raw.data)) > 0.5) - - # Small signed integer. - out = numpy.round(raw * 10) - out = out.tocsc() - _, fpath = mkstemp(suffix=".h5") - - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1000, 200), by_column=True) - assert reloaded.dtype == numpy.int8 - assert (numpy.array(reloaded) == out.toarray()).all() - - # Trying a larger integer. - out = numpy.round(raw * 10000) - out = out.tocsc() - - _, fpath = mkstemp(suffix=".h5") - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1000, 200), by_column=True) - assert reloaded.dtype == numpy.int16 - assert (numpy.array(reloaded) == out.toarray()).all() - - # Checking the unsigned choices. - out = numpy.round(numpy.abs(raw) * 2**15) - out = out.tocsc() - - _, fpath = mkstemp(suffix=".h5") - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1000, 200), by_column=True) - assert reloaded.dtype == numpy.uint16 - assert (numpy.array(reloaded) == out.toarray()).all() - - # Ramping up to 32 bits. - out = numpy.round(raw * 2**30) - out = out.tocsc() - - _, fpath = mkstemp(suffix=".h5") - dolomite_matrix.write_sparse_matrix(out, fpath, "foo") - reloaded = Hdf5CompressedSparseMatrix(fpath, "foo", shape=(1000, 200), by_column=True) - assert reloaded.dtype == numpy.int32 - assert (numpy.array(reloaded) == out.toarray()).all()