Skip to content

Commit

Permalink
[110] Delayed initialization of the default store (#113)
Browse files Browse the repository at this point in the history
* merge

* issue

* bug fix

* changes

* changes to doc

Co-authored-by: Tim Hunter <[email protected]>
  • Loading branch information
tjhunter and Tim Hunter authored Feb 24, 2021
1 parent 83878e5 commit b11a265
Show file tree
Hide file tree
Showing 15 changed files with 93 additions and 51 deletions.
61 changes: 40 additions & 21 deletions dds/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import pathlib
import time
import tempfile
from collections import OrderedDict
from typing import TypeVar, Tuple, Callable, Dict, Any, Optional, Union, Set, List

Expand Down Expand Up @@ -36,7 +37,7 @@


# TODO: set this up in the user's temporary space
_store: Store = LocalFileStore("/tmp/dds/internal/", "/tmp/dds/data/")
_store_var: Optional[Store] = None
_eval_ctx: Optional[EvalContext] = None


Expand Down Expand Up @@ -64,11 +65,11 @@ def eval(

def load(path: Union[str, DDSPath, pathlib.Path]) -> Any:
path_ = DDSPathUtils.create(path)
key = _store.fetch_paths([path_]).get(path_)
key = _store().fetch_paths([path_]).get(path_)
if key is None:
raise DDSException(f"The store {_store} did not return path {path_}")
raise DDSException(f"The store {_store()} did not return path {path_}")
else:
return _store.fetch_blob(key)
return _store().fetch_blob(key)


def set_store(
Expand All @@ -84,21 +85,23 @@ def set_store(
store: either a store, or 'local' or 'dbfs'
"""
global _store
global _store_var
if isinstance(store, Store):
if cache_objects is not None:
raise DDSException(
f"Cannot provide a caching option and a store object of type 'Store' at the same time"
)
# Directly setting the store
_store = store
_store_var = store
return
elif store == "local":
if not internal_dir:
internal_dir = "/tmp"
internal_dir = str(
pathlib.Path(tempfile.gettempdir()).joinpath("dds", "store")
)
if not data_dir:
data_dir = "/tmp/data"
_store = LocalFileStore(internal_dir, data_dir)
data_dir = str(pathlib.Path(tempfile.gettempdir()).joinpath("dds", "data"))
_store_var = LocalFileStore(internal_dir, data_dir)
elif store == "dbfs":
if data_dir is None:
raise DDSException("Missing data_dir argument")
Expand All @@ -115,7 +118,7 @@ def set_store(
commit_type = str(commit_type or CommitType.FULL.name).upper()
commit_type_ = CommitType[commit_type]

_store = DBFSStore(
_store_var = DBFSStore(
DBFSURI.parse(internal_dir), DBFSURI.parse(data_dir), dbutils, commit_type_
)
else:
Expand All @@ -136,8 +139,8 @@ def set_store(
elif cache_objects > 0:
num_objects = cache_objects
if num_objects is not None:
_store = LRUCacheStore(_store, num_elem=num_objects)
_logger.debug(f"Setting the store to {_store}")
_store_var = LRUCacheStore(_store(), num_elem=num_objects)
_logger.debug(f"Setting the store to {_store()}")


def _parse_stages(
Expand Down Expand Up @@ -196,9 +199,9 @@ def _eval(
)
key = None if path is None else _eval_ctx.requested_paths[path]
t = _time()
if key is not None and _store.has_blob(key):
if key is not None and _store().has_blob(key):
_logger.debug(f"_eval:Return cached {path} from {key}")
blob = _store.fetch_blob(key)
blob = _store().fetch_blob(key)
_add_delta(t, ProcessingStage.STORE_COMMIT)
return blob
else:
Expand All @@ -217,11 +220,27 @@ def _eval(
if key is not None:
_logger.info(f"_eval:Storing blob into key {key}")
t = _time()
_store.store_blob(key, res, codec=None)
_store().store_blob(key, res, codec=None)
_add_delta(t, ProcessingStage.STORE_COMMIT)
return res


def _store() -> Store:
    """
    Returns the store currently in use.

    When no store has been set yet (the module-level `_store_var` is still None), a
    LocalFileStore is created first, using the "dds/store" and "dds/data" subdirectories
    of the directory returned by `tempfile.gettempdir()`. That store is kept in
    `_store_var`, so later calls return the same object.
    """
    global _store_var
    if _store_var is not None:
        return _store_var
    # Nothing configured yet: fall back to a local store under the temporary directory.
    base_dir = pathlib.Path(tempfile.gettempdir()) / "dds"
    store_path = base_dir / "store"
    data_path = base_dir / "data"
    _logger.info(
        f"Initializing default store. store dir: {store_path} data dir: {data_path}"
    )
    _store_var = LocalFileStore(str(store_path), str(data_path))
    return _store_var


def _time() -> float:
    """Returns the current value of the monotonic clock (`time.monotonic()`)."""
    now = time.monotonic()
    return now

Expand Down Expand Up @@ -272,7 +291,7 @@ def _eval_new_ctx(
_logger.debug(
f"_eval_new_ctx: need to resolve indirect references: {loads_to_check}"
)
resolved_indirect_refs = _store.fetch_paths(loads_to_check)
resolved_indirect_refs = _store().fetch_paths(loads_to_check)
_logger.debug(
f"_eval_new_ctx: fetched indirect references: {resolved_indirect_refs}"
)
Expand All @@ -296,7 +315,7 @@ def _eval_new_ctx(
present_blobs: Optional[Set[PyHash]]
if extra_debug:
present_blobs = set(
[key for key in set(store_paths.values()) if _store.has_blob(key)]
[key for key in set(store_paths.values()) if _store().has_blob(key)]
)
_logger.debug(f"_eval_new_ctx: {len(present_blobs)} present blobs")
else:
Expand Down Expand Up @@ -327,9 +346,9 @@ def _eval_new_ctx(
current_sig = inters.fun_return_sig
_logger.debug(f"_eval_new_ctx:current_sig: {current_sig}")
t = _time()
if _store.has_blob(current_sig):
if _store().has_blob(current_sig):
_logger.debug(f"_eval_new_ctx:Return cached signature {current_sig}")
res = _store.fetch_blob(current_sig)
res = _store().fetch_blob(current_sig)
_add_delta(t, ProcessingStage.STORE_COMMIT)
else:
arg_repr = [str(type(arg)) for arg in args]
Expand All @@ -349,13 +368,13 @@ def _eval_new_ctx(
# TODO: add a phase for storing the blobs
_logger.info(f"_eval:Storing blob into key {obj_key}")
t = _time()
_store.store_blob(obj_key, res, codec=None)
_store().store_blob(obj_key, res, codec=None)
_add_delta(t, ProcessingStage.STORE_COMMIT)

if ProcessingStage.PATH_COMMIT in stages:
_logger.debug(f"Starting stage {ProcessingStage.PATH_COMMIT}")
t = _time()
_store.sync_paths(store_paths)
_store().sync_paths(store_paths)
_add_delta(t, ProcessingStage.PATH_COMMIT)
_logger.debug(f"Stage {ProcessingStage.PATH_COMMIT} done")
else:
Expand Down
2 changes: 1 addition & 1 deletion dds/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = "0.7.1"
version = "0.7.2"
3 changes: 3 additions & 0 deletions dds/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ def __init__(self, internal_dir: str, data_dir: str, create_dirs: bool = True):
if not os.path.exists(p_blobs):
os.makedirs(p_blobs)

def __repr__(self):
    """
    Returns a debugging description of this object: its `_root` and `_data_root`
    attributes, labeled as `internal_dir` and `data_dir` respectively.
    """
    description = f"LocalFileStore(internal_dir={self._root} data_dir={self._data_root})"
    return description

def fetch_blob(self, key: PyHash) -> Any:
p = os.path.join(self._root, "blobs", key)
meta_p = os.path.join(self._root, "blobs", key + ".meta")
Expand Down
8 changes: 8 additions & 0 deletions doc_source/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## v0.7.2

Small usability fixes in this release:

* delays the creation of a default store (and all its side effects) to better support highly concurrent environments
* fixes the type signatures of `dds.keep` and `dds.eval`
* improves debugging messages (with a potential extra round trip to the store)

## v0.7.0

Adds a major feature: caching in memory of most recently used objects. See the documentation of
Expand Down
2 changes: 1 addition & 1 deletion doc_source/tut_custom_types.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
"metadata": {},
"outputs": [],
"source": [
"dds._api._store.codec_registry().add_file_codec(PilFileCodec())"
"dds._api._store().codec_registry().add_file_codec(PilFileCodec())"
]
},
{
Expand Down
8 changes: 8 additions & 0 deletions docs/changelog/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## v0.7.2

Small usability fixes in this release:

* delays the creation of a default store (and all its side effects) to better support highly concurrent environments
* fixes the type signatures of `dds.keep` and `dds.eval`
* improves debugging messages (with a potential extra round trip to the store)

## v0.7.0

Adds a major feature: caching in memory of most recently used objects. See the documentation of
Expand Down
9 changes: 9 additions & 0 deletions docs/changelog/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal current" href="./">Changelog</a>
<ul class="current">
<li class="toctree-l2"><a class="reference internal" href="#v072">v0.7.2</a>
</li>
<li class="toctree-l2"><a class="reference internal" href="#v070">v0.7.0</a>
</li>
<li class="toctree-l2"><a class="reference internal" href="#v060">v0.6.0</a>
Expand Down Expand Up @@ -127,6 +129,13 @@
<div class="section">

<h1 id="changelog">Changelog</h1>
<h2 id="v072">v0.7.2</h2>
<p>Small usability fixes in this release:</p>
<ul>
<li>delays the creation of a default store (and all its side effects) to better support highly concurrent environments</li>
<li>fixes the type signatures of <code>dds.keep</code> and <code>dds.eval</code></li>
<li>improves debugging messages (with a potential extra round trip to the store)</li>
</ul>
<h2 id="v070">v0.7.0</h2>
<p>Adds a major feature: caching in memory of most recently used objects. See the documentation of
<code>dds.set_store</code>.</p>
Expand Down
14 changes: 7 additions & 7 deletions docs/dds-reference/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ <h2 class="doc doc-heading" id="dds.__init__.eval">
</tr>
<tr>
<td><code>args</code></td>
<td><code>Tuple[Any, ...]</code></td>
<td><code>Any</code></td>
<td>
<p>the optional arguments for this function.</p>
<p>NOTE: keyword arguments are not supported yet.</p>
Expand Down Expand Up @@ -346,11 +346,11 @@ <h2 class="doc doc-heading" id="dds.__init__.eval">
<div class="highlight">
<pre><span></span><code><span class="k">def</span> <span class="nf">eval</span><span class="p">(</span>
<span class="n">fun</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">_Out</span><span class="p">],</span>
<span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span>
<span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span>
<span class="n">dds_export_graph</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">,</span> <span class="kc">None</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dds_extra_debug</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dds_stages</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">ProcessingStage</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">_Out</span><span class="p">]:</span>
<span class="sd">"""</span>
<span class="sd"> Evaluates a function. The result of the function is not stored in the data store, but the function itself may</span>
Expand Down Expand Up @@ -450,15 +450,15 @@ <h2 class="doc doc-heading" id="dds.__init__.keep">
</tr>
<tr>
<td><code>args</code></td>
<td><code>Tuple[Any, ...]</code></td>
<td><code>Any</code></td>
<td>
<p>the arguments of this function</p>
</td>
<td><code>()</code></td>
</tr>
<tr>
<td><code>kwargs</code></td>
<td><code>Dict[str, Any]</code></td>
<td><code>Any</code></td>
<td>
<p><em>(keyword arguments are currently unsupported)</em></p>
</td>
Expand Down Expand Up @@ -518,8 +518,8 @@ <h3 id="using-complex-arguments">Using complex arguments</h3>
<pre><span></span><code><span class="k">def</span> <span class="nf">keep</span><span class="p">(</span>
<span class="n">path</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">DDSPath</span><span class="p">,</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">],</span>
<span class="n">fun</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">_Out</span><span class="p">],</span>
<span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span>
<span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">_Out</span><span class="p">:</span>
<span class="sd">"""</span>
<span class="sd"> Stores the result of calling a function to a specific path. If this particular evaluation has not happened before,</span>
Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -240,5 +240,5 @@ <h2 id="license">License</h2>

<!--
MkDocs version : 1.1.2
Build Date UTC : 2021-02-07 14:27:00.236152+00:00
Build Date UTC : 2021-02-24 18:37:47.655064+00:00
-->
2 changes: 1 addition & 1 deletion docs/search/search_index.json

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions docs/sitemap.xml
Original file line number Diff line number Diff line change
@@ -1,43 +1,43 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url><url>
<loc>None</loc>
<lastmod>2021-02-07</lastmod>
<lastmod>2021-02-24</lastmod>
<changefreq>daily</changefreq>
</url>
</urlset>
Binary file modified docs/sitemap.xml.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/tut_custom_types/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -14658,7 +14658,7 @@ <h1 id="tutorial-custom-serialization-protocols">Tutorial - Custom serialization
<div class="jp-InputPrompt jp-InputArea-prompt">In [8]:</div>
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
<div class="CodeMirror cm-s-jupyter">
<div class="highlight highlight-ipynb hl-python"><pre><span></span><span class="n">dds</span><span class="o">.</span><span class="n">_api</span><span class="o">.</span><span class="n">_store</span><span class="o">.</span><span class="n">codec_registry</span><span class="p">()</span><span class="o">.</span><span class="n">add_file_codec</span><span class="p">(</span><span class="n">PilFileCodec</span><span class="p">())</span>
<div class="highlight highlight-ipynb hl-python"><pre><span></span><span class="n">dds</span><span class="o">.</span><span class="n">_api</span><span class="o">.</span><span class="n">_store</span><span class="p">()</span><span class="o">.</span><span class="n">codec_registry</span><span class="p">()</span><span class="o">.</span><span class="n">add_file_codec</span><span class="p">(</span><span class="n">PilFileCodec</span><span class="p">())</span>
</pre></div>
</div>
</div>
Expand Down
2 changes: 1 addition & 1 deletion docs/tut_custom_types/tut_custom_types.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
"metadata": {},
"outputs": [],
"source": [
"dds._api._store.codec_registry().add_file_codec(PilFileCodec())"
"dds._api._store().codec_registry().add_file_codec(PilFileCodec())"
]
},
{
Expand Down
9 changes: 2 additions & 7 deletions docs/user_guide/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -14586,7 +14586,7 @@ <h2 id="user-guide">User guide<a class="anchor-link" href="#User-guide">¶</a></
<div class="jp-OutputArea-child">
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain">
<pre>/tmp/dds/internal/blobs/26f46034012ffdebb21af34aea2e6f0775a521122f457f23bf34d4b97facfb3b
<pre>/tmp/dds/store/blobs/26f46034012ffdebb21af34aea2e6f0775a521122f457f23bf34d4b97facfb3b
</pre>
</div>
</div>
Expand Down Expand Up @@ -15110,12 +15110,7 @@ <h2 id="conclusion">Conclusion<a class="anchor-link" href="#Conclusion">¶</a></
<div class="jp-OutputArea-child">
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain">
<pre>1c5018ef452f3aafead20de4d9e1ad5e6920453025813a266fde975387d0b5f5
22deab6baa11ebb1f379519a1c00a0bd9a8e6a93e278b8ae319c2bd95c4fd3dc
26f46034012ffdebb21af34aea2e6f0775a521122f457f23bf34d4b97facfb3b
d3b7b9510de7aaf1f8ec72bff9bfc6f4af363b07731346c5ccf8378ba83a80e5
de7ee19728e267fc76a0c22b4aaa5e28c6d9b7388038de9d422fb257609bb671
f2802c71b37ba3eeefaf0a6c6f6fe4cec847cbba8f67e7de8bd2580a27cbb5c
<pre>ls: kan geen toegang krijgen tot '/tmp/dds/internal/blobs': Bestand of map bestaat niet
</pre>
</div>
</div>
Expand Down

0 comments on commit b11a265

Please sign in to comment.