diff --git a/CHANGES.rst b/CHANGES.rst index 8f26676a0..ba5ba27d8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,15 @@ 5.7.1 (unreleased) ================== +- Introduce a new ``loadBeforeEx`` interface that complements ``loadBefore``: + ``loadBeforeEx`` is simpler, provides better information for object delete + records and can be more efficiently implemented by many storages. + ``loadBeforeEx`` is used (and required) to fix a ``DemoStorage`` data corruption + in the presence of object delete records. + See `issue 318 `_ + and `PR 323 `_ + for details. + 5.7.0 (2022-03-17) ================== diff --git a/src/ZODB/BaseStorage.py b/src/ZODB/BaseStorage.py index f7465f80f..bf4ad55ab 100644 --- a/src/ZODB/BaseStorage.py +++ b/src/ZODB/BaseStorage.py @@ -60,6 +60,7 @@ class BaseStorage(UndoLogCompatible): If it stores multiple revisions, it should implement loadSerial() loadBefore() + loadBeforeEx() Each storage will have two locks that are accessed via lock acquire and release methods bound to the instance. (Yuck.) @@ -269,7 +270,13 @@ def loadSerial(self, oid, serial): def loadBefore(self, oid, tid): """Return most recent revision of oid before tid committed.""" - return None + raise NotImplementedError + + def loadBeforeEx(self, oid, tid): + """Return most recent revision of oid before tid committed. + (see IStorageLoadBeforeEx). + """ + raise NotImplementedError def copyTransactionsFrom(self, other, verbose=0): """Copy transactions from another storage. diff --git a/src/ZODB/DB.py b/src/ZODB/DB.py index 73cb96f82..b7f60e9e5 100644 --- a/src/ZODB/DB.py +++ b/src/ZODB/DB.py @@ -732,7 +732,7 @@ def open(self, transaction_manager=None, at=None, before=None): - `before`: like `at`, but opens the readonly state before the tid or datetime. """ - # `at` is normalized to `before`, since we use storage.loadBefore + # `at` is normalized to `before`, since we use storage.loadBeforeEx # as the underlying implementation of both. before = getTID(at, before) if (before is not None and diff --git a/src/ZODB/DemoStorage.py b/src/ZODB/DemoStorage.py index bc891a061..56af9b39c 100644 --- a/src/ZODB/DemoStorage.py +++ b/src/ZODB/DemoStorage.py @@ -217,45 +217,44 @@ def __len__(self): # still want load for old clients (e.g. zeo servers) load = load_current - def loadBefore(self, oid, tid): - try: - result = self.changes.loadBefore(oid, tid) - except ZODB.POSException.POSKeyError: - # The oid isn't in the changes, so defer to base - return self.base.loadBefore(oid, tid) - - if result is None: - # The oid *was* in the changes, but there aren't any - # earlier records. Maybe there are in the base. - try: - result = self.base.loadBefore(oid, tid) - except ZODB.POSException.POSKeyError: - # The oid isn't in the base, so None will be the right result - pass + def loadBeforeEx(self, oid, before): + data, serial = ZODB.utils.loadBeforeEx(self.changes, oid, before) + if (data is not None) or (serial != ZODB.utils.z64): + # object is present in changes either as data or deletion record. + return data, serial + + # object is not present in changes at all - use base + return ZODB.utils.loadBeforeEx(self.base, oid, before) + + def loadBefore(self, oid, before): + data, serial = self.loadBeforeEx(oid, before) + + # find out next_serial. + # it is ok to use dumb/slow implementation since loadBefore should not + # be used and is provided only for backward compatibility. + next_serial = maxtid + while 1: + _, s = self.loadBeforeEx(oid, next_serial) + assert s >= serial + if s == serial: + # found - next_serial is serial of the next data record + break + next_serial = s + + if next_serial == maxtid: + next_serial = None + + # next_serial found -> return/raise what loadBefore users expect + if data is None: + if next_serial is None: + # object was never created + raise ZODB.POSException.POSKeyError(oid) else: - if result and not result[-1]: - # The oid is current in the base. We need to find - # the end tid in the base by fining the first tid - # in the changes. Unfortunately, there isn't an - # api for this, so we have to walk back using - # loadBefore. - - if tid == maxtid: - # Special case: we were looking for the - # current value. We won't find anything in - # changes, so we're done. - return result - - end_tid = maxtid - t = self.changes.loadBefore(oid, end_tid) - while t: - end_tid = t[1] - t = self.changes.loadBefore(oid, end_tid) - result = result[:2] + ( - end_tid if end_tid != maxtid else None, - ) - - return result + # object was deleted + return None + + # regular data record + return data, serial, next_serial def loadBlob(self, oid, serial): try: diff --git a/src/ZODB/FileStorage/FileStorage.py b/src/ZODB/FileStorage/FileStorage.py index 2e291f63d..4a45798fd 100644 --- a/src/ZODB/FileStorage/FileStorage.py +++ b/src/ZODB/FileStorage/FileStorage.py @@ -57,6 +57,7 @@ from ZODB.interfaces import IStorageIteration from ZODB.interfaces import IStorageRestoreable from ZODB.interfaces import IStorageUndoable +from ZODB.interfaces import IStorageLoadBeforeEx from ZODB.POSException import ConflictError from ZODB.POSException import MultipleUndoErrors from ZODB.POSException import POSKeyError @@ -144,6 +145,7 @@ def __init__(self, afile): IStorageCurrentRecordIteration, IExternalGC, IStorage, + IStorageLoadBeforeEx, ) class FileStorage( FileStorageFormatter, @@ -572,6 +574,37 @@ def loadSerial(self, oid, serial): else: return self._loadBack_impl(oid, h.back)[0] + def loadBeforeEx(self, oid, before): + """loadBeforeEx implements IStorageLoadBeforeEx.""" + with self._files.get() as _file: + try: + pos = self._lookup_pos(oid) + except POSKeyError: + # object does not exist + return None, z64 + + while 1: + h = self._read_data_header(pos, oid, _file) + if h.tid < before: + break + pos = h.prev + if not pos: + # object not yet created as of (data, serial) + """Load object data as observed before given database state. + + loadBeforeEx returns data for object with given object ID as observed + by most recent database transaction with ID < before. Two values are + returned: + + - The data record, + - The transaction ID of the data record. + + If the object does not exist, or is deleted as of requested database + state, loadBeforeEx returns data=None, and serial indicates transaction + ID of the most recent deletion done in transaction with ID < before, or + null tid if there is no such deletion. + + Note: no POSKeyError is raised even if object id is not in the storage. + """ + + class IMultiCommitStorage(IStorage): """A multi-commit storage can commit multiple transactions at once. diff --git a/src/ZODB/mvccadapter.py b/src/ZODB/mvccadapter.py index 052342252..05dee5c9e 100644 --- a/src/ZODB/mvccadapter.py +++ b/src/ZODB/mvccadapter.py @@ -10,7 +10,8 @@ import zope.interface from . import interfaces, serialize, POSException -from .utils import p64, u64, Lock, oid_repr, tid_repr +from .utils import p64, u64, z64, maxtid, Lock, loadBeforeEx, oid_repr, \ + tid_repr class Base(object): @@ -155,8 +156,20 @@ def poll_invalidations(self): def load(self, oid): assert self._start is not None - r = self._storage.loadBefore(oid, self._start) - if r is None: + data, serial = loadBeforeEx(self._storage, oid, self._start) + if data is None: + # raise POSKeyError if object does not exist at all + # TODO raise POSKeyError always and switch to raising ReadOnlyError + # only when actually detecting that load is being affected by + # simultaneous pack (see below). + if serial == z64: + # XXX second call to loadBeforeEx - it will become unneeded + # once we switch to raising POSKeyError. + _, serial_exists = loadBeforeEx(self._storage, oid, maxtid) + if serial_exists == z64: + # object does not exist at all + raise POSException.POSKeyError(oid) + # object was deleted or not-yet-created. # raise ReadConflictError - not - POSKeyError due to backward # compatibility: a pack(t+δ) could be running simultaneously to our @@ -178,11 +191,14 @@ def load(self, oid): # whether pack is/was actually running and its details, take that # into account, and raise ReadConflictError only in the presence of # database being simultaneously updated from back of its log. + # + # See https://github.com/zopefoundation/ZODB/pull/322 for + # preliminary steps in this direction. raise POSException.ReadConflictError( "load %s @%s: object deleted, likely by simultaneous pack" % (oid_repr(oid), tid_repr(p64(u64(self._start) - 1)))) - return r[:2] + return data, serial def prefetch(self, oids): try: @@ -263,10 +279,10 @@ def poll_invalidations(self): new_oid = pack = store = read_only_writer def load(self, oid, version=''): - r = self._storage.loadBefore(oid, self._before) - if r is None: + data, serial = loadBeforeEx(self._storage, oid, self._before) + if data is None: raise POSException.POSKeyError(oid) - return r[:2] + return data, serial class UndoAdapterInstance(Base): diff --git a/src/ZODB/tests/IExternalGC.test b/src/ZODB/tests/IExternalGC.test index 52983d36f..050fd182c 100644 --- a/src/ZODB/tests/IExternalGC.test +++ b/src/ZODB/tests/IExternalGC.test @@ -66,9 +66,10 @@ Now if we try to load data for the objects, we get a POSKeyError: We can still get the data if we load before the time we deleted. - >>> storage.loadBefore(oid0, conn.root()._p_serial) == (p0, s0, tid) + >>> from ZODB.utils import loadBeforeEx, z64 + >>> loadBeforeEx(storage, oid0, conn.root()._p_serial) == (p0, s0) True - >>> storage.loadBefore(oid1, conn.root()._p_serial) == (p1, s1, tid) + >>> loadBeforeEx(storage, oid1, conn.root()._p_serial) == (p1, s1) True >>> with open(storage.loadBlob(oid1, s1)) as fp: fp.read() 'some data' @@ -92,15 +93,11 @@ gone: ... POSKeyError: ... - >>> storage.loadBefore(oid0, conn.root()._p_serial) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - POSKeyError: ... + >>> loadBeforeEx(storage, oid0, conn.root()._p_serial) == (None, z64) + True - >>> storage.loadBefore(oid1, conn.root()._p_serial) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - POSKeyError: ... + >>> loadBeforeEx(storage, oid1, conn.root()._p_serial) == (None, z64) + True >>> storage.loadBlob(oid1, s1) # doctest: +ELLIPSIS Traceback (most recent call last): diff --git a/src/ZODB/tests/MVCCMappingStorage.py b/src/ZODB/tests/MVCCMappingStorage.py index 2937e214e..83c0bfa12 100644 --- a/src/ZODB/tests/MVCCMappingStorage.py +++ b/src/ZODB/tests/MVCCMappingStorage.py @@ -46,6 +46,7 @@ def new_instance(self): inst.new_oid = self.new_oid inst.pack = self.pack inst.loadBefore = self.loadBefore + inst.loadBeforeEx = self.loadBeforeEx inst._ltid = self._ltid inst._main_lock = self._lock return inst diff --git a/src/ZODB/tests/hexstorage.py b/src/ZODB/tests/hexstorage.py index 8218ce829..96c64c181 100644 --- a/src/ZODB/tests/hexstorage.py +++ b/src/ZODB/tests/hexstorage.py @@ -39,6 +39,14 @@ def __init__(self, base): setattr(self, name, v) zope.interface.directlyProvides(self, zope.interface.providedBy(base)) + if hasattr(base, 'loadBeforeEx') and \ + 'loadBeforeEx' not in self.copied_methods: + def loadBeforeEx(oid, before): + data, serial = self.base.loadBeforeEx(oid, before) + if data is not None: + data = unhexlify(data[2:]) + return data, serial + self.loadBeforeEx = loadBeforeEx def __getattr__(self, name): return getattr(self.base, name) @@ -131,7 +139,7 @@ class ServerHexStorage(HexStorage): """ copied_methods = HexStorage.copied_methods + ( - 'load', 'loadBefore', 'loadSerial', 'store', 'restore', + 'load', 'loadBeforeEx', 'loadBefore', 'loadSerial', 'store', 'restore', 'iterator', 'storeBlob', 'restoreBlob', 'record_iternext', ) diff --git a/src/ZODB/tests/testConnection.py b/src/ZODB/tests/testConnection.py index 60ce7ffb9..d8217e903 100644 --- a/src/ZODB/tests/testConnection.py +++ b/src/ZODB/tests/testConnection.py @@ -1332,6 +1332,13 @@ def load(self, oid, version=''): raise TypeError('StubStorage does not support versions.') return self._data[oid] + def loadBeforeEx(self, oid, before): + try: + data, serial = self._transdata[oid] + except KeyError: + return None, z64 + return data, serial + def loadBefore(self, oid, tid): return self._data[oid] + (None, ) diff --git a/src/ZODB/tests/testDemoStorage.py b/src/ZODB/tests/testDemoStorage.py index 8c0c4eba3..b807625c6 100644 --- a/src/ZODB/tests/testDemoStorage.py +++ b/src/ZODB/tests/testDemoStorage.py @@ -23,6 +23,7 @@ StorageTestBase, Synchronization, ) +from ZODB.tests.MinPO import MinPO import os if os.environ.get('USE_ZOPE_TESTING_DOCTEST'): @@ -32,7 +33,9 @@ import random import transaction import unittest +import ZODB.Connection import ZODB.DemoStorage +import ZODB.FileStorage import ZODB.tests.hexstorage import ZODB.tests.util import ZODB.utils @@ -267,6 +270,105 @@ def load_before_base_storage_current(): >>> base.close() """ +# additional DemoStorage tests that do not fit into common DemoStorageTests +# setup. + + +class DemoStorageTests2(ZODB.tests.util.TestCase): + def checkLoadAfterDelete(self): + """Verify that DemoStorage correctly handles load requests for objects + deleted in read-write part of the storage. + + https://github.com/zopefoundation/ZODB/issues/318 + """ + FileStorage = ZODB.FileStorage.FileStorage + DemoStorage = ZODB.DemoStorage.DemoStorage + TransactionMetaData = ZODB.Connection.TransactionMetaData + + # mkbase prepares base part of the storage. + def mkbase(): # -> zbase + zbase = FileStorage("base.fs") + db = DB(zbase) + conn = db.open() + root = conn.root() + + root['obj'] = obj = MinPO(0) + transaction.commit() + + obj.value += 1 + transaction.commit() + + conn.close() + db.close() + zbase.close() + + zbase = FileStorage("base.fs", read_only=True) + return zbase + + # prepare base + overlay + zbase = mkbase() + zoverlay = FileStorage("overlay.fs") + zdemo = DemoStorage(base=zbase, changes=zoverlay) + + # overlay: modify obj and root + db = DB(zdemo) + conn = db.open() + root = conn.root() + obj = root['obj'] + oid = obj._p_oid + obj.value += 1 + # modify root as well so that there is root revision saved in overlay + # that points to obj + root['x'] = 1 + transaction.commit() + atLive = obj._p_serial + + # overlay: delete obj from root making it a garbage + del root['obj'] + transaction.commit() + atUnlink = root._p_serial + + # unmount DemoStorage + conn.close() + db.close() + zdemo.close() # closes zbase and zoverlay as well + del zbase, zoverlay + + # simulate GC on base+overlay + zoverlay = FileStorage("overlay.fs") + txn = transaction.get() + txn_meta = TransactionMetaData(txn.user, txn.description, + txn.extension) + zoverlay.tpc_begin(txn_meta) + zoverlay.deleteObject(oid, atLive, txn_meta) + zoverlay.tpc_vote(txn_meta) + atGC = zoverlay.tpc_finish(txn_meta) + + # remount base+overlay + zbase = FileStorage("base.fs", read_only=True) + zdemo = ZODB.DemoStorage.DemoStorage(base=zbase, changes=zoverlay) + db = DB(zdemo) + + # verify: + # load(obj, atLive) -> 2 + # load(obj, atUnlink) -> 2 (garbage, but still in DB) + # load(obj, atGC) -> POSKeyError, not 1 from base + def getObjAt(at): + conn = db.open(at=at) + obj = conn.get(oid) + self.assertIsInstance(obj, MinPO) + v = obj.value + conn.close() + return v + + self.assertEqual(getObjAt(atLive), 2) + self.assertEqual(getObjAt(atUnlink), 2) + self.assertRaises(ZODB.POSException.POSKeyError, getObjAt, atGC) + + # end + db.close() + zdemo.close() # closes zbase and zoverlay as well + def test_suite(): suite = unittest.TestSuite(( @@ -289,4 +391,5 @@ def test_suite(): 'check')) suite.addTest(unittest.makeSuite(DemoStorageWrappedAroundHexMappingStorage, 'check')) + suite.addTest(unittest.makeSuite(DemoStorageTests2, 'check')) return suite diff --git a/src/ZODB/tests/test_storage.py b/src/ZODB/tests/test_storage.py index ea515eca3..53757439f 100644 --- a/src/ZODB/tests/test_storage.py +++ b/src/ZODB/tests/test_storage.py @@ -128,6 +128,17 @@ def loadBefore(self, the_oid, the_tid): return self._index[(the_oid, tid)], tid, end_tid + def loadBeforeEx(self, oid, before): + try: + r = self.loadBefore(oid, before) + except KeyError: + return None, z64 + if r is None: + # not-yet created (deleteObject not supported -> serial=0) + return None, z64 + data, serial, _ = r + return data, serial + def loadSerial(self, oid, serial): return self._index[(oid, serial)] diff --git a/src/ZODB/tests/testmvcc.py b/src/ZODB/tests/testmvcc.py index 12506661d..23c5a09ba 100644 --- a/src/ZODB/tests/testmvcc.py +++ b/src/ZODB/tests/testmvcc.py @@ -35,7 +35,7 @@ ***IMPORTANT***: The MVCC approach has changed since these tests were originally written. The new approach is much simpler because we no longer call load to get the current state of an object. We call -loadBefore instead, having gotten a transaction time at the start of a +loadBeforeEx instead, having gotten a transaction time at the start of a transaction. As a result, the rhythm of the tests is a little odd, because we no longer need to probe a complex dance that doesn't exist any more. @@ -290,7 +290,7 @@ Now deactivate "b" in the first connection, and (re)fetch it. The first connection should still see 1, due to MVCC, but to get this old state -TmpStore needs to handle the loadBefore() method. +TmpStore needs to handle the loadBeforeEx() or loadBefore() methods. >>> r1["b"]._p_deactivate() @@ -322,7 +322,7 @@ Rather than add all the complexity of ZEO to these tests, the MinimalMemoryStorage has a hook. We'll write a subclass that will -deliver an invalidation when it loads (or loadBefore's) an object. +deliver an invalidation when it loads (or loadBeforeEx's) an object. The hook allows us to test the Connection code. >>> class TestStorage(MinimalMemoryStorage): diff --git a/src/ZODB/utils.py b/src/ZODB/utils.py index 062c4b737..32612c875 100644 --- a/src/ZODB/utils.py +++ b/src/ZODB/utils.py @@ -17,6 +17,7 @@ import sys import time import threading +import warnings from binascii import hexlify, unhexlify from tempfile import mkstemp @@ -398,8 +399,57 @@ def load_current(storage, oid, version=''): some time in the future. """ assert not version - r = storage.loadBefore(oid, maxtid) - if r is None: + data, serial = loadBeforeEx(storage, oid, maxtid) + if data is None: raise ZODB.POSException.POSKeyError(oid) - assert r[2] is None - return r[:2] + return data, serial + + +_loadBeforeExWarned = set() # of storage class + + +def loadBeforeEx(storage, oid, before): + """loadBeforeEx provides IStorageLoadBeforeEx semantic for all storages. + + Storages that do not implement loadBeforeEx are served via loadBefore. + """ + loadBeforeEx = getattr(storage, 'loadBeforeEx', None) + if loadBeforeEx is not None: + try: + return loadBeforeEx(oid, before) + except NotImplementedError: + pass + + # storage does not provide IStorageLoadBeforeEx - fall back to loadBefore + try: + r = storage.loadBefore(oid, before) + except ZODB.POSException.POSKeyError: + return (None, z64) # object does not exist at all + + if r is None: + # object was removed; however loadBefore does not tell when. + # return serial=0. This can, however, lead to data corruption with e.g. + # DemoStorage (https://github.com/zopefoundation/ZODB/issues/318), so + # emit corresponding warning. + if type(storage) not in _loadBeforeExWarned: + # there is potential race around _loadBeforeExWarned access, but + # due to the GIL this race cannot result in that set corruption, + # and can only lead to us emitting the warning twice instead of + # just once. -> do not spend CPU on lock and just ignore it. + warnings.warn( + "FIXME %s does not provide loadBeforeEx - emulating it via " + "loadBefore, but ...\n" + "\t... 1) access is be potentially slower, and\n" + "\t... 2) not full semantic of loadBeforeEx could be " + "provided.\n" + "\t... this can lead to data corruption in the presence " + "of delete records.\n" + "\t... -> please see " + "https://github.com/zopefoundation/ZODB/issues/318 for " + "details." % + type(storage), PendingDeprecationWarning) + _loadBeforeExWarned.add(type(storage)) + return (None, z64) + + data, serial, next_serial = r + return (data, serial)