diff --git a/dirsrvtests/tests/suites/backups/backup_test.py b/dirsrvtests/tests/suites/backups/backup_test.py index 22885b43f..1246469a3 100644 --- a/dirsrvtests/tests/suites/backups/backup_test.py +++ b/dirsrvtests/tests/suites/backups/backup_test.py @@ -17,16 +17,20 @@ from datetime import datetime from lib389._constants import DN_DM, PASSWORD, DEFAULT_SUFFIX, INSTALL_LATEST_CONFIG from lib389.properties import BACKEND_SAMPLE_ENTRIES, TASK_WAIT -from lib389.topologies import topology_st as topo, topology_m2 as topo_m2 +from lib389.topologies import topology_st as topo, topology_m2 as topo_m2, set_timeout from lib389.backend import Backends, Backend from lib389.dbgen import dbgen_users from lib389.tasks import BackupTask, RestoreTask from lib389.config import BDB_LDBMConfig +from lib389.idm.nscontainer import nsContainers from lib389 import DSEldif from lib389.utils import ds_is_older, get_default_db_lib from lib389.replica import ReplicationManager +from threading import Thread, Event import tempfile + + pytestmark = pytest.mark.tier1 DEBUGGING = os.getenv("DEBUGGING", default=False) @@ -36,6 +40,11 @@ logging.getLogger(__name__).setLevel(logging.INFO) log = logging.getLogger(__name__) +# test_online_backup_and_dse_write may hang if 6372 is not fixed +# so lets use a shorter timeout +set_timeout(30*60) + +event = Event() BESTRUCT = [ { "bename" : "be1", "suffix": "dc=be1", "nbusers": 1000 }, @@ -348,6 +357,38 @@ def test_backup_task_after_failure(mytopo): assert exitCode == 0, "Backup failed. Issue #6229 may not be fixed." +def load_dse(inst): + conts = nsContainers(inst, 'cn=config') + while not event.is_set(): + cont = conts.create(properties={'cn': 'test_online_backup_and_dse_write'}) + cont.delete() + + +def test_online_backup_and_dse_write(topo): + """Test online backup while attempting to add/delete entries in dse.ldif. + + :id: 4a1edd2c-be15-11ef-8bc8-482ae39447e5 + :setup: One standalone instance + :steps: + 1. Start a thread that loops adding then removing in the dse.ldif + 2. Perform 10 online backups + 3. Stop the thread + :expectedresults: + 1. Success + 2. Success (or timeout if issue #6372 is not fixed.) + 3. Success + """ + inst = topo.standalone + t = Thread(target=load_dse, args=[inst]) + t.start() + for x in range(10): + with tempfile.TemporaryDirectory() as backup_dir: + assert inst.tasks.db2bak(backup_dir=f'{backup_dir}', args={TASK_WAIT: True}) == 0 + event.set() + t.join() + event.clear() + + if __name__ == '__main__': # Run isolated # -s for DEBUG mode diff --git a/ldap/servers/slapd/dse.c b/ldap/servers/slapd/dse.c index 2edc3f1f6..e3157c1ce 100644 --- a/ldap/servers/slapd/dse.c +++ b/ldap/servers/slapd/dse.c @@ -41,6 +41,7 @@ #include /* Needed to access read_config_dse */ #include "proto-slap.h" +#include #include /* provides fsync/close */ @@ -73,9 +74,6 @@ #define DSE_USE_LOCK 1 #define DSE_NO_LOCK 0 -/* Global lock used during backups */ -static PRLock *backup_lock = NULL; - struct dse_callback { int operation; @@ -104,6 +102,8 @@ struct dse /* initialize the dse */ int dse_is_updateable; /* if non-zero, this DSE can be written to */ int dse_readonly_error_reported; /* used to ensure that read-only errors are logged only once */ + pthread_mutex_t dse_backup_lock; /* used to block write when online backup is in progress */ + bool dse_backup_in_progress; /* tell that online backup is in progress (protected by dse_rwlock) */ }; struct dse_node @@ -155,6 +155,91 @@ static int dse_write_entry(caddr_t data, caddr_t arg); static int ldif_record_end(char *p); static int dse_call_callback(struct dse *pdse, Slapi_PBlock *pb, int operation, int flags, Slapi_Entry *entryBefore, Slapi_Entry *entryAfter, int *returncode, char *returntext); +/* Lock the dse in read mode */ +INLINE_DIRECTIVE static void +dse_lock_read(struct dse *pdse, int use_lock) +{ + if (use_lock == DSE_USE_LOCK && pdse->dse_rwlock) { + slapi_rwlock_rdlock(pdse->dse_rwlock); + } +} + +/* Lock the dse in write mode and wait until the */ +INLINE_DIRECTIVE static void +dse_lock_write(struct dse *pdse, int use_lock) +{ + if (use_lock != DSE_USE_LOCK || !pdse->dse_rwlock) { + return; + } + slapi_rwlock_wrlock(pdse->dse_rwlock); + while (pdse->dse_backup_in_progress) { + slapi_rwlock_unlock(pdse->dse_rwlock); + /* Wait util dse_backup_lock is unlocked */ + pthread_mutex_lock(&pdse->dse_backup_lock); + pthread_mutex_unlock(&pdse->dse_backup_lock); + slapi_rwlock_wrlock(pdse->dse_rwlock); + } +} + +/* release the dse lock */ +INLINE_DIRECTIVE static void +dse_lock_unlock(struct dse *pdse, int use_lock) +{ + if (use_lock == DSE_USE_LOCK && pdse->dse_rwlock) { + slapi_rwlock_unlock(pdse->dse_rwlock); + } +} + +/* Call cb(pdse) */ +INLINE_DIRECTIVE static void +dse_call_cb(void (*cb)(struct dse*)) +{ + Slapi_Backend *be = slapi_be_select_by_instance_name("DSE"); + if (be) { + struct dse *pdse = NULL; + slapi_be_Rlock(be); + pdse = be->be_database->plg_private; + if (pdse) { + cb(pdse); + } + slapi_be_Unlock(be); + } +} + +/* Helper for dse_backup_lock() */ +static void +dse_backup_lock_cb(struct dse *pdse) +{ + pthread_mutex_lock(&pdse->dse_backup_lock); + slapi_rwlock_wrlock(pdse->dse_rwlock); + pdse->dse_backup_in_progress = true; + slapi_rwlock_unlock(pdse->dse_rwlock); +} + +/* Helper for dse_backup_unlock() */ +static void +dse_backup_unlock_cb(struct dse *pdse) +{ + slapi_rwlock_wrlock(pdse->dse_rwlock); + pdse->dse_backup_in_progress = false; + slapi_rwlock_unlock(pdse->dse_rwlock); + pthread_mutex_unlock(&pdse->dse_backup_lock); +} + +/* Tells that a backup thread is starting */ +void +dse_backup_lock() +{ + dse_call_cb(dse_backup_lock_cb); +} + +/* Tells that a backup thread is ending */ +void +dse_backup_unlock() +{ + dse_call_cb(dse_backup_unlock_cb); +} + /* * Map a DN onto a dse_node. * Returns NULL if not found. @@ -192,18 +277,12 @@ dse_get_entry_copy(struct dse *pdse, const Slapi_DN *dn, int use_lock) Slapi_Entry *e = NULL; struct dse_node *n; - if (use_lock == DSE_USE_LOCK && pdse->dse_rwlock) { - slapi_rwlock_rdlock(pdse->dse_rwlock); - } - + dse_lock_read(pdse, use_lock); n = dse_find_node(pdse, dn); if (n != NULL) { e = slapi_entry_dup(n->entry); } - - if (use_lock == DSE_USE_LOCK && pdse->dse_rwlock) { - slapi_rwlock_unlock(pdse->dse_rwlock); - } + dse_lock_unlock(pdse, use_lock); return e; } @@ -393,6 +472,7 @@ dse_new(char *filename, char *tmpfilename, char *backfilename, char *startokfile pdse->dse_callback = NULL; pdse->dse_is_updateable = dse_permission_to_write(pdse, SLAPI_LOG_TRACE); + pthread_mutex_init(&pdse->dse_backup_lock, NULL); } slapi_ch_free((void **)&realconfigdir); } @@ -429,8 +509,7 @@ dse_destroy(struct dse *pdse) if (NULL == pdse) { return 0; /* no one checks this return value */ } - if (pdse->dse_rwlock) - slapi_rwlock_wrlock(pdse->dse_rwlock); + dse_lock_write(pdse, DSE_USE_LOCK); slapi_ch_free((void **)&(pdse->dse_filename)); slapi_ch_free((void **)&(pdse->dse_tmpfile)); slapi_ch_free((void **)&(pdse->dse_fileback)); @@ -439,8 +518,8 @@ dse_destroy(struct dse *pdse) dse_callback_deletelist(&pdse->dse_callback); charray_free(pdse->dse_filelist); nentries = avl_free(pdse->dse_tree, dse_internal_delete_entry); + dse_lock_unlock(pdse, DSE_USE_LOCK); if (pdse->dse_rwlock) { - slapi_rwlock_unlock(pdse->dse_rwlock); slapi_destroy_rwlock(pdse->dse_rwlock); } slapi_ch_free((void **)&pdse); @@ -928,9 +1007,7 @@ dse_check_for_readonly_error(Slapi_PBlock *pb, struct dse *pdse) { int rc = 0; /* default: no error */ - if (pdse->dse_rwlock) - slapi_rwlock_rdlock(pdse->dse_rwlock); - + dse_lock_read(pdse, DSE_USE_LOCK); if (!pdse->dse_is_updateable) { if (!pdse->dse_readonly_error_reported) { if (NULL != pdse->dse_filename) { @@ -944,9 +1021,7 @@ dse_check_for_readonly_error(Slapi_PBlock *pb, struct dse *pdse) } rc = 1; /* return an error to the client */ } - - if (pdse->dse_rwlock) - slapi_rwlock_unlock(pdse->dse_rwlock); + dse_lock_unlock(pdse, DSE_USE_LOCK); if (rc != 0) { slapi_send_ldap_result(pb, LDAP_UNWILLING_TO_PERFORM, NULL, @@ -973,8 +1048,6 @@ dse_write_file_nolock(struct dse *pdse) fpw.fpw_rc = 0; fpw.fpw_prfd = NULL; - dse_backup_lock(); - if (NULL != pdse->dse_filename) { if ((fpw.fpw_prfd = PR_Open(pdse->dse_tmpfile, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE, SLAPD_DEFAULT_DSE_FILE_MODE)) == NULL) { rc = PR_GetOSError(); @@ -1107,8 +1180,7 @@ dse_add_entry_pb(struct dse *pdse, Slapi_Entry *e, Slapi_PBlock *pb) slapi_pblock_get(pb, SLAPI_DSE_MERGE_WHEN_ADDING, &merge); /* keep write lock during both tree update and file write operations */ - if (pdse->dse_rwlock) - slapi_rwlock_wrlock(pdse->dse_rwlock); + dse_lock_write(pdse, DSE_USE_LOCK); if (merge) { rc = avl_insert(&(pdse->dse_tree), n, entry_dn_cmp, dupentry_merge); } else { @@ -1131,8 +1203,7 @@ dse_add_entry_pb(struct dse *pdse, Slapi_Entry *e, Slapi_PBlock *pb) } else { /* duplicate entry ignored */ dse_node_delete(&n); /* This also deletes the contained entry */ } - if (pdse->dse_rwlock) - slapi_rwlock_unlock(pdse->dse_rwlock); + dse_lock_unlock(pdse, DSE_USE_LOCK); if (rc == -1) { /* duplicate entry ignored */ @@ -1299,8 +1370,7 @@ dse_replace_entry(struct dse *pdse, Slapi_Entry *e, int write_file, int use_lock int rc = -1; if (NULL != e) { struct dse_node *n = dse_node_new(e); - if (use_lock && pdse->dse_rwlock) - slapi_rwlock_wrlock(pdse->dse_rwlock); + dse_lock_write(pdse, use_lock); rc = avl_insert(&(pdse->dse_tree), n, entry_dn_cmp, dupentry_replace); if (write_file) dse_write_file_nolock(pdse); @@ -1310,8 +1380,7 @@ dse_replace_entry(struct dse *pdse, Slapi_Entry *e, int write_file, int use_lock dse_node_delete(&n); rc = 0; /* for return to caller */ } - if (use_lock && pdse->dse_rwlock) - slapi_rwlock_unlock(pdse->dse_rwlock); + dse_lock_unlock(pdse, use_lock); } return rc; } @@ -1398,8 +1467,7 @@ dse_delete_entry(struct dse *pdse, Slapi_PBlock *pb, const Slapi_Entry *e) slapi_pblock_get(pb, SLAPI_DSE_DONT_WRITE_WHEN_ADDING, &dont_write_file); /* keep write lock for both tree deleting and file writing */ - if (pdse->dse_rwlock) - slapi_rwlock_wrlock(pdse->dse_rwlock); + dse_lock_write(pdse, DSE_USE_LOCK); if ((deleted_node = (struct dse_node *)avl_delete(&pdse->dse_tree, n, entry_dn_cmp))) { dse_node_delete(&deleted_node); } @@ -1411,8 +1479,7 @@ dse_delete_entry(struct dse *pdse, Slapi_PBlock *pb, const Slapi_Entry *e) SLAPI_OPERATION_DELETE); dse_write_file_nolock(pdse); } - if (pdse->dse_rwlock) - slapi_rwlock_unlock(pdse->dse_rwlock); + dse_lock_unlock(pdse, DSE_USE_LOCK); return 1; } @@ -1574,11 +1641,9 @@ do_dse_search(struct dse *pdse, Slapi_PBlock *pb, int scope, const Slapi_DN *bas * entries that change, we skip looking through the DSE entries. */ if (pb_op == NULL || !operation_is_flag_set(pb_op, OP_FLAG_PS_CHANGESONLY)) { - if (pdse->dse_rwlock) - slapi_rwlock_rdlock(pdse->dse_rwlock); + dse_lock_read(pdse, DSE_USE_LOCK); dse_apply_nolock(pdse, dse_search_filter_entry, (caddr_t)&stuff); - if (pdse->dse_rwlock) - slapi_rwlock_unlock(pdse->dse_rwlock); + dse_lock_unlock(pdse, DSE_USE_LOCK); } if (stuff.ss) /* something was found which matched our criteria */ @@ -2925,32 +2990,3 @@ dse_next_search_entry(Slapi_PBlock *pb) return 0; } -/* When a backup is occurring we can not allow the writing the dse.ldif file */ -void -dse_init_backup_lock() -{ - backup_lock = PR_NewLock(); -} - -void -dse_destroy_backup_lock() -{ - PR_DestroyLock(backup_lock); - backup_lock = NULL; -} - -void -dse_backup_lock() -{ - if (backup_lock) { - PR_Lock(backup_lock); - } -} - -void -dse_backup_unlock() -{ - if (backup_lock) { - PR_Unlock(backup_lock); - } -} diff --git a/ldap/servers/slapd/libglobs.c b/ldap/servers/slapd/libglobs.c index 327937223..855ca9c3c 100644 --- a/ldap/servers/slapd/libglobs.c +++ b/ldap/servers/slapd/libglobs.c @@ -2033,9 +2033,6 @@ FrontendConfig_init(void) /* Done, unlock! */ CFG_UNLOCK_WRITE(cfg); - /* init the dse file backup lock */ - dse_init_backup_lock(); - init_config_get_and_set(); } diff --git a/ldap/servers/slapd/main.c b/ldap/servers/slapd/main.c index 1cdfeaa36..cc9f43799 100644 --- a/ldap/servers/slapd/main.c +++ b/ldap/servers/slapd/main.c @@ -1164,7 +1164,6 @@ main(int argc, char **argv) slapd_ssl_destroy(); ndn_cache_destroy(); NSS_Shutdown(); - dse_destroy_backup_lock(); /* * Server has stopped, lets force everything to disk: logs diff --git a/ldap/servers/slapd/slapi-private.h b/ldap/servers/slapd/slapi-private.h index f0f6a4b70..b119da0bf 100644 --- a/ldap/servers/slapd/slapi-private.h +++ b/ldap/servers/slapd/slapi-private.h @@ -1415,8 +1415,6 @@ void modify_update_last_modified_attr(Slapi_PBlock *pb, Slapi_Mods *smods); void add_internal_modifiersname(Slapi_PBlock *pb, Slapi_Entry *e); /* dse.c */ -void dse_init_backup_lock(void); -void dse_destroy_backup_lock(void); void dse_backup_lock(void); void dse_backup_unlock(void); diff --git a/src/lib389/lib389/tasks.py b/src/lib389/lib389/tasks.py index c1a2e7aaa..6bf302862 100644 --- a/src/lib389/lib389/tasks.py +++ b/src/lib389/lib389/tasks.py @@ -525,7 +525,7 @@ def importLDIF(self, suffix=None, benamebase=None, input_file=None, entry.setValues('nsIncludeSuffix', suffix) # start the task and possibly wait for task completion - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') exitCode = 0 warningCode = 0 @@ -598,7 +598,7 @@ def exportLDIF(self, suffix=None, benamebase=None, output_file=None, entry.setValues('nsExportReplica', 'true') # start the task and possibly wait for task completion - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') exitCode = 0 warningCode = 0 if args and args.get(TASK_WAIT, False): @@ -649,7 +649,7 @@ def db2bak(self, backup_dir=None, args=None): # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add the backup task (%s)", dn) return -1 @@ -706,7 +706,7 @@ def bak2db(self, backup_dir=None, args=None): # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add the backup task (%s)", dn) return -1 @@ -834,7 +834,7 @@ def reindex(self, suffix=None, benamebase=None, attrname=None, args=None, vlv=Fa # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add the index task for %s", attrname) return -1 @@ -914,7 +914,7 @@ def fixupMemberOf(self, suffix=None, benamebase=None, filt=None, # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add the memberOf fixup task") return -1 @@ -975,7 +975,7 @@ def fixupTombstones(self, bename=None, args=None): # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add the fixup tombstone task") return -1 @@ -1031,7 +1031,7 @@ def automemberRebuild(self, suffix=DEFAULT_SUFFIX, scope='sub', # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add Automember Rebuild Membership task") return -1 @@ -1087,7 +1087,7 @@ def automemberExport(self, suffix=DEFAULT_SUFFIX, scope='sub', # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add Automember Export Updates task") return -1 @@ -1138,7 +1138,7 @@ def automemberMap(self, ldif_in=None, ldif_out=None, args=None): # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add Automember Map Updates task") return -1 @@ -1183,7 +1183,7 @@ def fixupLinkedAttrs(self, linkdn=None, args=None): # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add Fixup Linked Attributes task") return -1 @@ -1227,7 +1227,7 @@ def schemaReload(self, schemadir=None, args=None): # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add Schema Reload task") return -1 @@ -1272,7 +1272,7 @@ def fixupWinsyncMembers(self, suffix=DEFAULT_SUFFIX, # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add fixupWinsyncMembers 'memberuid task'") return -1 @@ -1319,7 +1319,7 @@ def syntaxValidate(self, suffix=DEFAULT_SUFFIX, fstr='objectclass=top', # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add Syntax Validate task") return -1 @@ -1370,7 +1370,7 @@ def usnTombstoneCleanup(self, suffix=DEFAULT_SUFFIX, bename=None, # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add USN tombstone cleanup task") return -1 @@ -1421,7 +1421,7 @@ def sysconfigReload(self, configfile=None, logchanges=None, args=None): entry.setValues('logchanges', logchanges) # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add Sysconfig Reload task") return -1 @@ -1473,7 +1473,7 @@ def cleanAllRUV(self, suffix=None, replicaid=None, force=None, args=None): entry.setValues('replica-force-cleaning', 'yes') # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add cleanAllRUV task") return (dn, -1) @@ -1528,7 +1528,7 @@ def abortCleanAllRUV(self, suffix=None, replicaid=None, certify=None, # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add Abort cleanAllRUV task") return (dn, -1) @@ -1582,7 +1582,7 @@ def upgradeDB(self, nsArchiveDir=None, nsDatabaseType=None, # start the task and possibly wait for task completion try: - self.conn.add_s(entry) + self.conn.add_s(entry, escapehatch='i am sure') except ldap.ALREADY_EXISTS: self.log.error("Fail to add upgradedb task") return -1