From 46530fc4849d66104751e517ede351f0d805ffb7 Mon Sep 17 00:00:00 2001 From: Thierry Bordaz Date: Wed, 18 Dec 2024 12:11:56 +0100 Subject: [PATCH] Issue 6470 - Some replication status data are reset upon a restart Bug description: The replication agreement contains operational attributes related to the total init: nsds5replicaLastInitStart, nsds5replicaLastInitEnd, nsds5replicaLastInitStatus. Those attributes are reset at restart Fix description: When reading the replication agreement from config (agmt_new_from_entry) restore the attributes into the in-memory RA. Updates the RA config entry from the in-memory RA during shutdown/cleanallruv/enable_ra fixes: #6470 Reviewed by: --- .../suites/replication/single_master_test.py | 131 +++++++++++++++++ ldap/servers/plugins/replication/repl5.h | 4 + ldap/servers/plugins/replication/repl5_agmt.c | 133 +++++++++++++++++- .../plugins/replication/repl5_agmtlist.c | 1 + .../plugins/replication/repl_cleanallruv.c | 1 + .../plugins/replication/repl_globals.c | 3 + 6 files changed, 270 insertions(+), 3 deletions(-) diff --git a/dirsrvtests/tests/suites/replication/single_master_test.py b/dirsrvtests/tests/suites/replication/single_master_test.py index 448cf305a9..8fa7dd2ed4 100644 --- a/dirsrvtests/tests/suites/replication/single_master_test.py +++ b/dirsrvtests/tests/suites/replication/single_master_test.py @@ -13,6 +13,7 @@ from lib389.idm.user import UserAccounts, TEST_USER_PROPERTIES from lib389.replica import ReplicationManager, Replicas +from lib389.agreement import Agreements from lib389.backend import Backends from lib389.topologies import topology_m1c1 as topo_r # Replication @@ -156,6 +157,136 @@ def test_lastupdate_attr_before_init(topo_nr): json_obj = json.loads(json_status) log.debug("JSON status message: {}".format(json_obj)) +def test_total_init_operational_attr(topo_r): + """Check that operation attributes nsds5replicaLastInitStatus + nsds5replicaLastInitStart and nsds5replicaLastInitEnd + are preserved between restart + + :id: 6ba00bb1-87c0-47dd-86e0-ccf892b3985b + :customerscenario: True + :setup: Replication setup with supplier and consumer instances, + test user on supplier + :steps: + 1. Check that user was replicated to consumer + 2. Trigger a first total init + 3. Check status/start/end values are set on the supplier + 4. Restart supplier + 5. Check previous status/start/end values are preserved + 6. Trigger a second total init + 7. Check status/start/end values are set on the supplier + 8. Restart supplier + 9. Check previous status/start/end values are preserved + 10. Check status/start/end values are different between + first and second total init + :expectedresults: + 1. The user should be replicated to consumer + 2. Total init should be successful + 3. It must exist a values + 4. Operation should be successful + 5. Check values are identical before/after restart + 6. Total init should be successful + 7. It must exist a values + 8. Operation should be successful + 9. Check values are identical before/after restart + 10. values must differ between first/second total init + """ + + supplier = topo_r.ms["supplier1"] + consumer = topo_r.cs["consumer1"] + repl = ReplicationManager(DEFAULT_SUFFIX) + + # Create a test user + m_users = UserAccounts(topo_r.ms["supplier1"], DEFAULT_SUFFIX) + m_user = m_users.ensure_state(properties=TEST_USER_PROPERTIES) + m_user.ensure_present('mail', 'testuser@redhat.com') + + # Then check it is replicated + log.info("Check that replication is working") + repl.wait_for_replication(supplier, consumer) + c_users = UserAccounts(topo_r.cs["consumer1"], DEFAULT_SUFFIX) + c_user = c_users.get('testuser') + assert c_user + + # Retrieve the replication agreement S1->C1 + replica_supplier = Replicas(supplier).get(DEFAULT_SUFFIX) + agmts_supplier = Agreements(supplier, replica_supplier.dn) + supplier_consumer = None + for agmt in agmts_supplier.list(): + if (agmt.get_attr_val_utf8('nsDS5ReplicaPort') == str(consumer.port) and + agmt.get_attr_val_utf8('nsDS5ReplicaHost') == consumer.host): + supplier_consumer = agmt + break + assert supplier_consumer + + + # Trigger a first total init and check that + # start/end/status is updated AND preserved during a restart + log.info("First total init") + supplier_consumer.begin_reinit() + (done, error) = supplier_consumer.wait_reinit() + assert done is True + + status_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStatus") + assert status_1 + + initStart_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStart") + assert initStart_1 + + initEnd_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitEnd") + assert initEnd_1 + + log.info("Check values from first total init are preserved") + supplier.restart() + post_restart_status_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStatus") + assert post_restart_status_1 + assert post_restart_status_1 == status_1 + + post_restart_initStart_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStart") + assert post_restart_initStart_1 + assert post_restart_initStart_1 == initStart_1 + + post_restart_initEnd_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitEnd") + assert post_restart_initEnd_1 == initEnd_1 + + # Trigger a second total init and check that + # start/end/status is updated (differ from previous values) + # AND new values are preserved during a restart + time.sleep(1) + log.info("Second total init") + supplier_consumer.begin_reinit() + (done, error) = supplier_consumer.wait_reinit() + assert done is True + + status_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStatus") + assert status_2 + + initStart_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStart") + assert initStart_2 + + initEnd_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitEnd") + assert initEnd_2 + + log.info("Check values from second total init are preserved") + supplier.restart() + post_restart_status_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStatus") + assert post_restart_status_2 + assert post_restart_status_2 == status_2 + + post_restart_initStart_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStart") + assert post_restart_initStart_2 + assert post_restart_initStart_2 == initStart_2 + + post_restart_initEnd_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitEnd") + assert post_restart_initEnd_2 == initEnd_2 + + # Check that values are updated by total init + log.info("Check values from first/second total init are different") + assert status_2 == status_1 + assert initStart_2 != initStart_1 + assert initEnd_2 != initEnd_1 + + + if __name__ == '__main__': # Run isolated # -s for DEBUG mode diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index 2ba2cfaa72..6e5552f54e 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -169,6 +169,9 @@ extern const char *type_nsds5ReplicaBootstrapCredentials; extern const char *type_nsds5ReplicaBootstrapBindMethod; extern const char *type_nsds5ReplicaBootstrapTransportInfo; extern const char *type_replicaKeepAliveUpdateInterval; +extern const char *type_nsds5ReplicaLastInitStart; +extern const char *type_nsds5ReplicaLastInitEnd; +extern const char *type_nsds5ReplicaLastInitStatus; /* Attribute names for windows replication agreements */ extern const char *type_nsds7WindowsReplicaArea; @@ -435,6 +438,7 @@ void agmt_notify_change(Repl_Agmt *ra, Slapi_PBlock *pb); Object *agmt_get_consumer_ruv(Repl_Agmt *ra); ReplicaId agmt_get_consumer_rid(Repl_Agmt *ra, void *conn); int agmt_set_consumer_ruv(Repl_Agmt *ra, RUV *ruv); +void agmt_update_init_status(Repl_Agmt *ra); void agmt_update_consumer_ruv(Repl_Agmt *ra); CSN *agmt_get_consumer_schema_csn(Repl_Agmt *ra); void agmt_set_consumer_schema_csn(Repl_Agmt *ra, CSN *csn); diff --git a/ldap/servers/plugins/replication/repl5_agmt.c b/ldap/servers/plugins/replication/repl5_agmt.c index fd5f23a77b..11af903d74 100644 --- a/ldap/servers/plugins/replication/repl5_agmt.c +++ b/ldap/servers/plugins/replication/repl5_agmt.c @@ -56,6 +56,7 @@ #include "repl5_prot_private.h" #include "cl5_api.h" #include "slapi-plugin.h" +#include "slap.h" #include "../../slapd/back-ldbm/dbimpl.h" /* for dblayer_is_lmdb */ #define DEFAULT_TIMEOUT 120 /* (seconds) default outbound LDAP connection */ @@ -532,9 +533,32 @@ agmt_new_from_entry(Slapi_Entry *e) ra->last_update_status[0] = '\0'; ra->update_in_progress = PR_FALSE; ra->stop_in_progress = PR_FALSE; - ra->last_init_end_time = 0UL; - ra->last_init_start_time = 0UL; - ra->last_init_status[0] = '\0'; + val = (char *)slapi_entry_attr_get_ref(e, type_nsds5ReplicaLastInitEnd); + if (val) { + time_t init_end_time; + + init_end_time = parse_genTime((char *) val); + if (init_end_time == NO_TIME || init_end_time == SLAPD_END_TIME) { + ra->last_init_end_time = 0UL; + } else { + ra->last_init_end_time = init_end_time; + } + } + val = (char *)slapi_entry_attr_get_ref(e, type_nsds5ReplicaLastInitStart); + if (val) { + time_t init_start_time; + + init_start_time = parse_genTime((char *) val); + if (init_start_time == NO_TIME || init_start_time == SLAPD_END_TIME) { + ra->last_init_start_time = 0UL; + } else { + ra->last_init_start_time = init_start_time; + } + } + val = (char *)slapi_entry_attr_get_ref(e, type_nsds5ReplicaLastInitStatus); + if (val) { + strcpy(ra->last_init_status, val); + } ra->changecounters = (struct changecounter **)slapi_ch_calloc(MAX_NUM_OF_SUPPLIERS + 1, sizeof(struct changecounter *)); ra->num_changecounters = 0; @@ -2527,6 +2551,108 @@ agmt_set_consumer_ruv(Repl_Agmt *ra, RUV *ruv) return 0; } +void +agmt_update_init_status(Repl_Agmt *ra) +{ + int rc; + Slapi_PBlock *pb; + LDAPMod **mods; + int nb_mods = 0; + int mod_idx; + Slapi_Mod smod_start_time = {0}; + Slapi_Mod smod_end_time = {0}; + Slapi_Mod smod_status = {0}; + + PR_ASSERT(ra); + PR_Lock(ra->lock); + + if (ra->last_init_start_time) { + nb_mods++; + } + if (ra->last_init_end_time) { + nb_mods++; + } + if (ra->last_init_status[0] != '\0') { + nb_mods++; + } + mods = (LDAPMod **) slapi_ch_malloc((nb_mods + 1) * sizeof(LDAPMod *)); + mod_idx = 0; + if (ra->last_init_start_time) { + struct berval val; + char *time_tmp = NULL; + slapi_mod_init(&smod_start_time, 1); + slapi_mod_set_type(&smod_start_time, type_nsds5ReplicaLastInitStart); + slapi_mod_set_operation(&smod_start_time, LDAP_MOD_REPLACE | LDAP_MOD_BVALUES); + + time_tmp = format_genTime(ra->last_init_start_time); + val.bv_val = time_tmp; + val.bv_len = strlen(time_tmp); + slapi_mod_add_value(&smod_start_time, &val); + slapi_ch_free((void **)&time_tmp); + mods[mod_idx] = (LDAPMod *)slapi_mod_get_ldapmod_byref(&smod_start_time); + mod_idx++; + } + if (ra->last_init_end_time) { + struct berval val; + char *time_tmp = NULL; + slapi_mod_init(&smod_end_time, 1); + slapi_mod_set_type(&smod_end_time, type_nsds5ReplicaLastInitEnd); + slapi_mod_set_operation(&smod_end_time, LDAP_MOD_REPLACE | LDAP_MOD_BVALUES); + + time_tmp = format_genTime(ra->last_init_end_time); + val.bv_val = time_tmp; + val.bv_len = strlen(time_tmp); + slapi_mod_add_value(&smod_end_time, &val); + slapi_ch_free((void **)&time_tmp); + mods[mod_idx] = (LDAPMod *)slapi_mod_get_ldapmod_byref(&smod_end_time); + mod_idx++; + } + if (ra->last_init_status[0] != '\0') { + struct berval val; + char *init_status = NULL; + slapi_mod_init(&smod_status, 1); + slapi_mod_set_type(&smod_status, type_nsds5ReplicaLastInitStatus); + slapi_mod_set_operation(&smod_status, LDAP_MOD_REPLACE | LDAP_MOD_BVALUES); + + init_status = slapi_ch_strdup(ra->last_init_status); + val.bv_val = init_status; + val.bv_len = strlen(init_status); + slapi_mod_add_value(&smod_status, &val); + slapi_ch_free((void **)&init_status); + mods[mod_idx] = (LDAPMod *)slapi_mod_get_ldapmod_byref(&smod_status); + mod_idx++; + } + + if (nb_mods) { + /* it is ok to release the lock here because we are done with the agreement data. + we have to do it before issuing the modify operation because it causes + agmtlist_notify_all to be called which uses the same lock - hence the deadlock */ + PR_Unlock(ra->lock); + + pb = slapi_pblock_new(); + mods[nb_mods] = NULL; + + slapi_modify_internal_set_pb_ext(pb, ra->dn, mods, NULL, NULL, + repl_get_plugin_identity(PLUGIN_MULTISUPPLIER_REPLICATION), 0); + slapi_modify_internal_pb(pb); + + slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &rc); + if (rc != LDAP_SUCCESS && rc != LDAP_NO_SUCH_ATTRIBUTE) { + slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "agmt_update_consumer_ruv - " + "%s: agmt_update_consumer_ruv: " + "failed to update consumer's RUV; LDAP error - %d\n", + ra->long_name, rc); + } + + slapi_pblock_destroy(pb); + } else { + PR_Unlock(ra->lock); + } + slapi_mod_done(&smod_start_time); + slapi_mod_done(&smod_end_time); + slapi_mod_done(&smod_status); +} + void agmt_update_consumer_ruv(Repl_Agmt *ra) { @@ -3146,6 +3272,7 @@ agmt_set_enabled_from_entry(Repl_Agmt *ra, Slapi_Entry *e, char *returntext) PR_Unlock(ra->lock); agmt_stop(ra); agmt_update_consumer_ruv(ra); + agmt_update_init_status(ra); agmt_set_last_update_status(ra, 0, 0, "agreement disabled"); return rc; } diff --git a/ldap/servers/plugins/replication/repl5_agmtlist.c b/ldap/servers/plugins/replication/repl5_agmtlist.c index 0ebbc376ae..9109f92a30 100644 --- a/ldap/servers/plugins/replication/repl5_agmtlist.c +++ b/ldap/servers/plugins/replication/repl5_agmtlist.c @@ -786,6 +786,7 @@ agmtlist_shutdown() ra = (Repl_Agmt *)object_get_data(ro); agmt_stop(ra); agmt_update_consumer_ruv(ra); + agmt_update_init_status(ra); next_ro = objset_next_obj(agmt_set, ro); /* Object ro was released in objset_next_obj, * but the address ro can be still used to remove ro from objset. */ diff --git a/ldap/servers/plugins/replication/repl_cleanallruv.c b/ldap/servers/plugins/replication/repl_cleanallruv.c index 4052d98fd4..d002f823d1 100644 --- a/ldap/servers/plugins/replication/repl_cleanallruv.c +++ b/ldap/servers/plugins/replication/repl_cleanallruv.c @@ -2441,6 +2441,7 @@ clean_agmts(cleanruv_data *data) "Cleaning agmt (%s) ...", agmt_get_long_name(agmt)); agmt_stop(agmt); agmt_update_consumer_ruv(agmt); + agmt_update_init_status(agmt); agmt_start(agmt); agmt_obj = agmtlist_get_next_agreement_for_replica(data->replica, agmt_obj); } diff --git a/ldap/servers/plugins/replication/repl_globals.c b/ldap/servers/plugins/replication/repl_globals.c index 24204a639b..03547d9a73 100644 --- a/ldap/servers/plugins/replication/repl_globals.c +++ b/ldap/servers/plugins/replication/repl_globals.c @@ -118,6 +118,9 @@ const char *type_nsds5ReplicaBootstrapBindDN = "nsds5ReplicaBootstrapBindDN"; const char *type_nsds5ReplicaBootstrapCredentials = "nsds5ReplicaBootstrapCredentials"; const char *type_nsds5ReplicaBootstrapBindMethod = "nsds5ReplicaBootstrapBindMethod"; const char *type_nsds5ReplicaBootstrapTransportInfo = "nsds5ReplicaBootstrapTransportInfo"; +const char *type_nsds5ReplicaLastInitStart = "nsds5replicaLastInitStart"; +const char *type_nsds5ReplicaLastInitEnd = "nsds5replicaLastInitEnd"; +const char *type_nsds5ReplicaLastInitStatus = "nsds5replicaLastInitStatus"; /* windows sync specific attributes */ const char *type_nsds7WindowsReplicaArea = "nsds7WindowsReplicaSubtree";