Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 6470 - Some replication status data are reset upon a restart #6471

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions dirsrvtests/tests/suites/replication/single_master_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from lib389.idm.user import UserAccounts, TEST_USER_PROPERTIES

from lib389.replica import ReplicationManager, Replicas
from lib389.agreement import Agreements
from lib389.backend import Backends

from lib389.topologies import topology_m1c1 as topo_r # Replication
Expand Down Expand Up @@ -156,6 +157,136 @@ def test_lastupdate_attr_before_init(topo_nr):
json_obj = json.loads(json_status)
log.debug("JSON status message: {}".format(json_obj))

def test_total_init_operational_attr(topo_r):
"""Check that operation attributes nsds5replicaLastInitStatus
nsds5replicaLastInitStart and nsds5replicaLastInitEnd
are preserved between restart

:id: 6ba00bb1-87c0-47dd-86e0-ccf892b3985b
:customerscenario: True
:setup: Replication setup with supplier and consumer instances,
test user on supplier
:steps:
1. Check that user was replicated to consumer
2. Trigger a first total init
3. Check status/start/end values are set on the supplier
4. Restart supplier
5. Check previous status/start/end values are preserved
6. Trigger a second total init
7. Check status/start/end values are set on the supplier
8. Restart supplier
9. Check previous status/start/end values are preserved
10. Check status/start/end values are different between
first and second total init
:expectedresults:
1. The user should be replicated to consumer
2. Total init should be successful
3. It must exist a values
4. Operation should be successful
5. Check values are identical before/after restart
6. Total init should be successful
7. It must exist a values
8. Operation should be successful
9. Check values are identical before/after restart
10. values must differ between first/second total init
"""

supplier = topo_r.ms["supplier1"]
consumer = topo_r.cs["consumer1"]
repl = ReplicationManager(DEFAULT_SUFFIX)

# Create a test user
m_users = UserAccounts(topo_r.ms["supplier1"], DEFAULT_SUFFIX)
m_user = m_users.ensure_state(properties=TEST_USER_PROPERTIES)
m_user.ensure_present('mail', '[email protected]')

# Then check it is replicated
log.info("Check that replication is working")
repl.wait_for_replication(supplier, consumer)
c_users = UserAccounts(topo_r.cs["consumer1"], DEFAULT_SUFFIX)
c_user = c_users.get('testuser')
assert c_user

# Retrieve the replication agreement S1->C1
replica_supplier = Replicas(supplier).get(DEFAULT_SUFFIX)
agmts_supplier = Agreements(supplier, replica_supplier.dn)
supplier_consumer = None
for agmt in agmts_supplier.list():
if (agmt.get_attr_val_utf8('nsDS5ReplicaPort') == str(consumer.port) and
agmt.get_attr_val_utf8('nsDS5ReplicaHost') == consumer.host):
supplier_consumer = agmt
break
assert supplier_consumer


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rogue new line:)

# Trigger a first total init and check that
# start/end/status is updated AND preserved during a restart
log.info("First total init")
supplier_consumer.begin_reinit()
(done, error) = supplier_consumer.wait_reinit()
assert done is True

status_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStatus")
assert status_1

initStart_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStart")
assert initStart_1

initEnd_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitEnd")
assert initEnd_1

log.info("Check values from first total init are preserved")
supplier.restart()
post_restart_status_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStatus")
assert post_restart_status_1
assert post_restart_status_1 == status_1

post_restart_initStart_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStart")
assert post_restart_initStart_1
assert post_restart_initStart_1 == initStart_1

post_restart_initEnd_1 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitEnd")
assert post_restart_initEnd_1 == initEnd_1

# Trigger a second total init and check that
# start/end/status is updated (differ from previous values)
# AND new values are preserved during a restart
time.sleep(1)
log.info("Second total init")
supplier_consumer.begin_reinit()
(done, error) = supplier_consumer.wait_reinit()
assert done is True

status_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStatus")
assert status_2

initStart_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStart")
assert initStart_2

initEnd_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitEnd")
assert initEnd_2

log.info("Check values from second total init are preserved")
supplier.restart()
post_restart_status_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStatus")
assert post_restart_status_2
assert post_restart_status_2 == status_2

post_restart_initStart_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitStart")
assert post_restart_initStart_2
assert post_restart_initStart_2 == initStart_2

post_restart_initEnd_2 = supplier_consumer.get_attr_val_utf8("nsds5replicaLastInitEnd")
assert post_restart_initEnd_2 == initEnd_2

# Check that values are updated by total init
log.info("Check values from first/second total init are different")
assert status_2 == status_1
assert initStart_2 != initStart_1
assert initEnd_2 != initEnd_1



Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another new line we can live without:)

if __name__ == '__main__':
# Run isolated
# -s for DEBUG mode
Expand Down
4 changes: 4 additions & 0 deletions ldap/servers/plugins/replication/repl5.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ extern const char *type_nsds5ReplicaBootstrapCredentials;
extern const char *type_nsds5ReplicaBootstrapBindMethod;
extern const char *type_nsds5ReplicaBootstrapTransportInfo;
extern const char *type_replicaKeepAliveUpdateInterval;
extern const char *type_nsds5ReplicaLastInitStart;
extern const char *type_nsds5ReplicaLastInitEnd;
extern const char *type_nsds5ReplicaLastInitStatus;

/* Attribute names for windows replication agreements */
extern const char *type_nsds7WindowsReplicaArea;
Expand Down Expand Up @@ -435,6 +438,7 @@ void agmt_notify_change(Repl_Agmt *ra, Slapi_PBlock *pb);
Object *agmt_get_consumer_ruv(Repl_Agmt *ra);
ReplicaId agmt_get_consumer_rid(Repl_Agmt *ra, void *conn);
int agmt_set_consumer_ruv(Repl_Agmt *ra, RUV *ruv);
void agmt_update_init_status(Repl_Agmt *ra);
void agmt_update_consumer_ruv(Repl_Agmt *ra);
CSN *agmt_get_consumer_schema_csn(Repl_Agmt *ra);
void agmt_set_consumer_schema_csn(Repl_Agmt *ra, CSN *csn);
Expand Down
133 changes: 130 additions & 3 deletions ldap/servers/plugins/replication/repl5_agmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include "repl5_prot_private.h"
#include "cl5_api.h"
#include "slapi-plugin.h"
#include "slap.h"
#include "../../slapd/back-ldbm/dbimpl.h" /* for dblayer_is_lmdb */

#define DEFAULT_TIMEOUT 120 /* (seconds) default outbound LDAP connection */
Expand Down Expand Up @@ -532,9 +533,32 @@ agmt_new_from_entry(Slapi_Entry *e)
ra->last_update_status[0] = '\0';
ra->update_in_progress = PR_FALSE;
ra->stop_in_progress = PR_FALSE;
ra->last_init_end_time = 0UL;
ra->last_init_start_time = 0UL;
ra->last_init_status[0] = '\0';
val = (char *)slapi_entry_attr_get_ref(e, type_nsds5ReplicaLastInitEnd);
if (val) {
time_t init_end_time;

init_end_time = parse_genTime((char *) val);
if (init_end_time == NO_TIME || init_end_time == SLAPD_END_TIME) {
ra->last_init_end_time = 0UL;
} else {
ra->last_init_end_time = init_end_time;
}
}
val = (char *)slapi_entry_attr_get_ref(e, type_nsds5ReplicaLastInitStart);
if (val) {
time_t init_start_time;

init_start_time = parse_genTime((char *) val);
if (init_start_time == NO_TIME || init_start_time == SLAPD_END_TIME) {
ra->last_init_start_time = 0UL;
} else {
ra->last_init_start_time = init_start_time;
}
}
val = (char *)slapi_entry_attr_get_ref(e, type_nsds5ReplicaLastInitStatus);
if (val) {
strcpy(ra->last_init_status, val);
}
ra->changecounters = (struct changecounter **)slapi_ch_calloc(MAX_NUM_OF_SUPPLIERS + 1,
sizeof(struct changecounter *));
ra->num_changecounters = 0;
Expand Down Expand Up @@ -2527,6 +2551,108 @@ agmt_set_consumer_ruv(Repl_Agmt *ra, RUV *ruv)
return 0;
}

void
agmt_update_init_status(Repl_Agmt *ra)
{
int rc;
Slapi_PBlock *pb;
LDAPMod **mods;
int nb_mods = 0;
int mod_idx;
Slapi_Mod smod_start_time = {0};
Slapi_Mod smod_end_time = {0};
Slapi_Mod smod_status = {0};

PR_ASSERT(ra);
PR_Lock(ra->lock);

if (ra->last_init_start_time) {
nb_mods++;
}
if (ra->last_init_end_time) {
nb_mods++;
}
if (ra->last_init_status[0] != '\0') {
nb_mods++;
}
Copy link
Member

@droideck droideck Jan 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
}
if (nb_mods == 0) {
PR_Unlock(ra->lock);
return;
}

I think it's better to return here if there are no mods.
It feels a bit strange to check it at the end of the function and then call slapi_mod_done(*); even though no mods were done...

mods = (LDAPMod **) slapi_ch_malloc((nb_mods + 1) * sizeof(LDAPMod *));
mod_idx = 0;
if (ra->last_init_start_time) {
struct berval val;
char *time_tmp = NULL;
slapi_mod_init(&smod_start_time, 1);
slapi_mod_set_type(&smod_start_time, type_nsds5ReplicaLastInitStart);
slapi_mod_set_operation(&smod_start_time, LDAP_MOD_REPLACE | LDAP_MOD_BVALUES);

time_tmp = format_genTime(ra->last_init_start_time);
val.bv_val = time_tmp;
val.bv_len = strlen(time_tmp);
slapi_mod_add_value(&smod_start_time, &val);
slapi_ch_free((void **)&time_tmp);
mods[mod_idx] = (LDAPMod *)slapi_mod_get_ldapmod_byref(&smod_start_time);
mod_idx++;
}
if (ra->last_init_end_time) {
struct berval val;
char *time_tmp = NULL;
slapi_mod_init(&smod_end_time, 1);
slapi_mod_set_type(&smod_end_time, type_nsds5ReplicaLastInitEnd);
slapi_mod_set_operation(&smod_end_time, LDAP_MOD_REPLACE | LDAP_MOD_BVALUES);

time_tmp = format_genTime(ra->last_init_end_time);
val.bv_val = time_tmp;
val.bv_len = strlen(time_tmp);
slapi_mod_add_value(&smod_end_time, &val);
slapi_ch_free((void **)&time_tmp);
mods[mod_idx] = (LDAPMod *)slapi_mod_get_ldapmod_byref(&smod_end_time);
mod_idx++;
}
if (ra->last_init_status[0] != '\0') {
struct berval val;
char *init_status = NULL;
slapi_mod_init(&smod_status, 1);
slapi_mod_set_type(&smod_status, type_nsds5ReplicaLastInitStatus);
slapi_mod_set_operation(&smod_status, LDAP_MOD_REPLACE | LDAP_MOD_BVALUES);

init_status = slapi_ch_strdup(ra->last_init_status);
val.bv_val = init_status;
val.bv_len = strlen(init_status);
slapi_mod_add_value(&smod_status, &val);
slapi_ch_free((void **)&init_status);
mods[mod_idx] = (LDAPMod *)slapi_mod_get_ldapmod_byref(&smod_status);
mod_idx++;
}

if (nb_mods) {
/* it is ok to release the lock here because we are done with the agreement data.
we have to do it before issuing the modify operation because it causes
agmtlist_notify_all to be called which uses the same lock - hence the deadlock */
PR_Unlock(ra->lock);

pb = slapi_pblock_new();
mods[nb_mods] = NULL;

slapi_modify_internal_set_pb_ext(pb, ra->dn, mods, NULL, NULL,
repl_get_plugin_identity(PLUGIN_MULTISUPPLIER_REPLICATION), 0);
slapi_modify_internal_pb(pb);

slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &rc);
if (rc != LDAP_SUCCESS && rc != LDAP_NO_SUCH_ATTRIBUTE) {
slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "agmt_update_consumer_ruv - "
"%s: agmt_update_consumer_ruv: "
"failed to update consumer's RUV; LDAP error - %d\n",
ra->long_name, rc);
}

slapi_pblock_destroy(pb);
} else {
PR_Unlock(ra->lock);
}
slapi_mod_done(&smod_start_time);
slapi_mod_done(&smod_end_time);
slapi_mod_done(&smod_status);
}

void
agmt_update_consumer_ruv(Repl_Agmt *ra)
{
Expand Down Expand Up @@ -3146,6 +3272,7 @@ agmt_set_enabled_from_entry(Repl_Agmt *ra, Slapi_Entry *e, char *returntext)
PR_Unlock(ra->lock);
agmt_stop(ra);
agmt_update_consumer_ruv(ra);
agmt_update_init_status(ra);
agmt_set_last_update_status(ra, 0, 0, "agreement disabled");
return rc;
}
Expand Down
1 change: 1 addition & 0 deletions ldap/servers/plugins/replication/repl5_agmtlist.c
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ agmtlist_shutdown()
ra = (Repl_Agmt *)object_get_data(ro);
agmt_stop(ra);
agmt_update_consumer_ruv(ra);
agmt_update_init_status(ra);
next_ro = objset_next_obj(agmt_set, ro);
/* Object ro was released in objset_next_obj,
* but the address ro can be still used to remove ro from objset. */
Expand Down
1 change: 1 addition & 0 deletions ldap/servers/plugins/replication/repl_cleanallruv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2441,6 +2441,7 @@ clean_agmts(cleanruv_data *data)
"Cleaning agmt (%s) ...", agmt_get_long_name(agmt));
agmt_stop(agmt);
agmt_update_consumer_ruv(agmt);
agmt_update_init_status(agmt);
agmt_start(agmt);
agmt_obj = agmtlist_get_next_agreement_for_replica(data->replica, agmt_obj);
}
Expand Down
3 changes: 3 additions & 0 deletions ldap/servers/plugins/replication/repl_globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ const char *type_nsds5ReplicaBootstrapBindDN = "nsds5ReplicaBootstrapBindDN";
const char *type_nsds5ReplicaBootstrapCredentials = "nsds5ReplicaBootstrapCredentials";
const char *type_nsds5ReplicaBootstrapBindMethod = "nsds5ReplicaBootstrapBindMethod";
const char *type_nsds5ReplicaBootstrapTransportInfo = "nsds5ReplicaBootstrapTransportInfo";
const char *type_nsds5ReplicaLastInitStart = "nsds5replicaLastInitStart";
const char *type_nsds5ReplicaLastInitEnd = "nsds5replicaLastInitEnd";
const char *type_nsds5ReplicaLastInitStatus = "nsds5replicaLastInitStatus";

/* windows sync specific attributes */
const char *type_nsds7WindowsReplicaArea = "nsds7WindowsReplicaSubtree";
Expand Down
Loading