Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 5944 - Reversion of the entry cache should be limited to BETXN … #5994

Merged
merged 1 commit into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions dirsrvtests/tests/suites/betxns/betxn_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
#
import pytest
import ldap
import os
from lib389.tasks import *
from lib389.utils import *
from lib389.topologies import topology_st
from lib389.plugins import (SevenBitCheckPlugin, AttributeUniquenessPlugin,
MemberOfPlugin, ManagedEntriesPlugin,
ReferentialIntegrityPlugin, MEPTemplates,
MEPConfigs)
from lib389.plugins import MemberOfPlugin
from lib389.idm.user import UserAccounts, TEST_USER_PROPERTIES
from lib389.idm.organizationalunit import OrganizationalUnits
from lib389.idm.group import Groups, Group
Expand Down Expand Up @@ -356,6 +358,92 @@ def test_ri_and_mep_cache_corruption(topology_st):
# Success
log.info("Test PASSED")

def test_revert_cache(topology_st, request):
"""Tests that reversion of the entry cache does not occur
during 'normal' failure (like schema violation) but only
when a plugin fails

:id: a2361285-b939-4da0-aa80-7fc54d12c981

:setup: Standalone instance

:steps:
1. Create a user account (with a homeDirectory)
2. Remove the error log file
3. Add a second value of homeDirectory
4. Check that error log does not contain entry cache reversion
5. Configure memberof to trigger a failure
6. Do a update the will fail in memberof plugin
7. Check that error log does contain entry cache reversion

:expectedresults:
1. Succeeds
2. Fails with OBJECT_CLASS_VIOLATION
3. Succeeds
4. Succeeds
5. Succeeds
6. Fails with OBJECT_CLASS_VIOLATION
7. Succeeds
"""
# Create an test user entry
log.info('Create a user without nsMemberOF objectclass')
try:
users = UserAccounts(topology_st.standalone, DEFAULT_SUFFIX, rdn='ou=people')
user = users.create_test_user()
user.replace('objectclass', ['top', 'account', 'person', 'inetorgperson', 'posixAccount', 'organizationalPerson', 'nsAccount'])
except ldap.LDAPError as e:
log.fatal('Failed to create test user: error ' + e.args[0]['desc'])
assert False

# Remove the current error log file
topology_st.standalone.stop()
lpath = topology_st.standalone.ds_error_log._get_log_path()
os.unlink(lpath)
topology_st.standalone.start()

#
# Add a second value to 'homeDirectory'
# leads to ldap.OBJECT_CLASS_VIOLATION
# And check that the entry cache was not reverted
try:
topology_st.standalone.modify_s(user.dn, [(ldap.MOD_ADD, 'homeDirectory', ensure_bytes('/home/user_new'))])
assert False
except ldap.OBJECT_CLASS_VIOLATION:
pass
assert not topology_st.standalone.ds_error_log.match('.*WARN - flush_hash - Upon BETXN callback failure, entry cache is flushed during.*')

# Prepare memberof so that it will fail during a next update
# If memberof plugin can not add 'memberof' to the
# member entry, it retries after adding
# 'memberOfAutoAddOC' objectclass to the member.
# If it fails again the plugin fails with 'object
# violation'
# To trigger this failure, set 'memberOfAutoAddOC'
# to a value that does *not* allow 'memberof' attribute
memberof = MemberOfPlugin(topology_st.standalone)
memberof.enable()
memberof.replace('memberOfAutoAddOC', 'account')
memberof.replace('memberofentryscope', DEFAULT_SUFFIX)
topology_st.standalone.restart()

# Try to add the user to demo_group
# It should fail because user entry has not 'nsmemberof' objectclass
# As this is a BETXN plugin that fails it should revert the entry cache
try:
GROUP_DN = "cn=demo_group,ou=groups," + DEFAULT_SUFFIX
topology_st.standalone.modify_s(GROUP_DN,
[(ldap.MOD_REPLACE, 'member', ensure_bytes(user.dn))])
assert False
except ldap.OBJECT_CLASS_VIOLATION:
pass
assert topology_st.standalone.ds_error_log.match('.*WARN - flush_hash - Upon BETXN callback failure, entry cache is flushed during.*')

def fin():
user.delete()
memberof = MemberOfPlugin(topology_st.standalone)
memberof.replace('memberOfAutoAddOC', 'nsmemberof')

request.addfinalizer(fin)

if __name__ == '__main__':
# Run isolated
Expand Down
10 changes: 10 additions & 0 deletions ldap/servers/slapd/back-ldbm/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@
*/
slapi_log_err(SLAPI_LOG_FATAL, "dbgec_test_if_entry_pointer_is_valid", "cache.c[%d]: Wrong entry address: %p Previous entry address is: %p hash table slot is %d\n", line, e, prev, slot);
slapi_log_backtrace(SLAPI_LOG_FATAL);
*(char*)23 = 1; /* abort() somehow corrupt gdb stack backtrace so lets generate a SIGSEGV */

Check warning on line 536 in ldap/servers/slapd/back-ldbm/cache.c

View workflow job for this annotation

GitHub Actions / compile (GCC)

writing 1 byte into a region of size 0 [-Wstringop-overflow=]

Check warning on line 536 in ldap/servers/slapd/back-ldbm/cache.c

View workflow job for this annotation

GitHub Actions / compile (GCC)

writing 1 byte into a region of size 0 [-Wstringop-overflow=]

Check warning on line 536 in ldap/servers/slapd/back-ldbm/cache.c

View workflow job for this annotation

GitHub Actions / compile (GCC)

writing 1 byte into a region of size 0 [-Wstringop-overflow=]

Check warning on line 536 in ldap/servers/slapd/back-ldbm/cache.c

View workflow job for this annotation

GitHub Actions / compile (GCC)

writing 1 byte into a region of size 0 [-Wstringop-overflow=]

Check warning on line 536 in ldap/servers/slapd/back-ldbm/cache.c

View workflow job for this annotation

GitHub Actions / compile (GCC Static Analyzer)

writing 1 byte into a region of size 0 [-Wstringop-overflow=]

Check warning on line 536 in ldap/servers/slapd/back-ldbm/cache.c

View workflow job for this annotation

GitHub Actions / compile (GCC Static Analyzer)

writing 1 byte into a region of size 0 [-Wstringop-overflow=]

Check warning on line 536 in ldap/servers/slapd/back-ldbm/cache.c

View workflow job for this annotation

GitHub Actions / compile (GCC Static Analyzer)

writing 1 byte into a region of size 0 [-Wstringop-overflow=]

Check warning on line 536 in ldap/servers/slapd/back-ldbm/cache.c

View workflow job for this annotation

GitHub Actions / compile (GCC Static Analyzer)

writing 1 byte into a region of size 0 [-Wstringop-overflow=]
abort();
}
}
Expand All @@ -556,7 +556,12 @@
{
Hashtable *ht = cache->c_idtable; /* start with the ID table as it's in both ENTRY and DN caches */
void *e, *laste = NULL;
char flush_etime[ETIME_BUFSIZ] = {0};
struct timespec duration;
struct timespec flush_start;
struct timespec flush_end;

clock_gettime(CLOCK_MONOTONIC, &flush_start);
cache_lock(cache);

for (size_t i = 0; i < ht->size; i++) {
Expand Down Expand Up @@ -638,6 +643,11 @@
}

cache_unlock(cache);

clock_gettime(CLOCK_MONOTONIC, &flush_end);
slapi_timespec_diff(&flush_end, &flush_start, &duration);
snprintf(flush_etime, ETIME_BUFSIZ, "%" PRId64 ".%.09" PRId64 "", (int64_t)duration.tv_sec, (int64_t)duration.tv_nsec);
slapi_log_err(SLAPI_LOG_WARNING, "flush_hash", "Upon BETXN callback failure, entry cache is flushed during %s\n", flush_etime);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not know how frequent this message is displayed in a real deployment
and I wonder if WARNING is not a bit too frightening (Maybe INFO would be better )

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had the same concern. Failure of a plugin is not a big deal but revert_cache is so slow (especially with large entry cache) and almost freeze the server (all requests accessing to the entry cache are stuck) that I was thinking the first thing that an admin would do is to check the error log. Making it a warning may match what LDAP clients reports.

}

void
Expand Down
16 changes: 15 additions & 1 deletion ldap/servers/slapd/back-ldbm/ldbm_add.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ ldbm_back_add(Slapi_PBlock *pb)
int op_id;
int result_sent = 0;
int32_t parent_op = 0;
int32_t betxn_callback_fails = 0; /* if a BETXN fails we need to revert entry cache */
struct timespec parent_time;

if (slapi_pblock_get(pb, SLAPI_CONN_ID, &conn_id) < 0) {
Expand Down Expand Up @@ -919,6 +920,9 @@ ldbm_back_add(Slapi_PBlock *pb)
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
slapi_log_err(SLAPI_LOG_DEBUG, "ldbm_back_add", "SLAPI_PLUGIN_BE_TXN_PRE_ADD_FN plugin failed: %d\n",
ldap_result_code ? ldap_result_code : retval);
if (retval) {
betxn_callback_fails = 1;
}
goto error_return;
}
}
Expand Down Expand Up @@ -1229,11 +1233,15 @@ ldbm_back_add(Slapi_PBlock *pb)
slapi_pblock_set(pb, SLAPI_PLUGIN_OPRETURN, ldap_result_code ? &ldap_result_code : &retval);
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
if (retval) {
betxn_callback_fails = 1;
}
goto error_return;
}

retval = plugin_call_mmr_plugin_postop(pb, NULL,SLAPI_PLUGIN_BE_TXN_POST_ADD_FN);
if (retval) {
betxn_callback_fails = 1;
ldbm_set_error(pb, retval, &ldap_result_code, &ldap_result_message);
goto error_return;
}
Expand Down Expand Up @@ -1326,9 +1334,15 @@ ldbm_back_add(Slapi_PBlock *pb)
slapi_pblock_set(pb, SLAPI_PLUGIN_OPRETURN, &opreturn);
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
if (retval) {
betxn_callback_fails = 1;
}
}
/* the repl postop needs to be called for aborted operations */
retval = plugin_call_mmr_plugin_postop(pb, NULL,SLAPI_PLUGIN_BE_TXN_POST_ADD_FN);
if (retval) {
betxn_callback_fails = 1;
}
if (addingentry) {
if (inst && cache_is_in_cache(&inst->inst_cache, addingentry)) {
CACHE_REMOVE(&inst->inst_cache, addingentry);
Expand Down Expand Up @@ -1366,7 +1380,7 @@ ldbm_back_add(Slapi_PBlock *pb)
}

/* Revert the caches if this is the parent operation */
if (parent_op) {
if (parent_op && betxn_callback_fails) {
revert_cache(inst, &parent_time);
}
}
Expand Down
18 changes: 17 additions & 1 deletion ldap/servers/slapd/back-ldbm/ldbm_delete.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ ldbm_back_delete(Slapi_PBlock *pb)
int result_sent = 0;
Connection *pb_conn;
int32_t parent_op = 0;
int32_t betxn_callback_fails = 0; /* if a BETXN fails we need to revert entry cache */
struct timespec parent_time;

if (slapi_pblock_get(pb, SLAPI_CONN_ID, &conn_id) < 0) {
Expand Down Expand Up @@ -434,6 +435,9 @@ ldbm_back_delete(Slapi_PBlock *pb)
&ldap_result_code : &retval );
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
if (retval) {
betxn_callback_fails = 1;
}
goto error_return;
}
}
Expand Down Expand Up @@ -717,6 +721,9 @@ ldbm_back_delete(Slapi_PBlock *pb)
ldap_result_code ? &ldap_result_code : &retval);
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
if (retval) {
betxn_callback_fails = 1;
}
goto error_return;
}
}
Expand Down Expand Up @@ -746,6 +753,9 @@ ldbm_back_delete(Slapi_PBlock *pb)
}
/* retval is -1 */
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
if (rc) {
betxn_callback_fails = 1;
}
goto error_return;
}
slapi_pblock_set(pb, SLAPI_DELETE_BEPREOP_ENTRY, orig_entry);
Expand Down Expand Up @@ -1266,6 +1276,7 @@ ldbm_back_delete(Slapi_PBlock *pb)
slapi_pblock_set(pb, SLAPI_PLUGIN_OPRETURN, &retval);
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
betxn_callback_fails = 1;
goto error_return;
}
if (parent_found) {
Expand All @@ -1281,6 +1292,7 @@ ldbm_back_delete(Slapi_PBlock *pb)

retval = plugin_call_mmr_plugin_postop(pb, NULL,SLAPI_PLUGIN_BE_TXN_POST_DELETE_FN);
if (retval) {
betxn_callback_fails = 1;
ldbm_set_error(pb, retval, &ldap_result_code, &ldap_result_message);
goto error_return;
}
Expand Down Expand Up @@ -1416,8 +1428,12 @@ ldbm_back_delete(Slapi_PBlock *pb)
slapi_pblock_set(pb, SLAPI_PLUGIN_OPRETURN, ldap_result_code ? &ldap_result_code : &retval);
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
betxn_callback_fails = 1;
}
retval = plugin_call_mmr_plugin_postop(pb, NULL,SLAPI_PLUGIN_BE_TXN_POST_DELETE_FN);
if (retval) {
betxn_callback_fails = 1;
}

/* Release SERIAL LOCK */
dblayer_txn_abort(be, &txn); /* abort crashes in case disk full */
Expand All @@ -1437,7 +1453,7 @@ ldbm_back_delete(Slapi_PBlock *pb)
}

/* Revert the caches if this is the parent operation */
if (parent_op) {
if (parent_op && betxn_callback_fails) {
revert_cache(inst, &parent_time);
}

Expand Down
12 changes: 11 additions & 1 deletion ldap/servers/slapd/back-ldbm/ldbm_modify.c
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,7 @@ ldbm_back_modify(Slapi_PBlock *pb)
int ec_locked = 0;
int result_sent = 0;
int32_t parent_op = 0;
int32_t betxn_callback_fails = 0; /* if a BETXN fails we need to revert entry cache */
struct timespec parent_time;
Slapi_Mods *smods_add_rdn = NULL;

Expand Down Expand Up @@ -817,6 +818,9 @@ ldbm_back_modify(Slapi_PBlock *pb)
slapi_pblock_set(pb, SLAPI_PLUGIN_OPRETURN, ldap_result_code ? &ldap_result_code : &retval);
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
if (retval) {
betxn_callback_fails = 1;
}
goto error_return;
}

Expand Down Expand Up @@ -1017,10 +1021,12 @@ ldbm_back_modify(Slapi_PBlock *pb)
slapi_pblock_set(pb, SLAPI_PLUGIN_OPRETURN, ldap_result_code ? &ldap_result_code : &retval);
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
betxn_callback_fails = 1;
goto error_return;
}
retval = plugin_call_mmr_plugin_postop(pb, NULL,SLAPI_PLUGIN_BE_TXN_POST_MODIFY_FN);
if (retval) {
betxn_callback_fails = 1;
ldbm_set_error(pb, retval, &ldap_result_code, &ldap_result_message);
goto error_return;
}
Expand Down Expand Up @@ -1083,8 +1089,12 @@ ldbm_back_modify(Slapi_PBlock *pb)
if (!opreturn) {
slapi_pblock_set(pb, SLAPI_PLUGIN_OPRETURN, ldap_result_code ? &ldap_result_code : &retval);
}
betxn_callback_fails = 1;
}
retval = plugin_call_mmr_plugin_postop(pb, NULL,SLAPI_PLUGIN_BE_TXN_POST_MODIFY_FN);
if (retval) {
betxn_callback_fails = 1;
}

/* It is safer not to abort when the transaction is not started. */
/* Release SERIAL LOCK */
Expand All @@ -1096,7 +1106,7 @@ ldbm_back_modify(Slapi_PBlock *pb)
rc = SLAPI_FAIL_GENERAL;
}
/* Revert the caches if this is the parent operation */
if (parent_op) {
if (parent_op && betxn_callback_fails) {
revert_cache(inst, &parent_time);
}
}
Expand Down
12 changes: 10 additions & 2 deletions ldap/servers/slapd/back-ldbm/ldbm_modrdn.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ ldbm_back_modrdn(Slapi_PBlock *pb)
int result_sent = 0;
Connection *pb_conn = NULL;
int32_t parent_op = 0;
int32_t betxn_callback_fails = 0; /* if a BETXN fails we need to revert entry cache */
struct timespec parent_time;
Slapi_Mods *smods_add_rdn = NULL;

Expand Down Expand Up @@ -999,6 +1000,9 @@ ldbm_back_modrdn(Slapi_PBlock *pb)
slapi_pblock_set(pb, SLAPI_PLUGIN_OPRETURN, ldap_result_code ? &ldap_result_code : &retval);
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
if (retval) {
betxn_callback_fails = 1;
}
goto error_return;
}

Expand Down Expand Up @@ -1250,10 +1254,12 @@ ldbm_back_modrdn(Slapi_PBlock *pb)
slapi_pblock_set(pb, SLAPI_PLUGIN_OPRETURN, ldap_result_code ? &ldap_result_code : &retval);
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);
betxn_callback_fails = 1;
goto error_return;
}
retval = plugin_call_mmr_plugin_postop(pb, NULL,SLAPI_PLUGIN_BE_TXN_POST_MODRDN_FN);
if (retval) {
betxn_callback_fails = 1;
ldbm_set_error(pb, retval, &ldap_result_code, &ldap_result_message);
goto error_return;
}
Expand Down Expand Up @@ -1319,7 +1325,7 @@ ldbm_back_modrdn(Slapi_PBlock *pb)

error_return:
/* Revert the caches if this is the parent operation */
if (parent_op) {
if (parent_op && betxn_callback_fails) {
revert_cache(inst, &parent_time);
}

Expand Down Expand Up @@ -1401,7 +1407,9 @@ ldbm_back_modrdn(Slapi_PBlock *pb)
}
slapi_pblock_get(pb, SLAPI_PB_RESULT_TEXT, &ldap_result_message);

/* Revert the caches if this is the parent operation */
/* As it is a BETXN plugin failure then
* revert the caches if this is the parent operation
*/
if (parent_op) {
revert_cache(inst, &parent_time);
}
Expand Down
Loading