Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Resolve the inconsistency between member configuration and log index in snapshot meta #386

2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ do
--)
shift
break
;;
;;
esac
shift
done
Expand Down
2 changes: 1 addition & 1 deletion cmake/braft.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ ExternalProject_Add(
DEPENDS brpc
# The pr on braft is not merged, so I am using my own warehouse to run the test for the time being
GIT_REPOSITORY "https://github.com/pikiwidb/braft.git"
GIT_TAG v1.1.2-alpha2
GIT_TAG v1.1.2.1
GIT_SHALLOW true
CMAKE_ARGS
-DCMAKE_BUILD_TYPE=${LIB_BUILD_TYPE}
Expand Down
2 changes: 1 addition & 1 deletion save_load.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ redis-benchmark -p 7777 -c 5 -n 10000 -r 10000 -d 1024 -t hset
redis-cli -p 7777 raft.node dosnapshot
redis-benchmark -p 7777 -c 5 -n 10000 -r 10000 -d 1024 -t hset

redis-cli -p 8888 raft.cluster join 127.0.0.1:7777
redis-cli -p 8888 raft.cluster join 127.0.0.1:7777
4 changes: 2 additions & 2 deletions src/pikiwidb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ static int InitLimit() {
limit.rlim_cur = maxfiles;
limit.rlim_max = maxfiles;
if (setrlimit(RLIMIT_NOFILE, &limit) != -1) {
WARN("your 'limit -n ' of {} is not enough for PikiwiDB to start. PikiwiDB have successfully reconfig it to ",
WARN("your 'limit -n ' of {} is not enough for PikiwiDB to start. PikiwiDB have successfully reconfig it to {}",
old_limit, limit.rlim_cur);
} else {
ERROR(
Expand Down Expand Up @@ -282,7 +282,7 @@ int main(int ac, char* av[]) {
daemonize();
}

InitLimit();
// InitLimit(); // Code problem, program startup direct segment error
pstd::InitRandom();
SignalSetup();
InitLogs();
Expand Down
13 changes: 12 additions & 1 deletion src/praft/praft.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,16 @@ storage::LogIndex PRaft::GetLastLogIndex(bool is_flush) {
return node_->get_last_log_index(is_flush);
}

void PRaft::GetConfigurationByIndex(const int64_t index, braft::ConfigurationEntry* conf,
braft::ConfigurationEntry* learner_conf) {
if (!node_) {
ERROR("Node is not initialized");
return;
}

node_->get_configuration(index, conf, learner_conf);
}

void PRaft::SendNodeRequest(PClient* client) {
assert(client);

Expand Down Expand Up @@ -663,8 +673,9 @@ int PRaft::on_snapshot_load(braft::SnapshotReader* reader) {
2. When a node is improperly shut down and restarted, the minimum flush-index should
be obtained as the starting point for fault recovery.
*/
// replay from <replay_point + 1>
uint64_t replay_point = PSTORE.GetBackend(db_id_)->GetStorage()->GetSmallestFlushedLogIndex();
node_->set_self_playback_point(replay_point);
node_->set_last_applied_index_and_term(replay_point);
is_node_first_start_up_ = false;
INFO("set replay_point: {}", replay_point);

Expand Down
3 changes: 3 additions & 0 deletions src/praft/praft.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <tuple>
#include <vector>

#include "braft/configuration_manager.h"
#include "braft/file_system_adaptor.h"
#include "braft/raft.h"
#include "brpc/server.h"
Expand Down Expand Up @@ -140,6 +141,8 @@ class PRaft : public braft::StateMachine {
butil::Status GetListPeers(std::vector<braft::PeerId>* peers);
storage::LogIndex GetTerm(uint64_t log_index);
storage::LogIndex GetLastLogIndex(bool is_flush = false);
void GetConfigurationByIndex(const int64_t index, braft::ConfigurationEntry* conf,
braft::ConfigurationEntry* learner_conf);

bool IsInitialized() const { return node_ != nullptr && server_ != nullptr; }

Expand Down
19 changes: 18 additions & 1 deletion src/praft/psnapshot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#include "psnapshot.h"

#include "braft/configuration.h"
#include "braft/configuration_manager.h"
#include "braft/local_file_meta.pb.h"
#include "braft/snapshot.h"
#include "butil/files/file_path.h"
Expand Down Expand Up @@ -79,12 +81,27 @@ braft::FileAdaptor* PPosixFileSystemAdaptor::open(const std::string& path, int o
PSTORE.HandleTaskSpecificDB(tasks);
AddAllFiles(snapshot_path, &snapshot_meta_memtable, snapshot_path);

// update snapshot last log index and last_log_term
// update snapshot last log index, last_log_term, conf of last log index and learners
auto& new_meta = const_cast<braft::SnapshotMeta&>(snapshot_meta_memtable.meta());
auto last_log_index = PSTORE.GetBackend(db_id)->GetStorage()->GetSmallestFlushedLogIndex();
new_meta.set_last_included_index(last_log_index);
auto last_log_term = PRAFT.GetTerm(last_log_index);
new_meta.set_last_included_term(last_log_term);
braft::ConfigurationEntry conf_entry;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在github看着好像代码没对齐

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

braft::ConfigurationEntry learner_conf_entry;
PRAFT.GetConfigurationByIndex(last_log_index, &conf_entry, &learner_conf_entry);
new_meta.clear_peers();
for (auto iter = conf_entry.conf.begin(); iter != conf_entry.conf.end(); ++iter) {
*new_meta.add_peers() = iter->to_string();
}
new_meta.clear_old_peers();
for (auto iter = conf_entry.old_conf.begin(); iter != conf_entry.old_conf.end(); ++iter) {
*new_meta.add_old_peers() = iter->to_string();
}
new_meta.clear_learners();
for (auto iter = learner_conf_entry.conf.begin(); iter != learner_conf_entry.conf.end(); ++iter) {
*new_meta.add_learners() = iter->to_string();
}
Comment on lines +84 to +104
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

优化快照元数据更新逻辑。

代码中使用了 const_cast 来修改 snapshot_meta_memtable.meta(),这可能导致未定义的行为,建议寻找替代方法来处理不可变对象的修改。
此外,更新快照元数据的逻辑看起来正确,但建议添加更多的错误处理和日志记录,以便在更新过程中捕获和诊断可能的问题。

INFO("Succeed to fix db_{} snapshot meta: {}, {}", db_id, last_log_index, last_log_term);

auto rc = snapshot_meta_memtable.save_to_file(fs, meta_path);
Expand Down
Loading