Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix readonly panic when write abort height #101

Merged
merged 1 commit into from
Mar 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 35 additions & 31 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,22 +178,26 @@ fn main() {
// raft task handle
let mut opt_raft_task: Option<JoinHandle<()>> = None;

// used to delay abort raft task
// None means I'm validator no need to abort
// Some means I'm not validator, and the height is when I should abort
// but we need to delay abort, because the raft task may not complete
// to handle a node restart during the delayed abort, we should persist the abort height
// and reload the abort height on restart
// create raft data path if not exists
std::fs::create_dir_all(&config.raft_data_path)
.expect("create raft data path failed");
let abort_height_path = Path::new(&config.raft_data_path).join("abort_height");
let mut opt_abort_height = if abort_height_path.exists() {
let height = std::fs::read_to_string(&abort_height_path)
// abort height is the height at which the node goes from validator to non-validator
let mut abort_height = if abort_height_path.exists() {
std::fs::read_to_string(&abort_height_path)
.expect("read abort height failed")
.parse::<u64>()
.expect("parse abort height failed");
Some(height)
.expect("parse abort height failed")
} else {
None
0
};

// track the persisted abort height to avoid writing the file frequently
// we only write the file when the abort height has changed
let mut presist_abort_height = abort_height;

loop {
tokio::time::sleep(Duration::from_secs(3)).await;

Expand Down Expand Up @@ -234,12 +238,25 @@ fn main() {
logger,
"get reconfigure from controller, height is {}", trigger_config.height
);
let is_validator = trigger_config.validators.contains(&node_addr);
if is_validator {
// if this node is deleted from the validator list at height 100,
// the final abort height should be 99
// raft starts removing the node at height 100
// we need to abort the raft task at height 101 to let it complete the node removal
// so we need to add 2 to the abort height
abort_height = trigger_config.height + 2;
} else {
info!(logger, "I'm not in the validators list");
if abort_height > presist_abort_height {
std::fs::write(&abort_height_path, abort_height.to_string())
.expect("write abort height failed");
presist_abort_height = abort_height;
}
}

// if node restart during delay abort, we should start raft task at first
if trigger_config.validators.contains(&node_addr)
|| (opt_abort_height.is_some()
&& trigger_config.height <= opt_abort_height.unwrap())
{
opt_abort_height = None;
if is_validator || trigger_config.height < abort_height {
if opt_raft_task.is_none()
|| opt_raft_task.as_ref().unwrap().is_finished()
{
Expand All @@ -265,23 +282,10 @@ fn main() {
});
opt_raft_task = Some(handle);
}
} else {
info!(logger, "I'm not in the validators list");
if opt_abort_height.is_none() {
std::fs::write(
&abort_height_path,
trigger_config.height.to_string(),
)
.expect("write abort height failed");
opt_abort_height = Some(trigger_config.height);
}
if let Some(ref handle) = opt_raft_task {
if !handle.is_finished()
&& trigger_config.height > opt_abort_height.unwrap()
{
info!(logger, "abort raft");
handle.abort();
}
} else if let Some(ref handle) = opt_raft_task {
if !handle.is_finished() {
info!(logger, "abort raft");
handle.abort();
}
}
}
Expand Down
Loading