From 49e5a7fe93de5ece695b561989434e3d91271af1 Mon Sep 17 00:00:00 2001 From: Sankar <1890648+srgsanky@users.noreply.github.com> Date: Tue, 7 May 2024 18:22:28 -0700 Subject: [PATCH 1/8] Make cluster meet reliable under link failures When there is a link failure while an ongoing MEET request is sent the sending node stops sending anymore MEET and starts sending PINGs. Since every node responds to PINGs from unknown nodes with a PONG, the receiving node never adds the sending node. But the sending node adds the receiving node when it sees a PONG. This can lead to asymmetry in cluster membership. This changes makes the sender keep sending MEET until it sees a PONG, avoiding the asymmetry. Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com> --- src/cluster_legacy.c | 32 +++++++++--- src/debug.c | 6 +++ src/server.h | 2 + tests/cluster/tests/30-reliable-meet.tcl | 54 +++++++++++++++++++++ tests/cluster/tests/includes/init-tests.tcl | 21 ++++---- 5 files changed, 98 insertions(+), 17 deletions(-) create mode 100644 tests/cluster/tests/30-reliable-meet.tcl diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index f1a8d87b74..c6cec01b7b 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2975,7 +2975,13 @@ int clusterIsValidPacket(clusterLink *link) { int clusterProcessPacket(clusterLink *link) { /* Validate that the packet is well-formed */ - if (!clusterIsValidPacket(link)) return 1; + if (!clusterIsValidPacket(link)) { + if (server.cluster_close_link_on_packet_drop) { + freeClusterLink(link); + return 0; + } + return 1; + } clusterMsg *hdr = (clusterMsg*) link->rcvbuf; uint16_t type = ntohs(hdr->type); @@ -3088,6 +3094,12 @@ int clusterProcessPacket(clusterLink *link) { serverLog(LL_DEBUG,"%s packet received: %.40s", clusterGetMessageTypeString(type), link->node ? link->node->name : "NULL"); + + if (sender && (sender->flags & CLUSTER_NODE_MEET)) { + // Once we get a response for MEET from the sender, we can stop sending more MEET + sender->flags &= ~CLUSTER_NODE_MEET; + } + if (!link->inbound) { if (nodeInHandshake(link->node)) { /* If we already have this node, try to change the @@ -3613,13 +3625,17 @@ void clusterLinkConnectHandler(connection *conn) { * replaced by the clusterSendPing() call. */ node->ping_sent = old_ping_sent; } - /* We can clear the flag after the first packet is sent. - * If we'll never receive a PONG, we'll never send new packets - * to this node. Instead after the PONG is received and we - * are no longer in meet/handshake status, we want to send - * normal PING packets. */ - node->flags &= ~CLUSTER_NODE_MEET; - + /* NOTE: We cannot clear the MEET flag from the node until we get a response + * from the other node. If the MEET packet is not accepted by the other node + * due to link failure, we want to continue sending MEET. If we don't + * continue sending MEET, this node will know about the other node, but the + * other node will never add this node. Every node always responds to PINGs + * from unknown nodes with a PONG, so this node will know about the other + * node and continue sending PINGs. But the other node won't add this node + * until it sees a MEET (or it gets to know about this node from a trusted + * third node). In this case, clearing the MEET flag here leads to asymmetry + * in the cluster membership. + */ serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d", node->name, node->ip, node->cport); } diff --git a/src/debug.c b/src/debug.c index 5327a231ac..21ff9a064a 100644 --- a/src/debug.c +++ b/src/debug.c @@ -434,6 +434,9 @@ void debugCommand(client *c) { " Show low level info about `key` and associated value.", "DROP-CLUSTER-PACKET-FILTER ", " Drop all packets that match the filtered type. Set to -1 allow all packets.", +"CLOSE-CLUSTER-LINK-ON-PACKET-DROP <0|1>", +" This is valid only when DROP-CLUSTER-PACKET-FILTER is set to a valid packet type." +" When set to 1, the cluster link is closed after dropping a packet based on the filter." "OOM", " Crash the server simulating an out-of-memory error.", "PANIC", @@ -603,6 +606,9 @@ NULL return; server.cluster_drop_packet_filter = packet_type; addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr, "close-cluster-link-on-packet-drop") && c->argc == 3) { + server.cluster_close_link_on_packet_drop = atoi(c->argv[2]->ptr); + addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) { dictEntry *de; robj *val; diff --git a/src/server.h b/src/server.h index 98ac30b7d6..b0cecd2cf7 100644 --- a/src/server.h +++ b/src/server.h @@ -2027,6 +2027,8 @@ struct valkeyServer { unsigned long long cluster_link_msg_queue_limit_bytes; /* Memory usage limit on individual link msg queue */ int cluster_drop_packet_filter; /* Debug config that allows tactically * dropping packets of a specific type */ + int cluster_close_link_on_packet_drop; /* Debug config that goes along with cluster_drop_packet_filter. + When set, the link is closed on packet drop. */ /* Scripting */ mstime_t busy_reply_threshold; /* Script / module timeout in milliseconds */ int pre_command_oom_state; /* OOM before command (script?) was started */ diff --git a/tests/cluster/tests/30-reliable-meet.tcl b/tests/cluster/tests/30-reliable-meet.tcl new file mode 100644 index 0000000000..84fde8b4e6 --- /dev/null +++ b/tests/cluster/tests/30-reliable-meet.tcl @@ -0,0 +1,54 @@ +set do_not_join_cluster_nodes 1 + +source "../tests/includes/init-tests.tcl" + +# Create a cluster composed of the specified number of primaries. +proc create_primaries_only_cluster {primaries} { + cluster_allocate_slots $primaries + set ::cluster_master_nodes $primaries +} + +set CLUSTER_PACKET_TYPE_MEET 2 +set CLUSTER_PACKET_TYPE_NONE -1 + +set a 0 +set b 1 + +test "Cluster nodes haven't met each other" { + assert {[llength [get_cluster_nodes $a]] == 1} + assert {[llength [get_cluster_nodes $b]] == 1} +} + +test "Create a 2 nodes cluster with 2 shards" { + create_primaries_only_cluster 2 +} + +test "MEET is reliabile when target drops the initial MEETs" { + set b_port [get_instance_attrib valkey $b port] + + # Make b drop the initial MEET messages due to link failure + R $b DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET + R $b DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1 + + R $a CLUSTER MEET 127.0.0.1 $b_port + + wait_for_condition 1000 50 { + [CI $b cluster_stats_messages_meet_received] >= 3 + } else { + fail "Cluster node $a never sent multiple MEETs to $b" + } + + # Make sure the nodes still don't know about each other + assert {[llength [get_cluster_nodes $a connected]] == 1} + assert {[llength [get_cluster_nodes $b connected]] == 1} + + R $b DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE + + # If the MEET is reliable, both a and b will turn to cluster state ok + wait_for_condition 1000 50 { + [CI $a cluster_state] eq {ok} && [CI $b cluster_state] eq {ok} + } else { + fail "$a cluster_state:[CI $a cluster_state], $b cluster_state: [CI $b cluster_state]" + } +} + diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl index f1cb3a8b6b..d01258f6ce 100644 --- a/tests/cluster/tests/includes/init-tests.tcl +++ b/tests/cluster/tests/includes/init-tests.tcl @@ -44,6 +44,7 @@ test "Cluster nodes hard reset" { R $id config set repl-diskless-load disabled R $id config set cluster-announce-hostname "" R $id DEBUG DROP-CLUSTER-PACKET-FILTER -1 + R $id DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 0 R $id config rewrite } } @@ -73,16 +74,18 @@ proc join_nodes_in_cluster {} { return 1 } -test "Cluster Join and auto-discovery test" { - # Use multiple attempts since sometimes nodes timeout - # while attempting to connect. - for {set attempts 3} {$attempts > 0} {incr attempts -1} { - if {[join_nodes_in_cluster] == 1} { - break +if {![info exists do_not_join_cluster_nodes] || !$do_not_join_cluster_nodes} { + test "Cluster Join and auto-discovery test" { + # Use multiple attempts since sometimes nodes timeout + # while attempting to connect. + for {set attempts 3} {$attempts > 0} {incr attempts -1} { + if {[join_nodes_in_cluster] == 1} { + break + } + } + if {$attempts == 0} { + fail "Cluster failed to form full mesh" } - } - if {$attempts == 0} { - fail "Cluster failed to form full mesh" } } From ec68829a393ac8f73bdb6b9749f989025f923bb8 Mon Sep 17 00:00:00 2001 From: Sankar <1890648+srgsanky@users.noreply.github.com> Date: Sun, 19 May 2024 13:41:04 -0700 Subject: [PATCH 2/8] Moved test under unit and addressed other comments Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com> --- src/cluster_legacy.c | 2 +- tests/cluster/tests/includes/init-tests.tcl | 20 ++++---- .../cluster/cluster-reliable-meet.tcl} | 51 ++++++++++++++----- 3 files changed, 48 insertions(+), 25 deletions(-) rename tests/{cluster/tests/30-reliable-meet.tcl => unit/cluster/cluster-reliable-meet.tcl} (51%) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 8f801f4751..cea1c03fe7 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3096,7 +3096,7 @@ int clusterProcessPacket(clusterLink *link) { link->node ? link->node->name : "NULL"); if (sender && (sender->flags & CLUSTER_NODE_MEET)) { - // Once we get a response for MEET from the sender, we can stop sending more MEET + /* Once we get a response for MEET from the sender, we can stop sending more MEET. */ sender->flags &= ~CLUSTER_NODE_MEET; } diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl index d01258f6ce..41fc49aa6d 100644 --- a/tests/cluster/tests/includes/init-tests.tcl +++ b/tests/cluster/tests/includes/init-tests.tcl @@ -74,19 +74,17 @@ proc join_nodes_in_cluster {} { return 1 } -if {![info exists do_not_join_cluster_nodes] || !$do_not_join_cluster_nodes} { - test "Cluster Join and auto-discovery test" { - # Use multiple attempts since sometimes nodes timeout - # while attempting to connect. - for {set attempts 3} {$attempts > 0} {incr attempts -1} { - if {[join_nodes_in_cluster] == 1} { - break - } - } - if {$attempts == 0} { - fail "Cluster failed to form full mesh" +test "Cluster Join and auto-discovery test" { + # Use multiple attempts since sometimes nodes timeout + # while attempting to connect. + for {set attempts 3} {$attempts > 0} {incr attempts -1} { + if {[join_nodes_in_cluster] == 1} { + break } } + if {$attempts == 0} { + fail "Cluster failed to form full mesh" + } } test "Before slots allocation, all nodes report cluster failure" { diff --git a/tests/cluster/tests/30-reliable-meet.tcl b/tests/unit/cluster/cluster-reliable-meet.tcl similarity index 51% rename from tests/cluster/tests/30-reliable-meet.tcl rename to tests/unit/cluster/cluster-reliable-meet.tcl index 84fde8b4e6..b8e7d5b2a3 100644 --- a/tests/cluster/tests/30-reliable-meet.tcl +++ b/tests/unit/cluster/cluster-reliable-meet.tcl @@ -1,37 +1,55 @@ -set do_not_join_cluster_nodes 1 +# make sure the test infra won't use SELECT +set old_singledb $::singledb +set ::singledb 1 -source "../tests/includes/init-tests.tcl" +tags {tls:skip external:skip cluster} { -# Create a cluster composed of the specified number of primaries. -proc create_primaries_only_cluster {primaries} { - cluster_allocate_slots $primaries - set ::cluster_master_nodes $primaries +set base_conf [list cluster-enabled yes] +start_multiple_servers 2 [list overrides $base_conf] { + +test "Cluster nodes are reachable" { + for {set id 0} {$id < [llength $::servers]} {incr id} { + # Every node should be reachable. + wait_for_condition 1000 50 { + ([catch {R $id ping} ping_reply] == 0) && + ($ping_reply eq {PONG}) + } else { + catch {R $id ping} err + fail "Node #$id keeps replying '$err' to PING." + } + } +} + +test "Before slots allocation, all nodes report cluster failure" { + wait_for_cluster_state fail } set CLUSTER_PACKET_TYPE_MEET 2 set CLUSTER_PACKET_TYPE_NONE -1 -set a 0 -set b 1 +set b 0 +set a 1 test "Cluster nodes haven't met each other" { assert {[llength [get_cluster_nodes $a]] == 1} assert {[llength [get_cluster_nodes $b]] == 1} } -test "Create a 2 nodes cluster with 2 shards" { - create_primaries_only_cluster 2 +test "Allocate slots" { + cluster_allocate_slots 2 0 } test "MEET is reliabile when target drops the initial MEETs" { - set b_port [get_instance_attrib valkey $b port] - # Make b drop the initial MEET messages due to link failure R $b DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET R $b DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1 + set b_port [srv 0 port] + R $a CLUSTER MEET 127.0.0.1 $b_port + # Wait for at least a few MEETs to be sent so that we are sure that b is + # dropping them. wait_for_condition 1000 50 { [CI $b cluster_stats_messages_meet_received] >= 3 } else { @@ -46,9 +64,16 @@ test "MEET is reliabile when target drops the initial MEETs" { # If the MEET is reliable, both a and b will turn to cluster state ok wait_for_condition 1000 50 { - [CI $a cluster_state] eq {ok} && [CI $b cluster_state] eq {ok} + [CI $a cluster_state] eq {ok} && [CI $b cluster_state] eq {ok} && + [CI $b cluster_stats_messages_meet_received] >= 4 && + [CI $a cluster_stats_messages_meet_sent] == [CI $b cluster_stats_messages_meet_received] } else { fail "$a cluster_state:[CI $a cluster_state], $b cluster_state: [CI $b cluster_state]" } } +} ;# stop servers + +} ;# tags + +set ::singledb $old_singledb From a760aa66a615cdda8ff68ca4066ca0b6168466d2 Mon Sep 17 00:00:00 2001 From: Sankar <1890648+srgsanky@users.noreply.github.com> Date: Sun, 19 May 2024 21:20:33 -0700 Subject: [PATCH 3/8] Addressed comments about the unit test Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com> --- tests/cluster/tests/includes/init-tests.tcl | 1 - tests/unit/cluster/cluster-reliable-meet.tcl | 120 +++++++++---------- 2 files changed, 56 insertions(+), 65 deletions(-) diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl index 41fc49aa6d..f1cb3a8b6b 100644 --- a/tests/cluster/tests/includes/init-tests.tcl +++ b/tests/cluster/tests/includes/init-tests.tcl @@ -44,7 +44,6 @@ test "Cluster nodes hard reset" { R $id config set repl-diskless-load disabled R $id config set cluster-announce-hostname "" R $id DEBUG DROP-CLUSTER-PACKET-FILTER -1 - R $id DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 0 R $id config rewrite } } diff --git a/tests/unit/cluster/cluster-reliable-meet.tcl b/tests/unit/cluster/cluster-reliable-meet.tcl index b8e7d5b2a3..890e80936e 100644 --- a/tests/unit/cluster/cluster-reliable-meet.tcl +++ b/tests/unit/cluster/cluster-reliable-meet.tcl @@ -3,76 +3,68 @@ set old_singledb $::singledb set ::singledb 1 tags {tls:skip external:skip cluster} { - -set base_conf [list cluster-enabled yes] -start_multiple_servers 2 [list overrides $base_conf] { - -test "Cluster nodes are reachable" { - for {set id 0} {$id < [llength $::servers]} {incr id} { - # Every node should be reachable. - wait_for_condition 1000 50 { - ([catch {R $id ping} ping_reply] == 0) && - ($ping_reply eq {PONG}) - } else { - catch {R $id ping} err - fail "Node #$id keeps replying '$err' to PING." + set base_conf [list cluster-enabled yes] + start_multiple_servers 2 [list overrides $base_conf] { + test "Cluster nodes are reachable" { + for {set id 0} {$id < [llength $::servers]} {incr id} { + # Every node should be reachable. + wait_for_condition 1000 50 { + ([catch {R $id ping} ping_reply] == 0) && + ($ping_reply eq {PONG}) + } else { + catch {R $id ping} err + fail "Node #$id keeps replying '$err' to PING." + } + } } - } -} - -test "Before slots allocation, all nodes report cluster failure" { - wait_for_cluster_state fail -} - -set CLUSTER_PACKET_TYPE_MEET 2 -set CLUSTER_PACKET_TYPE_NONE -1 - -set b 0 -set a 1 - -test "Cluster nodes haven't met each other" { - assert {[llength [get_cluster_nodes $a]] == 1} - assert {[llength [get_cluster_nodes $b]] == 1} -} - -test "Allocate slots" { - cluster_allocate_slots 2 0 -} -test "MEET is reliabile when target drops the initial MEETs" { - # Make b drop the initial MEET messages due to link failure - R $b DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET - R $b DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1 - - set b_port [srv 0 port] - - R $a CLUSTER MEET 127.0.0.1 $b_port - - # Wait for at least a few MEETs to be sent so that we are sure that b is - # dropping them. - wait_for_condition 1000 50 { - [CI $b cluster_stats_messages_meet_received] >= 3 - } else { - fail "Cluster node $a never sent multiple MEETs to $b" - } + test "Before slots allocation, all nodes report cluster failure" { + wait_for_cluster_state fail + } - # Make sure the nodes still don't know about each other - assert {[llength [get_cluster_nodes $a connected]] == 1} - assert {[llength [get_cluster_nodes $b connected]] == 1} + set CLUSTER_PACKET_TYPE_MEET 2 + set CLUSTER_PACKET_TYPE_NONE -1 - R $b DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE + test "Cluster nodes haven't met each other" { + assert {[llength [get_cluster_nodes 1]] == 1} + assert {[llength [get_cluster_nodes 0]] == 1} + } - # If the MEET is reliable, both a and b will turn to cluster state ok - wait_for_condition 1000 50 { - [CI $a cluster_state] eq {ok} && [CI $b cluster_state] eq {ok} && - [CI $b cluster_stats_messages_meet_received] >= 4 && - [CI $a cluster_stats_messages_meet_sent] == [CI $b cluster_stats_messages_meet_received] - } else { - fail "$a cluster_state:[CI $a cluster_state], $b cluster_state: [CI $b cluster_state]" - } -} -} ;# stop servers + test "Allocate slots" { + cluster_allocate_slots 2 0 + } + test "MEET is reliabile when target drops the initial MEETs" { + # Make 0 drop the initial MEET messages due to link failure + R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET + R 0 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1 + + R 1 CLUSTER MEET 127.0.0.1 [srv 0 port] + + # Wait for at least a few MEETs to be sent so that we are sure that 0 is + # dropping them. + wait_for_condition 1000 50 { + [CI 0 cluster_stats_messages_meet_received] >= 3 + } else { + fail "Cluster node $a never sent multiple MEETs to $b" + } + + # Make sure the nodes still don't know about each other + assert {[llength [get_cluster_nodes 1 connected]] == 1} + assert {[llength [get_cluster_nodes 0 connected]] == 1} + + R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE + + # If the MEET is reliable, both a and b will turn to cluster state ok + wait_for_condition 1000 50 { + [CI 1 cluster_state] eq {ok} && [CI 0 cluster_state] eq {ok} && + [CI 0 cluster_stats_messages_meet_received] >= 4 && + [CI 1 cluster_stats_messages_meet_sent] == [CI 0 cluster_stats_messages_meet_received] + } else { + fail "1 cluster_state:[CI 1 cluster_state], 0 cluster_state: [CI 0 cluster_state]" + } + } + } ;# stop servers } ;# tags set ::singledb $old_singledb From 0cf724aad138bd166ef96a75faed4a14ef8553bc Mon Sep 17 00:00:00 2001 From: Sankar <1890648+srgsanky@users.noreply.github.com> Date: Mon, 20 May 2024 19:15:43 -0700 Subject: [PATCH 4/8] Addressed comments 1. Reworked code comment 1. Added serverLogs 1. Renamed debug variable 1. Made close link filter to be directly coupled with drop filter Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com> --- src/cluster_legacy.c | 29 ++++++++++++-------- src/debug.c | 2 +- src/server.h | 4 +-- tests/unit/cluster/cluster-reliable-meet.tcl | 2 +- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index cea1c03fe7..fb6edaa3b0 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2976,8 +2976,12 @@ int clusterProcessPacket(clusterLink *link) { /* Validate that the packet is well-formed */ if (!clusterIsValidPacket(link)) { - if (server.cluster_close_link_on_packet_drop) { + clusterMsg *hdr = (clusterMsg*) link->rcvbuf; + uint16_t type = ntohs(hdr->type); + if (server.debug_cluster_close_link_on_packet_drop && + type == server.cluster_drop_packet_filter) { freeClusterLink(link); + serverLog(LL_WARNING, "Closing link for matching packet type %hu", type); return 0; } return 1; @@ -3098,6 +3102,9 @@ int clusterProcessPacket(clusterLink *link) { if (sender && (sender->flags & CLUSTER_NODE_MEET)) { /* Once we get a response for MEET from the sender, we can stop sending more MEET. */ sender->flags &= ~CLUSTER_NODE_MEET; + serverLog(LL_NOTICE, + "Successfully completed handshake with %.40s (%s)", + sender->name, sender->human_nodename); } if (!link->inbound) { @@ -3625,16 +3632,16 @@ void clusterLinkConnectHandler(connection *conn) { * replaced by the clusterSendPing() call. */ node->ping_sent = old_ping_sent; } - /* NOTE: We cannot clear the MEET flag from the node until we get a response - * from the other node. If the MEET packet is not accepted by the other node - * due to link failure, we want to continue sending MEET. If we don't - * continue sending MEET, this node will know about the other node, but the - * other node will never add this node. Every node always responds to PINGs - * from unknown nodes with a PONG, so this node will know about the other - * node and continue sending PINGs. But the other node won't add this node - * until it sees a MEET (or it gets to know about this node from a trusted - * third node). In this case, clearing the MEET flag here leads to asymmetry - * in the cluster membership. + /* NOTE: Assume the current node is A and is asked to MEET another node B. + * Once A sends MEET to B, it cannot clear the MEET flag for B until it + * gets a response from B. If the MEET packet is not accepted by B due to + * link failure, A must continue sending MEET. If A doesn't continue sending + * MEET, A will know about B, but B will never add A. Every node always + * responds to PINGs from unknown nodes with a PONG, so A will know about B + * and continue sending PINGs. But B won't add A until it sees a MEET (or it + * gets to know about A from a trusted third node C). In this case, clearing + * the MEET flag here leads to asymmetry in the cluster membership. So, we + * clear the MEET flag in clusterProcessPacket. */ serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d", node->name, node->ip, node->cport); diff --git a/src/debug.c b/src/debug.c index a4bd830680..dc52b6c4f8 100644 --- a/src/debug.c +++ b/src/debug.c @@ -607,7 +607,7 @@ NULL server.cluster_drop_packet_filter = packet_type; addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr, "close-cluster-link-on-packet-drop") && c->argc == 3) { - server.cluster_close_link_on_packet_drop = atoi(c->argv[2]->ptr); + server.debug_cluster_close_link_on_packet_drop = atoi(c->argv[2]->ptr); addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) { dictEntry *de; diff --git a/src/server.h b/src/server.h index 53379c3b5a..fbf5def0c5 100644 --- a/src/server.h +++ b/src/server.h @@ -2027,8 +2027,8 @@ struct valkeyServer { unsigned long long cluster_link_msg_queue_limit_bytes; /* Memory usage limit on individual link msg queue */ int cluster_drop_packet_filter; /* Debug config that allows tactically * dropping packets of a specific type */ - int cluster_close_link_on_packet_drop; /* Debug config that goes along with cluster_drop_packet_filter. - When set, the link is closed on packet drop. */ + uint32_t debug_cluster_close_link_on_packet_drop : 1; /* Debug config that goes along with cluster_drop_packet_filter. + When set, the link is closed on packet drop. */ /* Scripting */ mstime_t busy_reply_threshold; /* Script / module timeout in milliseconds */ int pre_command_oom_state; /* OOM before command (script?) was started */ diff --git a/tests/unit/cluster/cluster-reliable-meet.tcl b/tests/unit/cluster/cluster-reliable-meet.tcl index 890e80936e..174c733f06 100644 --- a/tests/unit/cluster/cluster-reliable-meet.tcl +++ b/tests/unit/cluster/cluster-reliable-meet.tcl @@ -34,7 +34,7 @@ tags {tls:skip external:skip cluster} { cluster_allocate_slots 2 0 } - test "MEET is reliabile when target drops the initial MEETs" { + test "MEET is reliable when target drops the initial MEETs" { # Make 0 drop the initial MEET messages due to link failure R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET R 0 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1 From 1abf81cfc44ae76f498bb9d342abf664cfab3754 Mon Sep 17 00:00:00 2001 From: Sankar <1890648+srgsanky@users.noreply.github.com> Date: Tue, 28 May 2024 17:14:51 -0700 Subject: [PATCH 5/8] Added test case for multiple MEETs Multiple MEETs will be handled like a normal PING message. Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com> --- tests/unit/cluster/cluster-multiple-meets.tcl | 83 +++++++++++++++++++ tests/unit/cluster/cluster-reliable-meet.tcl | 2 +- 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/unit/cluster/cluster-multiple-meets.tcl diff --git a/tests/unit/cluster/cluster-multiple-meets.tcl b/tests/unit/cluster/cluster-multiple-meets.tcl new file mode 100644 index 0000000000..07a2582133 --- /dev/null +++ b/tests/unit/cluster/cluster-multiple-meets.tcl @@ -0,0 +1,83 @@ +# make sure the test infra won't use SELECT +set old_singledb $::singledb +set ::singledb 1 + +tags {tls:skip external:skip cluster} { + set base_conf [list cluster-enabled yes] + start_multiple_servers 2 [list overrides $base_conf] { + test "Cluster nodes are reachable" { + for {set id 0} {$id < [llength $::servers]} {incr id} { + # Every node should be reachable. + wait_for_condition 1000 50 { + ([catch {R $id ping} ping_reply] == 0) && + ($ping_reply eq {PONG}) + } else { + catch {R $id ping} err + fail "Node #$id keeps replying '$err' to PING." + } + } + } + + test "Before slots allocation, all nodes report cluster failure" { + wait_for_cluster_state fail + } + + set CLUSTER_PACKET_TYPE_PONG 1 + set CLUSTER_PACKET_TYPE_NONE -1 + + test "Cluster nodes haven't met each other" { + assert {[llength [get_cluster_nodes 1]] == 1} + assert {[llength [get_cluster_nodes 0]] == 1} + } + + test "Allocate slots" { + cluster_allocate_slots 2 0;# primaries replicas + } + + test "Multiple MEETs from Node 1 to Node 0 should work" { + # Make 1 drop the PONG responses to MEET + R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_PONG + # It is important to close the connection on drop, otherwise a subsequent MEET won't be sent + R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1 + + R 1 CLUSTER MEET 127.0.0.1 [srv 0 port] + + # Wait for at least a few MEETs to be sent so that we are sure that 1 is dropping the response to MEET. + wait_for_condition 1000 50 { + [CI 0 cluster_stats_messages_meet_received] > 1 && + [CI 1 cluster_state] eq {fail} && [CI 0 cluster_state] eq {ok} + } else { + fail "Cluster node 1 never sent multiple MEETs to 0" + } + + # 0 will be connected to 1, but 1 won't see that 0 is connected + assert {[llength [get_cluster_nodes 1 connected]] == 1} + assert {[llength [get_cluster_nodes 0 connected]] == 2} + + # Drop incoming and outgoing links from/to 1 + R 0 DEBUG CLUSTERLINK KILL ALL [R 1 CLUSTER MYID] + + # Wait for 0 to know about 1 again after 1 sends a MEET + wait_for_condition 1000 50 { + [llength [get_cluster_nodes 0 connected]] == 2 + } else { + fail "Cluster node 1 never sent multiple MEETs to 0" + } + + # Undo packet drop + R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE + R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 0 + + # Both a and b will turn to cluster state ok + wait_for_condition 1000 50 { + [CI 1 cluster_state] eq {ok} && [CI 0 cluster_state] eq {ok} && + [CI 1 cluster_stats_messages_meet_sent] == [CI 0 cluster_stats_messages_meet_received] + } else { + fail "1 cluster_state:[CI 1 cluster_state], 0 cluster_state: [CI 0 cluster_state]" + } + } + } ;# stop servers +} ;# tags + +set ::singledb $old_singledb + diff --git a/tests/unit/cluster/cluster-reliable-meet.tcl b/tests/unit/cluster/cluster-reliable-meet.tcl index 174c733f06..41da97ab9b 100644 --- a/tests/unit/cluster/cluster-reliable-meet.tcl +++ b/tests/unit/cluster/cluster-reliable-meet.tcl @@ -46,7 +46,7 @@ tags {tls:skip external:skip cluster} { wait_for_condition 1000 50 { [CI 0 cluster_stats_messages_meet_received] >= 3 } else { - fail "Cluster node $a never sent multiple MEETs to $b" + fail "Cluster node 1 never sent multiple MEETs to 0" } # Make sure the nodes still don't know about each other From be5cb97774b73aa3ae261abdaf0a40f72f51451a Mon Sep 17 00:00:00 2001 From: Sankar <1890648+srgsanky@users.noreply.github.com> Date: Tue, 28 May 2024 19:00:59 -0700 Subject: [PATCH 6/8] Resolved the merge conflict missed in previous commit Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com> --- src/debug.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/debug.c b/src/debug.c index 36e93c7747..64c7fea870 100644 --- a/src/debug.c +++ b/src/debug.c @@ -595,16 +595,11 @@ void debugCommand(client *c) { long packet_type; if (getLongFromObjectOrReply(c, c->argv[2], &packet_type, NULL) != C_OK) return; server.cluster_drop_packet_filter = packet_type; -<<<<<<< HEAD - addReply(c,shared.ok); + addReply(c, shared.ok); } else if (!strcasecmp(c->argv[1]->ptr, "close-cluster-link-on-packet-drop") && c->argc == 3) { server.debug_cluster_close_link_on_packet_drop = atoi(c->argv[2]->ptr); addReply(c,shared.ok); - } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) { -======= - addReply(c, shared.ok); } else if (!strcasecmp(c->argv[1]->ptr, "object") && c->argc == 3) { ->>>>>>> upstream/unstable dictEntry *de; robj *val; char *strenc; From d6b9cbd4b149f8cfb7bc72785345a2d4313b38f9 Mon Sep 17 00:00:00 2001 From: Sankar <1890648+srgsanky@users.noreply.github.com> Date: Tue, 28 May 2024 19:49:49 -0700 Subject: [PATCH 7/8] Fixed clang-format errors Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com> --- src/cluster_legacy.c | 10 ++++------ src/debug.c | 2 +- src/server.h | 5 +++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 86160a846f..1afe2ea403 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2846,10 +2846,9 @@ int clusterIsValidPacket(clusterLink *link) { int clusterProcessPacket(clusterLink *link) { /* Validate that the packet is well-formed */ if (!clusterIsValidPacket(link)) { - clusterMsg *hdr = (clusterMsg*) link->rcvbuf; + clusterMsg *hdr = (clusterMsg *)link->rcvbuf; uint16_t type = ntohs(hdr->type); - if (server.debug_cluster_close_link_on_packet_drop && - type == server.cluster_drop_packet_filter) { + if (server.debug_cluster_close_link_on_packet_drop && type == server.cluster_drop_packet_filter) { freeClusterLink(link); serverLog(LL_WARNING, "Closing link for matching packet type %hu", type); return 0; @@ -2957,9 +2956,8 @@ int clusterProcessPacket(clusterLink *link) { if (sender && (sender->flags & CLUSTER_NODE_MEET)) { /* Once we get a response for MEET from the sender, we can stop sending more MEET. */ sender->flags &= ~CLUSTER_NODE_MEET; - serverLog(LL_NOTICE, - "Successfully completed handshake with %.40s (%s)", - sender->name, sender->human_nodename); + serverLog(LL_NOTICE, "Successfully completed handshake with %.40s (%s)", sender->name, + sender->human_nodename); } if (!link->inbound) { if (nodeInHandshake(link->node)) { diff --git a/src/debug.c b/src/debug.c index 64c7fea870..38bfc38f2f 100644 --- a/src/debug.c +++ b/src/debug.c @@ -598,7 +598,7 @@ void debugCommand(client *c) { addReply(c, shared.ok); } else if (!strcasecmp(c->argv[1]->ptr, "close-cluster-link-on-packet-drop") && c->argc == 3) { server.debug_cluster_close_link_on_packet_drop = atoi(c->argv[2]->ptr); - addReply(c,shared.ok); + addReply(c, shared.ok); } else if (!strcasecmp(c->argv[1]->ptr, "object") && c->argc == 3) { dictEntry *de; robj *val; diff --git a/src/server.h b/src/server.h index f808a94c6b..52bb34892c 100644 --- a/src/server.h +++ b/src/server.h @@ -2069,8 +2069,9 @@ struct valkeyServer { int cluster_drop_packet_filter; /* Debug config that allows tactically * dropping packets of a specific type */ - uint32_t debug_cluster_close_link_on_packet_drop : 1; /* Debug config that goes along with cluster_drop_packet_filter. - When set, the link is closed on packet drop. */ + uint32_t + debug_cluster_close_link_on_packet_drop : 1; /* Debug config that goes along with cluster_drop_packet_filter. + When set, the link is closed on packet drop. */ sds cached_cluster_slot_info[CACHE_CONN_TYPE_MAX]; /* Scripting */ mstime_t busy_reply_threshold; /* Script / module timeout in milliseconds */ From 7ac84b6b9257b8cfb34ea9212aa0e23c0b221fa4 Mon Sep 17 00:00:00 2001 From: Sankar <1890648+srgsanky@users.noreply.github.com> Date: Wed, 29 May 2024 14:23:31 -0700 Subject: [PATCH 8/8] Fixed format to keep type and field in same line Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com> --- src/server.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/server.h b/src/server.h index 52bb34892c..4666c09316 100644 --- a/src/server.h +++ b/src/server.h @@ -2068,10 +2068,8 @@ struct valkeyServer { unsigned long long cluster_link_msg_queue_limit_bytes; /* Memory usage limit on individual link msg queue */ int cluster_drop_packet_filter; /* Debug config that allows tactically * dropping packets of a specific type */ - - uint32_t - debug_cluster_close_link_on_packet_drop : 1; /* Debug config that goes along with cluster_drop_packet_filter. - When set, the link is closed on packet drop. */ + /* Debug config that goes along with cluster_drop_packet_filter. When set, the link is closed on packet drop. */ + uint32_t debug_cluster_close_link_on_packet_drop : 1; sds cached_cluster_slot_info[CACHE_CONN_TYPE_MAX]; /* Scripting */ mstime_t busy_reply_threshold; /* Script / module timeout in milliseconds */