[Openais] [PATCH] Implementation of automatic redundant ring recovery

Steven Dake sdake at redhat.com
Fri Mar 4 16:31:41 PST 2011


Lars,

As we discussed on IRC, I agreed to do a first rough cut of automatic
redundant ring recovery, and you agreed to find some suitable engineers
to finish the work on this topic.  If there are others in the community
interested in seeing this work merged into Corosync, feel free to amend
the patch, address any particular points that are remaining, and send an
updated patch.  Once the 7 points are addressed, we should be good to go
with a merge.

Thanks for your assistance.
-steve

On 03/04/2011 05:25 PM, Steven Dake wrote:
> Here is a very rough implementation of automatic ring recovery.  This patch
> only works for the rrp mode active.  It requires the following changes to be
> mergeable:
> 
> 1. endian detection & conversion of the message headers in totemrrp must be done
> 2. The implementation is entirely in the active algo.  Instead, the
>    implementation should be made to avoid an algorithm specific implementation
>    and only be implemented in one place (and work for both passive/active)
> 3. the timer variable timer_active_test_ring_timeout should be stored in
>    totemrrp_instance instead of active_instance
> 4. An array of timeouts should be created for each iface_no so as to not
>    overwrite timer variables with more than 2 rings.
> 5. If the ACTIVATE message is lost, the ring will not recover.  This needs
>    more consideration and correction.
> 6. The active test message is sent once per second (search for 1000, which
>    is in msec).  This should be tunable and added to the man pages.
> 7. testing!
> 
> Example of operation in a 3 node cluster:
> nodes 1, 2, 3
> 
> On node 1:
> [root at f14-n1 ~]# corosync-cfgtool -s
> Printing ring status.
> Local node ID 1014671552
> RING ID 0
> 	id	= 192.168.122.60
> 	status	= ring 0 active with no faults
> RING ID 1
> 	id	= 192.168.123.60
> 	status	= ring 1 active with no faults
> 
> Then:
> [root at f14-n1 cs]# cat break
> iptables -A INPUT -s 192.168.123.60 -j DROP
> iptables -A OUTPUT -s 192.168.123.60 -j DROP
> [root at f14-n1 cs]# ./break
> 
> on node 1:
> 
> Mar 04 10:23:35 corosync [TOTEM ] Marking seqid 1655 ringid 1 interface 192.168.123.60 FAULTY - adminisrtative intervention required.
> 
> then:
> [root at f14-n1 cs]# cat allow
> iptables --flush
> [root at f14-n1 cs]# ./allow
> 
> Mar 04 10:24:18 corosync [TOTEM ] Automatically recovered ring 1
> 
> [root at f14-n1 cs]# corosync-cfgtool -s
> Printing ring status.
> Local node ID 1014671552
> RING ID 0
> 	id	= 192.168.122.60
> 	status	= ring 0 active with no faults
> RING ID 1
> 	id	= 192.168.123.60
> 	status	= ring 1 active with no faults
> 
> enjoy.
> 
> Signed-off-by: Steven Dake <sdake at redhat.com>
> ---
>  exec/totemrrp.c |  110 +++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 files changed, 103 insertions(+), 7 deletions(-)
> 
> diff --git a/exec/totemrrp.c b/exec/totemrrp.c
> index a8ebd08..d09d37a 100644
> --- a/exec/totemrrp.c
> +++ b/exec/totemrrp.c
> @@ -103,6 +103,7 @@ struct active_instance {
>  	unsigned int last_token_seq;
>          poll_timer_handle timer_expired_token;
>          poll_timer_handle timer_problem_decrementer;
> +        poll_timer_handle timer_active_test_ring_timeout;
>  	void *totemrrp_context;
>  };
>  
> @@ -240,6 +241,8 @@ struct totemrrp_instance {
>  
>  	int processor_count;
>  
> +	int my_nodeid;
> +
>  	struct totem_config *totem_config;
>  };
>  
> @@ -462,6 +465,20 @@ static void active_timer_problem_decrementer_start (
>  static void active_timer_problem_decrementer_cancel (
>  	struct active_instance *active_instance);
>  
> +/*
> + * 0-5 reserved for totemsrp.c
> + */
> +#define MESSAGE_TYPE_RING_TEST_ACTIVE		6
> +#define MESSAGE_TYPE_RING_TEST_ACTIVATE		7
> +
> +struct message_header {
> +	char type;
> +	char encapsulated;
> +	unsigned short endian_detector;
> +	int ring_number;
> +	int nodeid_activator;
> +} __attribute__((packed));
> +
>  struct rrp_algo none_algo = {
>  	.name			= "none",
>  	.initialize		= NULL,
> @@ -1132,6 +1149,34 @@ static void timer_function_active_problem_decrementer (void *context)
>  	}
>  }
>  
> +static void timer_function_test_ring_timeout (void *context)
> +{
> +	struct active_instance *active_instance = (struct active_instance *)context;
> +	struct totemrrp_instance *instance = active_instance->rrp_instance;
> +	int faulty = 0;
> +	int i;
> +	struct message_header msg;
> +
> +	for (i = 0; i < instance->interface_count; i++) {
> +		msg.type = MESSAGE_TYPE_RING_TEST_ACTIVE;
> +		if (active_instance->faulty[i] == 1) {
> +			faulty = 1;
> +			msg.ring_number = i;
> +			msg.nodeid_activator = instance->my_nodeid;
> +			totemnet_token_send (
> +				instance->net_handles[i],
> +				&msg, sizeof (struct message_header));
> +		}
> +	}
> +	if (faulty) {
> +		poll_timer_add (instance->poll_handle,
> +			1000,
> +			(void *)active_instance,
> +			timer_function_test_ring_timeout,
> +			&active_instance->timer_active_test_ring_timeout);
> +	}
> +}
> +
>  static void timer_function_active_token_expired (void *context)
>  {
>  	struct active_instance *active_instance = (struct active_instance *)context;
> @@ -1161,6 +1206,12 @@ static void timer_function_active_token_expired (void *context)
>  		if (active_instance->counter_problems[i] >= rrp_instance->totem_config->rrp_problem_count_threshold)
>  		{
>  			active_instance->faulty[i] = 1;
> +			poll_timer_add (rrp_instance->poll_handle,
> +				1000,
> +				(void *)active_instance,
> +				timer_function_test_ring_timeout,
> +				&active_instance->timer_active_test_ring_timeout);
> +
>  			sprintf (rrp_instance->status[i],
>  				"Marking seqid %d ringid %u interface %s FAULTY - adminisrtative intervention required.",
>  				active_instance->last_token_seq,
> @@ -1266,7 +1317,7 @@ static void active_mcast_noflush_send (
>  }
>  
>  static void active_token_recv (
> -	struct totemrrp_instance *instance,
> +	struct totemrrp_instance *rrp_instance,
>  	unsigned int iface_no,
>  	void *context,
>  	const void *msg,
> @@ -1274,13 +1325,56 @@ static void active_token_recv (
>  	unsigned int token_seq)
>  {
>  	int i;
> -	struct active_instance *active_instance = (struct active_instance *)instance->rrp_algo_instance;
> +	struct active_instance *active_instance = (struct active_instance *)rrp_instance->rrp_algo_instance;
> +	const struct message_header *hdr = msg;
> +	struct message_header activate_msg;
> +
> +	active_instance->totemrrp_context = context;
> +
> +	if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVE) {
> +
> +		log_printf (
> +			rrp_instance->totemrrp_log_level_debug,
> +			"received message requesting test of ring now active\n");
> +
> +		if (hdr->nodeid_activator == rrp_instance->my_nodeid) {
> +			/*
> +			 * Send an activate message
> +			 */
> +			activate_msg.type = MESSAGE_TYPE_RING_TEST_ACTIVATE;
> +			activate_msg.ring_number = hdr->ring_number;;
> +			activate_msg.nodeid_activator = rrp_instance->my_nodeid;
> +			totemnet_token_send (
> +				rrp_instance->net_handles[iface_no],
> +				&activate_msg, sizeof (struct message_header));
> +		} else {
> +			/*
> +			 * Send a ring test message
> +			 */
> +			totemnet_token_send (
> +				rrp_instance->net_handles[iface_no],
> +				msg, msg_len);
> +		}
> +		return;
> +	} else
> +	if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVATE) {
> +		log_printf (
> +			rrp_instance->totemrrp_log_level_notice,
> +			"Automatically recovered ring %d\n", hdr->ring_number);
> +
> +		totemrrp_ring_reenable (rrp_instance);
> +		if (hdr->nodeid_activator != rrp_instance->my_nodeid) {
> +			totemnet_token_send (
> +				rrp_instance->net_handles[iface_no],
> +				msg, msg_len);
> +		}
> +		return;
> +	}
>  
> -	active_instance->totemrrp_context = context; // this should be in totemrrp_instance ?
>  	if (token_seq > active_instance->last_token_seq) {
>  		memcpy (active_instance->token, msg, msg_len);
>  		active_instance->token_len = msg_len;
> -		for (i = 0; i < instance->interface_count; i++) {
> +		for (i = 0; i < rrp_instance->interface_count; i++) {
>  			active_instance->last_token_recv[i] = 0;
>  		}
>  
> @@ -1292,7 +1386,7 @@ static void active_token_recv (
>  
>  	if (token_seq == active_instance->last_token_seq) {
>  		active_instance->last_token_recv[iface_no] = 1;
> -		for (i = 0; i < instance->interface_count; i++) {
> +		for (i = 0; i < rrp_instance->interface_count; i++) {
>  			if ((active_instance->last_token_recv[i] == 0) &&
>  				active_instance->faulty[i] == 0) {
>  				return; /* don't deliver token */
> @@ -1300,7 +1394,7 @@ static void active_token_recv (
>  		}
>  		active_timer_expired_token_cancel (active_instance);
>  
> -		instance->totemrrp_deliver_fn (
> +		rrp_instance->totemrrp_deliver_fn (
>  			context,
>  			msg,
>  			msg_len);
> @@ -1487,13 +1581,14 @@ void rrp_deliver_fn (
>  	unsigned int token_is;
>  
>  	struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
> +	const struct message_header *hdr = msg;
>  
>  	deliver_fn_context->instance->totemrrp_token_seqid_get (
>  		msg,
>  		&token_seqid,
>  		&token_is);
>  
> -	if (token_is) {
> +	if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVE || hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVATE || token_is) {
>  		/*
>  		 * Deliver to the token receiver for this rrp algorithm
>  		 */
> @@ -1523,6 +1618,7 @@ void rrp_iface_change_fn (
>  {
>  	struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
>  
> +	deliver_fn_context->instance->my_nodeid = iface_addr->nodeid;
>  	deliver_fn_context->instance->totemrrp_iface_change_fn (
>  		deliver_fn_context->context,
>  		iface_addr,



More information about the Openais mailing list