[Openais] [PATCH] Fix abort when token is lost in RECOVERY state

Jan Friesse jfriesse at redhat.com
Mon Mar 14 01:59:33 PDT 2011


A commit token should be rejected when a token is lost in the recovery
state.  This occurs naturally because the ring id increases by 4 for
every new ring.  Prior to this patch, if the token was lost, the old
ring id information was restored, causing a commit token to be accepted
when it should be rejected.  This erronously accepted commit token would
lead to an assertion which is fixed by this patch.

Backport of Corosync 6aa47fde953bf2179f5bd2dd07815fc7d80f47bb
---
 branches/whitetank/exec/totemsrp.c |   59 +++++++++++++-----------------------
 1 files changed, 21 insertions(+), 38 deletions(-)

diff --git a/branches/whitetank/exec/totemsrp.c b/branches/whitetank/exec/totemsrp.c
index 20de8c9..5f3c319 100644
--- a/branches/whitetank/exec/totemsrp.c
+++ b/branches/whitetank/exec/totemsrp.c
@@ -477,8 +477,6 @@ struct totemsrp_instance {
 
 	unsigned int set_aru;
 
-	int old_ring_state_saved;
-
 	int old_ring_state_aru;
 
 	unsigned int old_ring_state_high_seq_received;
@@ -1310,16 +1308,13 @@ static void cancel_merge_detect_timeout (struct totemsrp_instance *instance)
  */
 static void old_ring_state_save (struct totemsrp_instance *instance)
 {
-	if (instance->old_ring_state_saved == 0) {
-		instance->old_ring_state_saved = 1;
-		memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
-			sizeof (struct memb_ring_id));
-		instance->old_ring_state_aru = instance->my_aru;
-		instance->old_ring_state_high_seq_received = instance->my_high_seq_received;
-		log_printf (instance->totemsrp_log_level_notice,
-			"Saving state aru %x high seq received %x\n",
-			instance->my_aru, instance->my_high_seq_received);
-	}
+	memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
+		sizeof (struct memb_ring_id));
+	instance->old_ring_state_aru = instance->my_aru;
+	instance->old_ring_state_high_seq_received = instance->my_high_seq_received;
+	log_printf (instance->totemsrp_log_level_debug,
+		"Saving state aru %x high seq received %x\n",
+		instance->my_aru, instance->my_high_seq_received);
 }
 
 static void ring_save (struct totemsrp_instance *instance)
@@ -1336,25 +1331,19 @@ static void ring_reset (struct totemsrp_instance *instance)
 	instance->ring_saved = 0;
 }
 
-static void ring_state_restore (struct totemsrp_instance *instance)
+static void old_ring_state_restore (struct totemsrp_instance *instance)
 {
-	if (instance->old_ring_state_saved) {
-		memcpy (&instance->my_ring_id, &instance->my_old_ring_id,
-			sizeof (struct memb_ring_id));
-
-		instance->my_aru = instance->old_ring_state_aru;
-		instance->my_high_seq_received = instance->old_ring_state_high_seq_received;
-		log_printf (instance->totemsrp_log_level_notice,
-			"Restoring instance->my_aru %x my high seq received %x\n",
-			instance->my_aru, instance->my_high_seq_received);
-	}
+	instance->my_aru = instance->old_ring_state_aru;
+	instance->my_high_seq_received = instance->old_ring_state_high_seq_received;
+	log_printf (instance->totemsrp_log_level_notice,
+		"Restoring instance->my_aru %x my high seq received %x\n",
+		instance->my_aru, instance->my_high_seq_received);
 }
 
 static void old_ring_state_reset (struct totemsrp_instance *instance)
 {
 	log_printf (instance->totemsrp_log_level_debug,
 		"Resetting old ring state\n");
-	instance->old_ring_state_saved = 0;
 }
 
 static void reset_token_timeout (struct totemsrp_instance *instance) {
@@ -1434,6 +1423,12 @@ static void memb_join_message_send (struct totemsrp_instance *instance);
 
 static void memb_merge_detect_transmit (struct totemsrp_instance *instance);
 
+static void memb_recovery_state_token_loss (struct totemsrp_instance *instance)
+{
+	old_ring_state_restore (instance);
+	memb_state_gather_enter (instance, 5);
+}
+
 /*
  * Timers used for various states of the membership algorithm
  */
@@ -1465,8 +1460,7 @@ static void timer_function_orf_token_timeout (void *data)
 		case MEMB_STATE_RECOVERY:
 			log_printf (instance->totemsrp_log_level_notice,
 				"The token was lost in the RECOVERY state.\n");
-			ring_state_restore (instance);
-			memb_state_gather_enter (instance, 5);
+			memb_recovery_state_token_loss (instance);
 			break;
 	}
 }
@@ -2309,16 +2303,6 @@ static int orf_token_mcast (
 			break;
 		}
 		message_item = (struct message_item *)queue_item_get (mcast_queue);
-		/* preincrement required by algo */
-		if (instance->old_ring_state_saved &&
-			(instance->memb_state == MEMB_STATE_GATHER ||
-			instance->memb_state == MEMB_STATE_COMMIT)) {
-
-			log_printf (instance->totemsrp_log_level_debug,
-				"not multicasting at seqno is %d\n",
-			token->seq);
-			return (0);
-		}
 
 		message_item->mcast->seq = ++token->seq;
 		message_item->mcast->this_seqno = instance->global_seqno++;
@@ -4130,9 +4114,8 @@ static int message_handler_memb_join (
 
 				memb_join->ring_seq >= instance->my_ring_id.seq) {
 
-				ring_state_restore (instance);
-
 				memb_join_process (instance, memb_join);
+				memb_recovery_state_token_loss (instance);
 				memb_state_gather_enter (instance, 14);
 			}
 			break;
-- 
1.7.3.4



More information about the Openais mailing list