[Openais] [PATCH] Fix missing cpg join events

Angus Salkeld asalkeld at redhat.com
Thu Jun 3 14:40:39 PDT 2010


This is a port of a flatiron patch Steve sent me.
It passes all cts tests.

(The patch still contains some debug log messages that I could remove ...)

-Angus

Signed-off-by: Angus Salkeld <asalkeld at redhat.com>
---
 services/cpg.c |   54 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/services/cpg.c b/services/cpg.c
index eb4b509..d5a3724 100644
--- a/services/cpg.c
+++ b/services/cpg.c
@@ -158,6 +158,8 @@ DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL);
 
 DECLARE_LIST_INIT(cpg_pd_list_head);
 
+static struct memb_ring_id my_ring_id;
+
 static unsigned int my_member_list[PROCESSOR_COUNT_MAX];
 
 static unsigned int my_member_list_entries;
@@ -194,6 +196,13 @@ static int cpg_lib_init_fn (void *conn);
 
 static int cpg_lib_exit_fn (void *conn);
 
+static void cpg_confchg_fn (
+	enum totem_configuration_type configuration_type,
+	const unsigned int *member_list, size_t member_list_entries,
+	const unsigned int *left_list, size_t left_list_entries,
+	const unsigned int *joined_list, size_t joined_list_entries,
+	const struct memb_ring_id *ring_id);
+
 static void message_handler_req_exec_cpg_procjoin (
 	const void *message,
 	unsigned int nodeid);
@@ -368,6 +377,7 @@ struct corosync_service_engine cpg_service_engine = {
 	.exec_dump_fn				= NULL,
 	.exec_engine				= cpg_exec_engine,
 	.exec_engine_count		        = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler),
+	.confchg_fn				= cpg_confchg_fn,
 	.sync_mode				= CS_SYNC_V1_APIV2,
 	.sync_init                              = (sync_init_v1_fn_t)cpg_sync_init_v2,
 	.sync_process                           = cpg_sync_process,
@@ -447,6 +457,7 @@ struct req_exec_cpg_downlist {
 	coroipc_request_header_t header __attribute__((aligned(8)));
 	/* merge decisions */
 	mar_uint32_t old_members __attribute__((aligned(8)));
+	struct memb_ring_id ring_id;
 	/* downlist below */
 	mar_uint32_t left_nodes __attribute__((aligned(8)));
 	mar_uint32_t nodeids[PROCESSOR_COUNT_MAX]  __attribute__((aligned(8)));
@@ -462,6 +473,16 @@ struct downlist_msg {
 
 static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
 
+static void cpg_confchg_fn (
+	enum totem_configuration_type configuration_type,
+	const unsigned int *member_list, size_t member_list_entries,
+	const unsigned int *left_list, size_t left_list_entries,
+	const unsigned int *joined_list, size_t joined_list_entries,
+	const struct memb_ring_id *ring_id)
+{
+	memcpy (&my_ring_id, ring_id, sizeof (struct memb_ring_id));
+}
+
 static void cpg_sync_init_v2 (
 	const unsigned int *trans_list,
 	size_t trans_list_entries,
@@ -581,6 +602,7 @@ static int notify_lib_totem_membership (
 }
 
 static int notify_lib_joinlist(
+	int from_nodeid,
 	const mar_cpg_name_t *group_name,
 	void *conn,
 	int joined_list_entries,
@@ -683,7 +705,8 @@ static int notify_lib_joinlist(
 				}
 				if (left_list_entries) {
 					if (left_list[0].pid == cpd->pid &&
-						left_list[0].nodeid == api->totem_nodeid_get()) {
+						left_list[0].nodeid == api->totem_nodeid_get() &&
+						from_nodeid == api->totem_nodeid_get()) {
 
 						cpd->pid = 0;
 						memset (&cpd->group_name, 0, sizeof(cpd->group_name));
@@ -770,7 +793,8 @@ static void downlist_master_choose_and_send (void)
 				left_list.pid = pi->pid;
 				left_list.reason = CONFCHG_CPG_REASON_NODEDOWN;
 
-				notify_lib_joinlist(&pi->group, NULL,
+				notify_lib_joinlist(stored_msg->sender_nodeid,
+					&pi->group, NULL,
 					0, NULL,
 					1, &left_list,
 					MESSAGE_RES_CPG_CONFCHG_CALLBACK);
@@ -970,6 +994,7 @@ static void do_proc_join(
 	struct list_head *list_to_add = NULL;
 
 	if (process_info_find (name, pid, nodeid) != NULL) {
+		log_printf (LOGSYS_LEVEL_WARNING, "returning here\n");
 		return ;
  	}
 	pi = malloc (sizeof (struct process_info));
@@ -1002,7 +1027,7 @@ static void do_proc_join(
 	notify_info.nodeid = nodeid;
 	notify_info.reason = reason;
 
-	notify_lib_joinlist(&pi->group, NULL,
+	notify_lib_joinlist(nodeid, &pi->group, NULL,
 			    1, &notify_info,
 			    0, NULL,
 			    MESSAGE_RES_CPG_CONFCHG_CALLBACK);
@@ -1026,6 +1051,13 @@ static void message_handler_req_exec_cpg_downlist(
 	struct downlist_msg *stored_msg;
 	int found;
 
+	if (memcmp (&req_exec_cpg_downlist->ring_id, &my_ring_id,
+		sizeof (struct memb_ring_id)) != 0) {
+		
+		log_printf (LOGSYS_LEVEL_WARNING, "invalid downlist\n");
+		return;
+	}
+
 	if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
 		log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d",
 			req_exec_cpg_downlist->left_nodes, downlist_state);
@@ -1093,7 +1125,7 @@ static void message_handler_req_exec_cpg_procleave (
 	notify_info.nodeid = nodeid;
 	notify_info.reason = req_exec_cpg_procjoin->reason;
 
-	notify_lib_joinlist(&req_exec_cpg_procjoin->group_name, NULL,
+	notify_lib_joinlist(nodeid, &req_exec_cpg_procjoin->group_name, NULL,
 		0, NULL,
 		1, &notify_info,
 		MESSAGE_RES_CPG_CONFCHG_CALLBACK);
@@ -1123,11 +1155,6 @@ static void message_handler_req_exec_cpg_joinlist (
 	log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x\n",
 		nodeid);
 
-	/* Ignore our own messages */
-	if (nodeid == api->totem_nodeid_get()) {
-		return;
-	}
-
 	while ((const char*)jle < message + res->size) {
 		do_proc_join (&jle->group_name, jle->pid, nodeid,
 			CONFCHG_CPG_REASON_NODEUP);
@@ -1189,6 +1216,8 @@ static void message_handler_req_exec_cpg_mcast (
 			}
 
 			api->ipc_dispatch_iov_send (cpd->conn, iovec, 2);
+		} else {
+			log_printf (LOGSYS_LEVEL_WARNING, "unknown mssage\n");
 		}
 	}
 }
@@ -1200,6 +1229,8 @@ static int cpg_exec_send_downlist(void)
 
 	g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST);
 	g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist);
+	memcpy (&g_req_exec_cpg_downlist.ring_id, &my_ring_id,
+		sizeof (struct memb_ring_id));
 
 	g_req_exec_cpg_downlist.old_members = my_old_member_list_entries;
 
@@ -1227,8 +1258,10 @@ static int cpg_exec_send_joinlist(void)
 	}
 
 	/* Nothing to send */
-	if (!count)
+	if (!count) {
+		log_printf (LOGSYS_LEVEL_WARNING, "nothing to send\n");
 		return 0;
+	}
 
 	buf = alloca(sizeof(coroipc_response_header_t) + sizeof(struct join_list_entry) * count);
 	if (!buf) {
@@ -1244,6 +1277,7 @@ static int cpg_exec_send_joinlist(void)
 
  		if (pi->nodeid == api->totem_nodeid_get ()) {
  			memcpy (&jle->group_name, &pi->group, sizeof (mar_cpg_name_t));
+			log_printf (LOGSYS_LEVEL_WARNING, "sending join %s\n", jle->group_name.value);
  			jle->pid = pi->pid;
  			jle++;
 		}
-- 
1.6.6.1



More information about the Openais mailing list