[Openais] [PATCH] Fix missing cpg join events
Angus Salkeld
asalkeld at redhat.com
Thu Jun 3 14:40:39 PDT 2010
This is a port of a flatiron patch Steve sent me.
It passes all cts tests.
(There are some debug messages I could remove ...)
-Angus
Signed-off-by: Angus Salkeld <asalkeld at redhat.com>
---
services/cpg.c | 54 ++++++++++++++++++++++++++++++++++++++++++++----------
1 files changed, 44 insertions(+), 10 deletions(-)
diff --git a/services/cpg.c b/services/cpg.c
index eb4b509..d5a3724 100644
--- a/services/cpg.c
+++ b/services/cpg.c
@@ -158,6 +158,8 @@ DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL);
DECLARE_LIST_INIT(cpg_pd_list_head);
+static struct memb_ring_id my_ring_id;
+
static unsigned int my_member_list[PROCESSOR_COUNT_MAX];
static unsigned int my_member_list_entries;
@@ -194,6 +196,13 @@ static int cpg_lib_init_fn (void *conn);
static int cpg_lib_exit_fn (void *conn);
+static void cpg_confchg_fn (
+ enum totem_configuration_type configuration_type,
+ const unsigned int *member_list, size_t member_list_entries,
+ const unsigned int *left_list, size_t left_list_entries,
+ const unsigned int *joined_list, size_t joined_list_entries,
+ const struct memb_ring_id *ring_id);
+
static void message_handler_req_exec_cpg_procjoin (
const void *message,
unsigned int nodeid);
@@ -368,6 +377,7 @@ struct corosync_service_engine cpg_service_engine = {
.exec_dump_fn = NULL,
.exec_engine = cpg_exec_engine,
.exec_engine_count = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler),
+ .confchg_fn = cpg_confchg_fn,
.sync_mode = CS_SYNC_V1_APIV2,
.sync_init = (sync_init_v1_fn_t)cpg_sync_init_v2,
.sync_process = cpg_sync_process,
@@ -447,6 +457,7 @@ struct req_exec_cpg_downlist {
coroipc_request_header_t header __attribute__((aligned(8)));
/* merge decisions */
mar_uint32_t old_members __attribute__((aligned(8)));
+ struct memb_ring_id ring_id;
/* downlist below */
mar_uint32_t left_nodes __attribute__((aligned(8)));
mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
@@ -462,6 +473,16 @@ struct downlist_msg {
static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
+static void cpg_confchg_fn (
+ enum totem_configuration_type configuration_type,
+ const unsigned int *member_list, size_t member_list_entries,
+ const unsigned int *left_list, size_t left_list_entries,
+ const unsigned int *joined_list, size_t joined_list_entries,
+ const struct memb_ring_id *ring_id)
+{
+ memcpy (&my_ring_id, ring_id, sizeof (struct memb_ring_id));
+}
+
static void cpg_sync_init_v2 (
const unsigned int *trans_list,
size_t trans_list_entries,
@@ -581,6 +602,7 @@ static int notify_lib_totem_membership (
}
static int notify_lib_joinlist(
+ int from_nodeid,
const mar_cpg_name_t *group_name,
void *conn,
int joined_list_entries,
@@ -683,7 +705,8 @@ static int notify_lib_joinlist(
}
if (left_list_entries) {
if (left_list[0].pid == cpd->pid &&
- left_list[0].nodeid == api->totem_nodeid_get()) {
+ left_list[0].nodeid == api->totem_nodeid_get() &&
+ from_nodeid == api->totem_nodeid_get()) {
cpd->pid = 0;
memset (&cpd->group_name, 0, sizeof(cpd->group_name));
@@ -770,7 +793,8 @@ static void downlist_master_choose_and_send (void)
left_list.pid = pi->pid;
left_list.reason = CONFCHG_CPG_REASON_NODEDOWN;
- notify_lib_joinlist(&pi->group, NULL,
+ notify_lib_joinlist(stored_msg->sender_nodeid,
+ &pi->group, NULL,
0, NULL,
1, &left_list,
MESSAGE_RES_CPG_CONFCHG_CALLBACK);
@@ -970,6 +994,7 @@ static void do_proc_join(
struct list_head *list_to_add = NULL;
if (process_info_find (name, pid, nodeid) != NULL) {
+ log_printf (LOGSYS_LEVEL_WARNING, "returning here\n");
return ;
}
pi = malloc (sizeof (struct process_info));
@@ -1002,7 +1027,7 @@ static void do_proc_join(
notify_info.nodeid = nodeid;
notify_info.reason = reason;
- notify_lib_joinlist(&pi->group, NULL,
+ notify_lib_joinlist(nodeid, &pi->group, NULL,
1, &notify_info,
0, NULL,
MESSAGE_RES_CPG_CONFCHG_CALLBACK);
@@ -1026,6 +1051,13 @@ static void message_handler_req_exec_cpg_downlist(
struct downlist_msg *stored_msg;
int found;
+ if (memcmp (&req_exec_cpg_downlist->ring_id, &my_ring_id,
+ sizeof (struct memb_ring_id)) != 0) {
+
+ log_printf (LOGSYS_LEVEL_WARNING, "invalid downlist\n");
+ return;
+ }
+
if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d",
req_exec_cpg_downlist->left_nodes, downlist_state);
@@ -1093,7 +1125,7 @@ static void message_handler_req_exec_cpg_procleave (
notify_info.nodeid = nodeid;
notify_info.reason = req_exec_cpg_procjoin->reason;
- notify_lib_joinlist(&req_exec_cpg_procjoin->group_name, NULL,
+ notify_lib_joinlist(nodeid, &req_exec_cpg_procjoin->group_name, NULL,
0, NULL,
1, &notify_info,
MESSAGE_RES_CPG_CONFCHG_CALLBACK);
@@ -1123,11 +1155,6 @@ static void message_handler_req_exec_cpg_joinlist (
log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x\n",
nodeid);
- /* Ignore our own messages */
- if (nodeid == api->totem_nodeid_get()) {
- return;
- }
-
while ((const char*)jle < message + res->size) {
do_proc_join (&jle->group_name, jle->pid, nodeid,
CONFCHG_CPG_REASON_NODEUP);
@@ -1189,6 +1216,8 @@ static void message_handler_req_exec_cpg_mcast (
}
api->ipc_dispatch_iov_send (cpd->conn, iovec, 2);
+ } else {
+ log_printf (LOGSYS_LEVEL_WARNING, "unknown mssage\n");
}
}
}
@@ -1200,6 +1229,8 @@ static int cpg_exec_send_downlist(void)
g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST);
g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist);
+ memcpy (&g_req_exec_cpg_downlist.ring_id, &my_ring_id,
+ sizeof (struct memb_ring_id));
g_req_exec_cpg_downlist.old_members = my_old_member_list_entries;
@@ -1227,8 +1258,10 @@ static int cpg_exec_send_joinlist(void)
}
/* Nothing to send */
- if (!count)
+ if (!count) {
+ log_printf (LOGSYS_LEVEL_WARNING, "nothing to send\n");
return 0;
+ }
buf = alloca(sizeof(coroipc_response_header_t) + sizeof(struct join_list_entry) * count);
if (!buf) {
@@ -1244,6 +1277,7 @@ static int cpg_exec_send_joinlist(void)
if (pi->nodeid == api->totem_nodeid_get ()) {
memcpy (&jle->group_name, &pi->group, sizeof (mar_cpg_name_t));
+ log_printf (LOGSYS_LEVEL_WARNING, "sending join %s\n", jle->group_name.value);
jle->pid = pi->pid;
jle++;
}
--
1.6.6.1
More information about the Openais
mailing list