[Openais] [PATCH] Allocate buffers for packet transmission in the network drivers

Steven Dake sdake at redhat.com
Thu Mar 10 00:04:32 PST 2011


This patch looks pretty good.

One change that needs to be made before this patch can be merged is that
the transport drivers need to be aware of their own maximum frame sizes
and allocate accordingly.  Ie totemrrp_malloc() doesn't need a frame
size parameter, (but instead totemudp.c would allocate a frame size that
is appropriate for the transport (in the case of udp, something like 10k
would be safe).

Another interesting idea that may be useful for a second patch is to
keep a list of freed frames to avoid the malloc/free overhead for the
common cases of totemsrp.  Surprisingly this is quite a significant
amount of overhead (according to oprofile).

A totemnet_malloc operation would then be something like:
  if free_list has a frame, return frame and delete frame from free list
for the instance
  else malloc frame using transport driver
totemnet_free operation would be something like:
  add frame to free list

To do this "slabbing" properly, a conditional parameter is probably
needed in the transport driver definition because totemnet_free for the
iba transport actually needs to call the free function (so it can be
reposted to the send queue).

Regards
-steve

On 03/09/2011 09:45 PM, Zane Bitter wrote:
> This change paves the way for eliminating a copy within the Infiniband
> driver in the future by transferring responsibility for allocating and
> freeing message buffers to the network driver layer.
> 
> Tested under valgrind on a single-node cluster.
> 
> Signed-off-by: Zane Bitter <zane.bitter at gmail.com>
> ---
>  exec/totemiba.c  |   10 ++++++++++
>  exec/totemiba.h  |    4 ++++
>  exec/totemnet.c  |   28 ++++++++++++++++++++++++++++
>  exec/totemnet.h  |    4 ++++
>  exec/totemrrp.c  |   14 ++++++++++++++
>  exec/totemrrp.h  |    7 +++++++
>  exec/totemsrp.c  |   22 ++++++++++++++++++----
>  exec/totemudp.c  |   10 ++++++++++
>  exec/totemudp.h  |    4 ++++
>  exec/totemudpu.c |   10 ++++++++++
>  exec/totemudpu.h |    4 ++++
>  11 files changed, 113 insertions(+), 4 deletions(-)
> 
> diff --git a/exec/totemiba.c b/exec/totemiba.c
> index a0379ff..45b3fa9 100644
> --- a/exec/totemiba.c
> +++ b/exec/totemiba.c
> @@ -1317,6 +1317,16 @@ int totemiba_initialize (
>  	return (res);
>  }
>  
> +void *totemiba_malloc (size_t size)
> +{
> +	return malloc(size);
> +}
> +
> +void totemiba_free (void *ptr)
> +{
> +	return free(ptr);
> +}
> +
>  int totemiba_processor_count_set (
>  	void *iba_context,
>  	int processor_count)
> diff --git a/exec/totemiba.h b/exec/totemiba.h
> index 4b2fbbf..dddfe78 100644
> --- a/exec/totemiba.h
> +++ b/exec/totemiba.h
> @@ -62,6 +62,10 @@ extern int totemiba_initialize (
>  	void (*target_set_completed) (
>  		void *context));
>  
> +extern void *totemiba_malloc (size_t size);
> +
> +extern void totemiba_free (void *ptr);
> +
>  extern int totemiba_processor_count_set (
>  	void *iba_context,
>  	int processor_count);
> diff --git a/exec/totemnet.c b/exec/totemnet.c
> index c7670f9..6283a4f 100644
> --- a/exec/totemnet.c
> +++ b/exec/totemnet.c
> @@ -35,6 +35,8 @@
>  
>  #include <config.h>
>  
> +#include <assert.h>
> +
>  #ifdef HAVE_RDMA
>  #include <totemiba.h>
>  #endif
> @@ -67,6 +69,10 @@ struct transport {
>  		void (*target_set_completed) (
>  			void *context));
>  
> +	void *(*malloc) (size_t size);
> +
> +	void (*free) (void *ptr);
> +
>  	int (*processor_count_set) (
>  		void *transport_context,
>  		int processor_count);
> @@ -127,6 +133,8 @@ struct transport transport_entries[] = {
>  	{
>  		.name = "UDP/IP Multicast",
>  		.initialize = totemudp_initialize,
> +		.malloc = totemudp_malloc,
> +		.free = totemudp_free,
>  		.processor_count_set = totemudp_processor_count_set,
>  		.token_send = totemudp_token_send,
>  		.mcast_flush_send = totemudp_mcast_flush_send,
> @@ -145,6 +153,8 @@ struct transport transport_entries[] = {
>  	{
>  		.name = "UDP/IP Unicast",
>  		.initialize = totemudpu_initialize,
> +		.malloc = totemudpu_malloc,
> +		.free = totemudpu_free,
>  		.processor_count_set = totemudpu_processor_count_set,
>  		.token_send = totemudpu_token_send,
>  		.mcast_flush_send = totemudpu_mcast_flush_send,
> @@ -166,6 +176,8 @@ struct transport transport_entries[] = {
>  	{
>  		.name = "Infiniband/IP",
>  		.initialize = totemiba_initialize,
> +		.malloc = totemiba_malloc,
> +		.free = totemiba_free,
>  		.processor_count_set = totemiba_processor_count_set,
>  		.token_send = totemiba_token_send,
>  		.mcast_flush_send = totemiba_mcast_flush_send,
> @@ -296,6 +308,22 @@ error_destroy:
>  	return (-1);
>  }
>  
> +void *totemnet_malloc (void *net_context, size_t size)
> +{
> +	struct totemnet_instance *instance = net_context;
> +	assert (instance != NULL);
> +	assert (instance->transport != NULL);
> +	return instance->transport->malloc (size);
> +}
> +
> +void totemnet_free (void *net_context, void *ptr)
> +{
> +	struct totemnet_instance *instance = net_context;
> +	assert (instance != NULL);
> +	assert (instance->transport != NULL);
> +	instance->transport->free (ptr);
> +}
> +
>  int totemnet_processor_count_set (
>  	void *net_context,
>  	int processor_count)
> diff --git a/exec/totemnet.h b/exec/totemnet.h
> index 7e6374c..a01872c 100644
> --- a/exec/totemnet.h
> +++ b/exec/totemnet.h
> @@ -69,6 +69,10 @@ extern int totemnet_initialize (
>  	void (*target_set_completed) (
>  		void *context));
>  
> +extern void *totemnet_malloc (void *net_context, size_t size);
> +
> +extern void totemnet_free (void *net_context, void *ptr);
> +
>  extern int totemnet_processor_count_set (
>  	void *net_context,
>  	int processor_count);
> diff --git a/exec/totemrrp.c b/exec/totemrrp.c
> index a8ebd08..a3320f2 100644
> --- a/exec/totemrrp.c
> +++ b/exec/totemrrp.c
> @@ -1665,6 +1665,20 @@ error_destroy:
>  	return (res);
>  }
>  
> +void *totemrrp_malloc (void *rrp_context, size_t size)
> +{
> +	struct totemrrp_instance *instance = rrp_context;
> +	assert (instance != NULL);
> +	return totemnet_malloc (instance->net_handles[0], size);
> +}
> +
> +void totemrrp_free (void *rrp_context, void *ptr)
> +{
> +	struct totemrrp_instance *instance = rrp_context;
> +	assert (instance != NULL);
> +	totemnet_free (instance->net_handles[0], ptr);
> +}
> +
>  int totemrrp_processor_count_set (
>  	void *rrp_context,
>  	unsigned int processor_count)
> diff --git a/exec/totemrrp.h b/exec/totemrrp.h
> index da79ed2..0b291cd 100644
> --- a/exec/totemrrp.h
> +++ b/exec/totemrrp.h
> @@ -78,6 +78,13 @@ extern int totemrrp_initialize (
>  		void *context)
>  	);
>  
> +extern void *totemrrp_malloc(
> +	void *rrp_context,
> +	size_t size);
> +
> +extern void totemrrp_free(
> +	void *rrp_context,
> +	void *ptr);
>  
>  extern int totemrrp_processor_count_set (
>  	void *rrp_context,
> diff --git a/exec/totemsrp.c b/exec/totemsrp.c
> index 62794c7..3a5f1a6 100644
> --- a/exec/totemsrp.c
> +++ b/exec/totemsrp.c
> @@ -606,6 +606,8 @@ static void timer_function_heartbeat_timeout (void *data);
>  static void timer_function_token_retransmit_timeout (void *data);
>  static void timer_function_token_hold_retransmit_timeout (void *data);
>  static void timer_function_merge_detect_timeout (void *data);
> +static void *totemsrp_malloc (struct totemsrp_instance *instance, size_t size);
> +static void totemsrp_free (struct totemsrp_instance *instance, void *ptr);
>  
>  void main_deliver_fn (
>  	void *context,
> @@ -1356,6 +1358,18 @@ static void memb_set_print (
>  }
>  #endif
>  
> +static void *totemsrp_malloc (struct totemsrp_instance *instance, size_t size)
> +{
> +	assert (instance != NULL);
> +	return totemrrp_malloc (instance->totemrrp_context, size);
> +}
> +
> +static void totemsrp_free (struct totemsrp_instance *instance, void *ptr)
> +{
> +	assert (instance != NULL);
> +	totemrrp_free (instance->totemrrp_context, ptr);
> +}
> +
>  static void reset_token_retransmit_timeout (struct totemsrp_instance *instance)
>  {
>  	poll_timer_delete (instance->totemsrp_poll_handle,
> @@ -2067,7 +2081,7 @@ static void memb_state_recovery_enter (
>  		messages_originated++;
>  		memset (&message_item, 0, sizeof (struct message_item));
>  	// TODO	 LEAK
> -		message_item.mcast = malloc (FRAME_SIZE_MAX);
> +		message_item.mcast = totemsrp_malloc (instance, FRAME_SIZE_MAX);
>  		assert (message_item.mcast);
>  		message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
>  		srp_addr_copy (&message_item.mcast->system_from, &instance->my_id);
> @@ -2140,7 +2154,7 @@ int totemsrp_mcast (
>  	/*
>  	 * Allocate pending item
>  	 */
> -	message_item.mcast = malloc (FRAME_SIZE_MAX);
> +	message_item.mcast = totemsrp_malloc (instance, FRAME_SIZE_MAX);
>  	if (message_item.mcast == 0) {
>  		goto error_mcast;
>  	}
> @@ -2278,7 +2292,7 @@ static void messages_free (
>  			instance->last_released + i, &ptr);
>  		if (res == 0) {
>  			regular_message = ptr;
> -			free (regular_message->mcast);
> +			totemsrp_free (instance, regular_message->mcast);
>  		}
>  		sq_items_release (&instance->regular_sort_queue,
>  			instance->last_released + i);
> @@ -3823,7 +3837,7 @@ static int message_handler_mcast (
>  		 * Allocate new multicast memory block
>  		 */
>  // TODO LEAK
> -		sort_queue_item.mcast = malloc (msg_len);
> +		sort_queue_item.mcast = totemsrp_malloc (instance, msg_len);
>  		if (sort_queue_item.mcast == NULL) {
>  			return (-1); /* error here is corrected by the algorithm */
>  		}
> diff --git a/exec/totemudp.c b/exec/totemudp.c
> index b96bdbd..a0894ed 100644
> --- a/exec/totemudp.c
> +++ b/exec/totemudp.c
> @@ -1832,6 +1832,16 @@ int totemudp_initialize (
>  	return (0);
>  }
>  
> +void *totemudp_malloc (size_t size)
> +{
> +	return malloc(size);
> +}
> +
> +void totemudp_free (void *ptr)
> +{
> +	return free(ptr);
> +}
> +
>  int totemudp_processor_count_set (
>  	void *udp_context,
>  	int processor_count)
> diff --git a/exec/totemudp.h b/exec/totemudp.h
> index 6218794..d273f31 100644
> --- a/exec/totemudp.h
> +++ b/exec/totemudp.h
> @@ -63,6 +63,10 @@ extern int totemudp_initialize (
>  	void (*target_set_completed) (
>  		void *context));
>  
> +extern void *totemudp_malloc (size_t size);
> +
> +extern void totemudp_free (void *ptr);
> +
>  extern int totemudp_processor_count_set (
>  	void *udp_context,
>  	int processor_count);
> diff --git a/exec/totemudpu.c b/exec/totemudpu.c
> index 3fad618..faf48b7 100644
> --- a/exec/totemudpu.c
> +++ b/exec/totemudpu.c
> @@ -1497,6 +1497,16 @@ int totemudpu_initialize (
>  	return (0);
>  }
>  
> +void *totemudpu_malloc (size_t size)
> +{
> +	return malloc(size);
> +}
> +
> +void totemudpu_free (void *ptr)
> +{
> +	return free(ptr);
> +}
> +
>  int totemudpu_processor_count_set (
>  	void *udpu_context,
>  	int processor_count)
> diff --git a/exec/totemudpu.h b/exec/totemudpu.h
> index 2dcad24..865a77a 100644
> --- a/exec/totemudpu.h
> +++ b/exec/totemudpu.h
> @@ -63,6 +63,10 @@ extern int totemudpu_initialize (
>  	void (*target_set_completed) (
>  		void *context));
>  
> +extern void *totemudpu_malloc (size_t size);
> +
> +extern void totemudpu_free (void *ptr);
> +
>  extern int totemudpu_processor_count_set (
>  	void *udpu_context,
>  	int processor_count);
> 
> _______________________________________________
> Openais mailing list
> Openais at lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/openais



More information about the Openais mailing list