/**
 * @file communicator.h
 * @brief Generic description of communication
 *
 * Generic routines used for handling the communication
 * for both local and mpi communication.
 *
 * @copyright Copyright  (C)  2013 Moritz Hanke <hanke@dkrz.de>
 *                                 Rene Redler <rene.redler@mpimet.mpg.de>
 *                                 Thomas Jahns <jahns@dkrz.de>
 *
 * @version 1.0
 * @author Moritz Hanke <hanke@dkrz.de>
 *         Rene Redler <rene.redler@mpimet.mpg.de>
 *         Thomas Jahns <jahns@dkrz.de>
 */
/*
 * Keywords:
 * Maintainer: Moritz Hanke <hanke@dkrz.de>
 *             Rene Redler <rene.redler@mpimet.mpg.de>
 *             Thomas Jahns <jahns@dkrz.de>
 * URL: https://doc.redmine.dkrz.de/YAC/html/index.html
 *
 * This file is part of YAC.
 *
 * YAC is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * YAC is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with YAC.  If not, see <http://www.gnu.org/licenses/gpl.txt>.
 */

#ifndef COMMUNICATOR_H
#define COMMUNICATOR_H

#include "dep_list.h"

//! wildcard value for a source rank\n
//! NOTE(review): presumably passed as source argument of the receive routines
//! in order to accept a message from any sender; the source arguments in this
//! header are unsigned, so (-1) gets converted to UINT_MAX there -- confirm
//! against the implementation
#define COMM_ANYSOURCE (-1)

/**
 * data types of the elements that can be exchanged through a %communicator
 */
enum yac_comm_data_type {
   COMM_UINT     = 0,
   COMM_DBLE     = 1,
   COMM_INT      = 2,
   COMM_PACKED   = 3,
   COMM_BYTE     = 4,
   /* there is deliberately no COMM_SIZE_T: it makes problems on
      heterogeneous systems -> use COMM_UINT64_T instead */
   COMM_UINT64_T = 5,
   COMM_CHAR     = 6
};

// forward declaration
struct communicator;

/**
 * message that is passed to \ref func_recv_callback
 * (and \ref func_send_callback)
 */
struct comm_message {

   /** buffer containing the received message \n
    *  allocation and deallocation of this buffer is handled by the
    *  %communicator */
   void * buffer;
   enum yac_comm_data_type type; //!< data type of the elements in buffer
   unsigned count;           //!< number of received elements in buffer
   unsigned tag;             //!< tag of the message
   unsigned dst_rank;        //!< rank of the receiver process
   unsigned src_rank;        //!< rank of the sender process
};

/**
 * request object of a non-blocking operation
 * (see \ref yac_comm_isend and \ref yac_comm_irecv)\n
 * contains the routines operating on this kind of request; their signatures
 * match \ref yac_comm_waitall, \ref yac_comm_testsome, and
 * \ref yac_comm_free_request
 */
struct comm_request {
  //! arguments: number of entries in requests, requests
  void (*waitall)(unsigned count, struct comm_request ** requests);
  //! arguments: number of entries in requests, requests
  unsigned (*testsome)(unsigned count, struct comm_request ** requests);
  //! argument: request to be freed
  void (*free_request)(struct comm_request ** request);
};

//! null value for request objects\n
//! NOTE(review): presumably marks a request that is not associated with any
//! operation (compare MPI_REQUEST_NULL) -- confirm against the implementation
extern const struct comm_request * const COMM_REQUEST_NULL;

//! function pointer for send callbacks \n arguments: %communicator, message, user_data
typedef void (*func_send_callback) (struct communicator *, struct comm_message, void *);
//! function pointer used in \ref yac_comm_recv_callback and in the collective
//! *_callback routines \n arguments: %communicator, message, user_data
typedef void (*func_recv_callback) (struct communicator *, struct comm_message, void *);
//! function pointer used in \ref yac_comm_recv_callback for the cancellation
//! of a receive \n arguments: %communicator, user_data
typedef void (*func_recv_callback_cancel) (struct communicator *, void *);
//! function pointer used in reduce operations \n
//! arguments: invec, inoutvec, number of elements, data type of the elements
typedef void (*func_comm_op)(void * invec, void * inoutvec, unsigned count,
                             enum yac_comm_data_type type);
//! function pointer for unique tag callbacks \n
//! arguments: %communicator, unique_tags, untyped pointer (presumably
//! user_data -- TODO confirm)
typedef void (*func_unique_tags_callback)(struct communicator *,
                                          unsigned * unique_tags, void *);
                                       

//! predefined reduction operations (see \ref func_comm_op)\n
//! NOTE(review): judging by the names these compute a sum and an OR of the
//! elements -- the definitions are not part of this header, confirm there
extern func_comm_op COMM_OP_SUM;
extern func_comm_op COMM_OP_OR;

/**
 * priorities used by \ref yac_comm_recv_callback\n
 * (the higher the priority, the lower its numerical value has to be)
 */
enum yac_message_priority {
   MSG_PRIO_HIGH = 0,
   MSG_PRIO_MIDDLE,
   MSG_PRIO_LOW,

   /* must remain the final enumerator */
   MSG_PRIO_INVALID
};

/**
 * table of function pointers providing the operations of a %communicator\n
 * most entries have the same signature as the yac_* routine of matching name
 * that is declared in this header (e.g. bsend and \ref yac_comm_bsend); see
 * the respective routine for the meaning of the arguments
 *
 * NOTE(review): presumably every %communicator implementation provides its own
 * table and the yac_* routines dispatch through it -- the dispatching code is
 * not part of this header, confirm there
 */
struct communicator_vtable {

   void (*free)(struct communicator * comm);
   unsigned (*get_size)(struct communicator * comm);
   unsigned (*get_rank)(struct communicator * comm);
   void (*bsend)(void const * send_buffer, unsigned count,
                 enum yac_comm_data_type data_type, unsigned dest,
                 unsigned tag, struct communicator * comm);
   void (*isend)(void const * send_buffer, unsigned count,
                 enum yac_comm_data_type data_type, unsigned dest,
                 unsigned tag, struct communicator * comm,
                 struct comm_request ** request);
   void (*recv_callback)(unsigned count, enum yac_comm_data_type data_type,
                         unsigned source, unsigned tag,
                         struct communicator * comm, void * user_data,
                         func_recv_callback callback,
                         func_recv_callback_cancel recv_callback_cancel,
                         enum yac_message_priority priority);
   void (*recv)(void * recv_buffer, unsigned * count,
                enum yac_comm_data_type data_type, unsigned source,
                unsigned tag, struct communicator * comm);
   void (*irecv)(void * recv_buffer, unsigned count,
                 enum yac_comm_data_type data_type, unsigned source,
                 unsigned tag, struct communicator * comm,
                 struct comm_request ** request);
   unsigned (*wait)(struct communicator * comm, unsigned tag);
   void (*cancel_recv)(struct communicator * comm, unsigned tag);
   void (*allgather)(void * send_buffer, unsigned send_count,
                     void * recv_buffer, unsigned recv_count, 
                     enum yac_comm_data_type data_type,
                     struct communicator * comm);
   void (*allgatherv)(void * send_buffer, unsigned send_count,
                      void * recv_buffer, unsigned * recv_counts, 
                      enum yac_comm_data_type data_type,
                      struct communicator * comm);
   struct communicator * (*dup)(struct communicator * comm);
   struct communicator * (*split)(struct communicator * comm,
                                  int color, int key);
   struct communicator * (*split_intercomm)(struct communicator * comm,
                                            int group,
                                            unsigned * local_group_ranks,
                                            unsigned * remote_group_ranks);
   unsigned (*get_remote_size)(struct communicator * comm);
   void (*abort)(struct communicator * comm);
   unsigned (*get_unique_tag)(struct communicator * comm,
                              char * identifier_string);
   void (*get_unique_tags)(struct communicator * comm,
                           char const * unique_generator_string,
                           char const ** tag_strings, unsigned num_tag_strings,
                           unsigned * tags);
   int (*pack_size)(int count, enum yac_comm_data_type type,
                    struct communicator * comm);
   void (*pack)(void * in_buffer, int in_count, enum yac_comm_data_type type,
                void * out_buffer, int out_buffer_size, int * position,
                struct communicator * comm);
   void (*unpack)(void * in_buffer, int in_size, int * position,
                  void * out_buffer, int out_count,
                  enum yac_comm_data_type type, struct communicator * comm);
   void (*translate_ranks)(struct communicator * comm_a,
                           struct communicator * comm_b,
                           unsigned * ranks);
   void (*bcast_callback)(void * buffer, unsigned count,
                          enum yac_comm_data_type data_type, unsigned root,
                          unsigned tag, struct communicator * comm,
                          void * user_data, func_recv_callback callback,
                          enum yac_message_priority priority);
   void (*gatherv_callback)(void * send_buffer, unsigned * counts, 
                            enum yac_comm_data_type data_type, unsigned root,
                            unsigned tag, struct communicator * comm,
                            void * user_data,
                            func_recv_callback callback,
                            enum yac_message_priority priority);
   void (*allgather_callback)(void * send_buffer, unsigned count, 
                              enum yac_comm_data_type data_type, unsigned tag,
                              struct communicator * comm, void * user_data,
                              func_recv_callback callback,
                              enum yac_message_priority priority);
   void (*allgatherv_callback)(void * send_buffer, unsigned * counts, 
                               enum yac_comm_data_type data_type, unsigned tag,
                               struct communicator * comm, void * user_data,
                               func_recv_callback callback,
                               enum yac_message_priority priority);
   void (*scatterv_callback)(void * send_buffer, unsigned * counts, 
                             enum yac_comm_data_type data_type, unsigned root,
                             unsigned tag, struct communicator * comm,
                             void * user_data, func_recv_callback callback,
                             enum yac_message_priority priority);
   void (*scatter_callback)(void * send_buffer, unsigned count, 
                            enum yac_comm_data_type data_type, unsigned root,
                            unsigned tag, struct communicator * comm,
                            void * user_data, func_recv_callback callback,
                            enum yac_message_priority priority);
   void (*alltoallv_callback)(void * send_buffer, unsigned * counts,
                              enum yac_comm_data_type data_type,
                              unsigned tag, struct communicator * comm,
                              void * user_data,
                              func_recv_callback callback,
                              enum yac_message_priority priority);
   void (*reduce_callback)(void * buffer, unsigned count,
                           enum yac_comm_data_type data_type,
                           func_comm_op op, unsigned root, unsigned tag,
                           struct communicator * comm, void * user_data,
                           func_recv_callback callback,
                           enum yac_message_priority priority);
   void (*allreduce_callback)(void * buffer, unsigned count,
                              enum yac_comm_data_type data_type,
                              func_comm_op op, unsigned tag,
                              struct communicator * comm, void * user_data,
                              func_recv_callback callback,
                              enum yac_message_priority priority);
   void (*barrier_callback)(unsigned tag, struct communicator * comm,
                            void * user_data,
                            func_recv_callback callback,
                            enum yac_message_priority priority);
   void (*check)(struct communicator * comm);
};

/** \brief %communicator structure
 *
 * A %communicator object can be used to communicate with other processes. It is
 * similar to the MPI_Comm. The main use of struct %communicator is to have a
 * communication mechanism that can also work without MPI. This is important for
 * testing in a non-parallel environment. Currently, this is achieved by the
 * local %communicator (see \ref communicator_local.h).
 */
struct communicator {
   //! table of the operations available for this %communicator
   //! (see struct communicator_vtable)
   struct communicator_vtable *vtable;
};

/**
 * frees all memory associated with the %communicator
 * @param[in] comm %communicator to be freed
 */
void yac_free_communicator(struct communicator * comm);

/**
 * gets the number of processes in the %communicator
 * @param[in] comm %communicator to be queried
 * @return number of processes in comm
 */
unsigned yac_get_comm_size(struct communicator * comm);

/**
 * gets the rank of the local process in the %communicator
 * @param[in] comm %communicator to be queried
 * @return rank of local process in comm
 */
unsigned yac_get_comm_rank(struct communicator * comm);

/**
 * sends a message to the specified rank in comm\n
 * this routine is similar to MPI_Bsend, however no additional buffer needs to be
 * provided by the user and the memory associated with send_buffer can be reused
 * after the call to yac_comm_bsend
 * @param[in] send_buffer pointer to the buffer containing the data to be sent
 * @param[in] count number of data elements to be sent
 * @param[in] data_type data type of the data elements to be sent
 * @param[in] dest rank in comm of the receiver of this message
 * @param[in] tag tag for identifying the message
 * @param[in] comm
 */
void yac_comm_bsend(void const * send_buffer, unsigned count, enum yac_comm_data_type data_type,
                    unsigned dest, unsigned tag, struct communicator * comm);

/**
 * non-blocking send operation similar to MPI_Isend
 * @param[in] send_buffer pointer to send buffer
 * @param[in] count number of elements to be sent
 * @param[in] data_type data type of the data elements to be sent
 * @param[in] dest rank in comm of the receiver
 * @param[in] tag tag for identifying the message
 * @param[in] comm
 * @param[out] request request object that can be used to ensure that the
 *                     send is completed (see \ref yac_comm_waitall)
 */
void yac_comm_isend(void const * send_buffer, unsigned count,
                    enum yac_comm_data_type data_type, unsigned dest,
                    unsigned tag, struct communicator * comm,
                    struct comm_request ** request);

/**
 * initiates an asynchronous receive operation; once the message is received,
 * the recv_callback routine will be called
 * @param[in] count maximum number of elements to be received
 * @param[in] data_type data type of the data elements to be received
 * @param[in] source rank in comm of the sender
 * @param[in] tag tag for identifying the message
 * @param[in] comm
 * @param[in] user_data a pointer that is passed to recv_callback or
 *                      recv_callback_cancel if the respective routine is called
 * @param[in] recv_callback pointer to a routine that is called once the
 *                          message is received
 * @param[in] recv_callback_cancel pointer to a routine that is called once
 *                                 this receive is canceled
 * @param[in] priority priority of the message
 * @see yac_comm_cancel_recv
 *
 * \remark NULL is a valid value for user_data, recv_callback, and recv_callback_cancel
 */
void yac_comm_recv_callback(unsigned count, enum yac_comm_data_type data_type,
                            unsigned source, unsigned tag,
                            struct communicator * comm, void * user_data,
                            func_recv_callback recv_callback,
                            func_recv_callback_cancel recv_callback_cancel,
                            enum yac_message_priority priority);

/**
 * blocking receive operation similar to MPI_Recv
 * @param[out] recv_buffer pointer to receive buffer
 * @param[in,out] count on input: maximum number of elements to be received\n
 *                      on output: actual number of received elements
 * @param[in] data_type data type of the data elements to be received
 * @param[in] source rank in comm of the sender
 * @param[in] tag tag for identifying the message
 * @param[in] comm
 */
void yac_comm_recv(void * recv_buffer, unsigned * count,
                   enum yac_comm_data_type data_type, unsigned source,
                   unsigned tag, struct communicator * comm);

/**
 * non-blocking receive operation similar to MPI_Irecv
 * @param[out] recv_buffer pointer to receive buffer
 * @param[in] count gives the maximum number of elements to be received
 * @param[in] data_type data type of the data elements to be received
 * @param[in] source rank in comm of the sender
 * @param[in] tag tag for identifying the message
 * @param[in] comm
 * @param[out] request request object that can be used to ensure that the
 *                     receive is completed (see \ref yac_comm_waitall)
 */
void yac_comm_irecv(void * recv_buffer, unsigned count,
                    enum yac_comm_data_type data_type, unsigned source,
                    unsigned tag, struct communicator * comm,
                    struct comm_request ** request);

/**
 * waits until all requests in the %communicator with a specific tag have been completed
 * @param[in] comm
 * @param[in] tag tag of requests to be completed
 * @returns 1 if any message has been received and processed by yac_comm_wait
 *            (message could have been with another tag on another communicator)\n
 *          0 if no message has been received and processed
 */
unsigned yac_comm_wait(struct communicator * comm, unsigned tag);

/**
 * waits until all provided requests have been completed
 * @param[in] count number of entries in requests
 * @param[in,out] requests request objects (see \ref yac_comm_isend and
 *                                          \ref yac_comm_irecv)
 */
void yac_comm_waitall(unsigned count, struct comm_request ** requests);


/**
 * tests the provided requests for completion
 * @param[in] count number of entries in requests
 * @param[in,out] requests request objects (see \ref yac_comm_isend and
 *                                          \ref yac_comm_irecv)
 *
 * \remark NOTE(review): the meaning of the return value is not documented in
 *         this header; presumably the routine is analogous to MPI_Testsome and
 *         reports on the completed requests -- confirm against the
 *         implementation
 */
unsigned yac_comm_testsome(unsigned count, struct comm_request ** requests);

/**
 * frees a request object
 * @param[in,out] request request object to be freed (see \ref yac_comm_isend
 *                                                    and \ref yac_comm_irecv)
 */
void yac_comm_free_request(struct comm_request ** request);

/**
 * cancels a yac_comm_recv_callback operation
 * @param[in] comm
 * @param[in] tag tag of the yac_comm_recv_callback requests that are supposed
 *                to be canceled
 * @see yac_comm_recv_callback
 */
void yac_comm_cancel_recv(struct communicator * comm, unsigned tag);

/**
 * gathers data from all processes in the %communicator (similar to MPI_Allgather)
 * @param[in] send_buffer pointer to send buffer
 * @param[in] send_count number of elements to be sent
 * @param[out] recv_buffer pointer to receive buffer
 * @param[in] recv_count number of elements received per other process
 * @param[in] data_type data type of the elements to be exchanged
 * @param[in] comm
 */
void yac_comm_allgather(void * send_buffer, unsigned send_count,
                        void * recv_buffer, unsigned recv_count,
                        enum yac_comm_data_type data_type, struct communicator * comm);

/**
 * gathers data from all processes in the %communicator (similar to MPI_Allgatherv)
 * @param[in] send_buffer pointer to send buffer
 * @param[in] send_count number of elements to be sent
 * @param[out] recv_buffer pointer to receive buffer
 * @param[in] recv_counts array with the number of elements to be received per process
 * @param[in] data_type data type of the elements to be exchanged
 * @param[in] comm
 */
void yac_comm_allgatherv(void * send_buffer, unsigned send_count,
                         void * recv_buffer, unsigned * recv_counts,
                         enum yac_comm_data_type data_type, struct communicator * comm);

/**
 * duplicates a %communicator (similar to MPI_Comm_dup)
 * @param[in] comm %communicator to be duplicated
 * @return duplicate of the input %communicator
 */
struct communicator * yac_comm_dup(struct communicator * comm);

/**
 * splits a %communicator (similar to MPI_Comm_split)
 * @param[in] comm %communicator to be split
 * @param[in] color processes with the same color are in the same new_comm
 * @param[in] key specifies the rank of the local process in new_comm
 * @return new %communicator
 *
 * \remark comm_split_mpi sets negative colors to MPI_UNDEFINED.
 *         Thus, processes calling yac_comm_split with negative colors
 *         will receive the equivalent of MPI_COMM_NULL.
 */
struct communicator * yac_comm_split(struct communicator * comm,
                                     int color, int key);

/**
 * splits a %communicator into two intercommunicators (similar to MPI_Comm_split
 * in combination with MPI_Intercomm_create)
 * @param[in] comm %communicator to be split
 * @param[in] group is either 0 or 1 and specifies which group the local
 *                  process will belong to
 * @param[out] local_group_ranks contains the mapping of the ranks of the local
 *                               group in new_comm to the ranks in comm
 * @param[out] remote_group_ranks contains the mapping of the ranks of the
 *                                remote group in new_comm to the ranks in comm
 * @return new %communicator
 *
 * \remark the user needs to provide either NULL or an array of sufficient size
 *         for local_group_ranks and remote_group_ranks
 */
struct communicator * yac_comm_split_intercomm(struct communicator * comm,
                                               int group,
                                               unsigned * local_group_ranks,
                                               unsigned * remote_group_ranks);

/**
 * gets the number of processes in the remote group of an inter-communicator
 * @param[in] comm inter-communicator
 * @return number of processes in the remote group of comm
 * @see yac_comm_split_intercomm
 *
 * \remark this operation is only valid for inter-communicators
 */
unsigned yac_get_comm_remote_size(struct communicator * comm);

/**
 * terminates the program on all processes in the %communicator
 * (similar to MPI_Abort)
 * @param[in] comm
 */
void yac_comm_abort(struct communicator * comm);

/**
 * generates unique tags based on the provided string
 * (which also needs to be unique)\n
 * this routine needs to be called collectively among all processes in comm
 * @param[in] comm
 * @param[in] unique_generator_string string identifying this call, needs to be
 *                                    identical on all processes
 * @param[in] tag_strings identifier strings for all tags
 * @param[in] num_tag_strings number of entries in tag_strings
 * @param[out] tags generated tags (NOTE(review): presumably one tag per entry
 *                  of tag_strings, hence an array of at least num_tag_strings
 *                  entries -- confirm against the implementation)
 * @remark all processes need to provide the same tag strings, the order can be
 *         different
 */
void yac_get_unique_comm_tags(struct communicator * comm,
                              char const * unique_generator_string,
                              char const ** tag_strings,
                              unsigned num_tag_strings, unsigned * tags);

/**
 * determines the number of bytes that are required to pack the given datatype
 * with the given count
 * @param[in] count number of elements to be packed
 * @param[in] type  datatype of the elements to be packed
 * @param[in] comm
 * @return number of bytes required for packing
 */
int yac_comm_pack_size(int count, enum yac_comm_data_type type,
                       struct communicator * comm);

/**
 * packs data of given type and count into contiguous memory
 * @param[in]     in_buffer       input buffer
 * @param[in]     in_count        number of elements to be packed
 * @param[in]     type            datatype of elements to be packed
 * @param[out]    out_buffer      start address of output buffer
 * @param[in]     out_buffer_size size of output buffer in byte
 * @param[in,out] position        current position in byte (NOTE(review): the
 *                                previous text said "write position in the
 *                                input buffer"; presumably this is the write
 *                                position in the output buffer, as for
 *                                MPI_Pack -- confirm)
 * @param[in]     comm
 */
void yac_comm_pack(void * in_buffer, int in_count, enum yac_comm_data_type type,
                   void * out_buffer, int out_buffer_size, int * position,
                   struct communicator * comm);

/**
 * unpacks data of given type and count from a packed input buffer
 * @param[in]     in_buffer  input buffer
 * @param[in]     in_size    size of input buffer in byte
 * @param[in,out] position   current position in the input buffer in byte
 *                           (NOTE(review): presumably the read position, as
 *                           for MPI_Unpack -- confirm)
 * @param[out]    out_buffer start address of output buffer
 * @param[in]     out_count  number of elements to be unpacked
 * @param[in]     type       datatype of elements to be unpacked
 * @param[in]     comm
 */
void yac_comm_unpack(void * in_buffer, int in_size, int * position,
                     void * out_buffer, int out_count, enum yac_comm_data_type type,
                     struct communicator * comm);

/**
 * routine translates ranks of all processes in comm_b to those of comm_a
 * @param[in]  comm_a communicator a
 * @param[in]  comm_b communicator b
 * @param[out] ranks  array of translated ranks (sorted by the ranks in comm_b)
 * @remarks the user needs to provide an array for ranks that needs to be big
 *          enough for all ranks of comm_b
 * @remarks all processes of comm_b must be within comm_a, otherwise the result
 *          is undefined
 * @remarks the translated ranks are ordered by the order of ranks in comm_b
 */
void yac_comm_translate_ranks(struct communicator * comm_a,
                              struct communicator * comm_b,
                              unsigned * ranks);

/**
 * adds a routine to a list of routines that are called when comm_finalize is
 * called
 * @param[in] callback routine to be added to the list; it takes no arguments
 *                     and returns nothing
 */
void yac_add_comm_finalize_callback(void (*callback)(void));

/**
 * initiates an asynchronous collective broadcast operation; once the operation
 * is finished locally, the bcast_callback routine is called
 * @param[in] buffer send buffer (only significant on root)
 * @param[in] count number of entries in buffer (needs to be identical on all
 *                  ranks)
 * @param[in] data_type data type of the data elements in the buffer
 * @param[in] root rank of broadcast root
 * @param[in] tag tag for identifying the broadcast message
 * @param[in] comm
 * @param[in] user_data a pointer that is passed to bcast_callback if the
 *                      respective routine is called
 * @param[in] bcast_callback pointer to a routine that is called once the
 *                           broadcast operation is finished
 * @param[in] priority priority of the broadcast operation
 * @remarks while the broadcast operation is active no other messages with the
 *          same tag are allowed to be sent/received in comm
 * @remarks yac_comm_wait can be used to ensure that the operation finished
 */
void yac_comm_bcast_callback(void * buffer, unsigned count,
                             enum yac_comm_data_type data_type, unsigned root,
                             unsigned tag, struct communicator * comm,
                             void * user_data, func_recv_callback bcast_callback,
                             enum yac_message_priority priority);

/**
 * gathers data from all processes in the %communicator (similar to MPI_Gatherv)
 * @param[in] send_buffer pointer to send buffer
 * @param[in] counts array containing number of elements to be sent by each
 *                   process (needs to be provided by all processes)
 * @param[in] data_type data type of the elements to be exchanged
 * @param[in] root rank of the process on which the data is gathered
 * @param[in] tag tag for identifying the gatherv message
 * @param[in] comm
 * @param[in] user_data a pointer that is passed to gatherv_callback if the
 *                      respective routine is called
 * @param[in] gatherv_callback pointer to a routine that is called once the
 *                             gatherv operation is finished
 * @param[in] priority priority of the gatherv operation
 * @remarks while the gatherv operation is active no other messages with the
 *          same tag are allowed to be sent/received in comm
 * @remarks yac_comm_wait can be used to ensure that the operation finished
 */
void yac_comm_gatherv_callback(void * send_buffer, unsigned * counts,
                               enum yac_comm_data_type data_type, unsigned root,
                               unsigned tag, struct communicator * comm,
                               void * user_data,
                               func_recv_callback gatherv_callback,
                               enum yac_message_priority priority);

/**
 * gathers data from all processes in the %communicator (similar to MPI_Allgather)
 * @param[in] send_buffer pointer to send buffer
 * @param[in] count number of elements to be sent by each process
 * @param[in] data_type data type of the elements to be exchanged
 * @param[in] tag tag for identifying the allgather message
 * @param[in] comm
 * @param[in] user_data a pointer that is passed to allgather_callback if the
 *                      respective routine is called
 * @param[in] allgather_callback pointer to a routine that is called once the
 *                               allgather operation is finished
 * @param[in] priority priority of the allgather operation
 * @remarks while the allgather operation is active no other messages with the
 *          same tag are allowed to be sent/received in comm
 * @remarks yac_comm_wait can be used to ensure that the operation finished
 */
void yac_comm_allgather_callback(void * send_buffer, unsigned count,
                                 enum yac_comm_data_type data_type, unsigned tag,
                                 struct communicator * comm, void * user_data,
                                 func_recv_callback allgather_callback,
                                 enum yac_message_priority priority);

/**
 * gathers data from all processes in the %communicator (similar to MPI_Allgatherv)
 * @param[in] send_buffer pointer to send buffer
 * @param[in] counts array containing number of elements to be sent by each
 *                   process (needs to be provided by all processes)
 * @param[in] data_type data type of the elements to be exchanged
 * @param[in] tag tag for identifying the allgatherv message
 * @param[in] comm
 * @param[in] user_data a pointer that is passed to allgatherv_callback if the
 *                      respective routine is called
 * @param[in] allgatherv_callback pointer to a routine that is called once the
 *                                allgatherv operation is finished
 * @param[in] priority priority of the allgatherv operation
 * @remarks while the allgatherv operation is active no other messages with the
 *          same tag are allowed to be sent/received in comm
 * @remarks yac_comm_wait can be used to ensure that the operation finished
 */
void yac_comm_allgatherv_callback(void * send_buffer, unsigned * counts,
                                  enum yac_comm_data_type data_type, unsigned tag,
                                  struct communicator * comm, void * user_data,
                                  func_recv_callback allgatherv_callback,
                                  enum yac_message_priority priority);

/**
 * scatters data from the root process to the processes in the %communicator
 * (similar to MPI_Scatter)
 * @param[in] send_buffer pointer to send buffer (only significant on root)
 * @param[in] count number of elements to be received by each process
 *                  (needs to be provided by all processes)
 * @param[in] data_type data type of the elements to be exchanged
 * @param[in] root rank of the process from which the data is scattered
 * @param[in] tag tag for identifying the scatter message
 * @param[in] comm
 * @param[in] user_data a pointer that is passed to scatter_callback if the
 *                      respective routine is called
 * @param[in] scatter_callback pointer to a routine that is called once the
 *                             scatter operation is finished
 * @param[in] priority priority of the scatter operation
 * @remarks while the scatter operation is active no other messages with the
 *          same tag are allowed to be sent/received in comm
 * @remarks yac_comm_wait can be used to ensure that the operation finished
 */
void yac_comm_scatter_callback(void * send_buffer, unsigned count,
                               enum yac_comm_data_type data_type, unsigned root,
                               unsigned tag, struct communicator * comm,
                               void * user_data,
                               func_recv_callback scatter_callback,
                               enum yac_message_priority priority);

/**
 * scatters data from the root process to the processes in the %communicator
 * (similar to MPI_Scatterv)
 * @param[in] send_buffer pointer to send buffer (only significant on root)
 * @param[in] counts array containing number of elements to be received by each
 *                   process (needs to be provided by all processes)
 * @param[in] data_type data type of the elements to be exchanged
 * @param[in] root rank of the process from which the data is scattered
 * @param[in] tag tag for identifying the scatterv message
 * @param[in] comm
 * @param[in] user_data a pointer that is passed to scatterv_callback if the
 *                      respective routine is called
 * @param[in] scatterv_callback pointer to a routine that is called once the
 *                              scatterv operation is finished
 * @param[in] priority priority of the scatterv operation
 * @remarks while the scatterv operation is active no other messages with the
 *          same tag are allowed to be sent/received in comm
 * @remarks yac_comm_wait can be used to ensure that the operation finished
 */
void yac_comm_scatterv_callback(void * send_buffer, unsigned * counts,
                                enum yac_comm_data_type data_type, unsigned root,
                                unsigned tag, struct communicator * comm,
                                void * user_data,
                                func_recv_callback scatterv_callback,
                                enum yac_message_priority priority);

/**
 * exchanges data between all processes within the %communicator
 * @param[in] send_buffer pointer to send buffer
 * @param[in] counts number of elements for each process in comm
 * @param[in] data_type data type of the elements to be exchanged
 * @param[in] tag tag for identifying the alltoallv message
 * @param[in] comm %communicator whose processes take part in the alltoallv
 *                 operation
 * @param[in] user_data a pointer that is passed to alltoallv_callback if the
 *                      respective routine is called
 * @param[in] alltoallv_callback pointer to a routine that is called once the
 *                               alltoallv operation is finished
 * @param[in] priority priority of the alltoallv operation
 * @remarks while the alltoallv operation is active no other messages with the
 *          same tag are allowed to be sent/received in comm
 * @remarks yac_comm_wait can be used to ensure that the operation has finished
 */
void yac_comm_alltoallv_callback(void * send_buffer, unsigned * counts,
                                 enum yac_comm_data_type data_type,
                                 unsigned tag, struct communicator * comm,
                                 void * user_data,
                                 func_recv_callback alltoallv_callback,
                                 enum yac_message_priority priority);

/**
 * applies operation to the combination of all values and collects the result
 * on the specified root process
 * @param[in] buffer pointer to data to be combined
 * @param[in] count number of elements in buffer
 * @param[in] data_type data type of the elements in buffer
 * @param[in] op operation to be applied in the reduction (see \ref func_comm_op)
 * @param[in] root rank on which the final result is available
 * @param[in] tag tag for identifying the reduce messages
 * @param[in] comm %communicator whose processes take part in the reduce
 *                 operation
 * @param[in] user_data a pointer that is passed to reduce_callback if the
 *                      respective routine is called
 * @param[in] reduce_callback pointer to a routine that is called once the
 *                            reduce operation is finished
 * @param[in] priority priority of the reduce operation
 * @remarks while the reduce operation is active no other messages with the
 *          same tag are allowed to be sent/received in comm
 * @remarks yac_comm_wait can be used to ensure that the operation has finished
 * @remarks the number of elements in buffer has to be the same for all
 *          processes
 */
void yac_comm_reduce_callback(void * buffer, unsigned count,
                              enum yac_comm_data_type data_type,
                              func_comm_op op, unsigned root, unsigned tag,
                              struct communicator * comm, void * user_data,
                              func_recv_callback reduce_callback,
                              enum yac_message_priority priority);

/**
 * applies operation to the combination of all values and distributes the result
 * @param[in] buffer pointer to data to be combined
 * @param[in] count number of elements in buffer
 * @param[in] data_type data type of the elements in buffer
 * @param[in] op operation to be applied in the reduction (see \ref func_comm_op)
 * @param[in] tag tag for identifying the allreduce messages
 * @param[in] comm %communicator whose processes take part in the allreduce
 *                 operation
 * @param[in] user_data a pointer that is passed to allreduce_callback if the
 *                      respective routine is called
 * @param[in] allreduce_callback pointer to a routine that is called once the
 *                               allreduce operation is finished
 * @param[in] priority priority of the allreduce operation
 * @remarks while the allreduce operation is active no other messages with the
 *          same tag are allowed to be sent/received in comm
 * @remarks yac_comm_wait can be used to ensure that the operation has finished
 * @remarks the number of elements in buffer has to be the same for all
 *          processes
 */
void yac_comm_allreduce_callback(void * buffer, unsigned count,
                                 enum yac_comm_data_type data_type,
                                 func_comm_op op, unsigned tag,
                                 struct communicator * comm, void * user_data,
                                 func_recv_callback allreduce_callback,
                                 enum yac_message_priority priority);

/**
 * a call to \ref yac_comm_wait for the provided tag will not return until all
 * processes in comm have called \ref yac_comm_barrier_callback with the same
 * tag
 * @param[in] tag tag for identifying the barrier messages
 * @param[in] comm %communicator whose processes take part in the barrier
 * @param[in] user_data a pointer that is passed to barrier_callback if the
 *                      respective routine is called
 * @param[in] barrier_callback pointer to a routine that is called once the
 *                             barrier operation is finished
 * @param[in] priority priority of the barrier operation
 */
void yac_comm_barrier_callback(unsigned tag, struct communicator * comm,
                               void * user_data,
                               func_recv_callback barrier_callback,
                               enum yac_message_priority priority);

/**
 * Passes control to the communication layer in order for it to be able to
 * process asynchronous messages.
 * This routine can be called in tasks that are computationally intensive.
 * @param[in] comm %communicator that is passed to the communication layer
 */
void yac_comm_check(struct communicator * comm);

/**
 * terminates the parallel environment
 * @remarks the routine takes no arguments; it is declared with an explicit
 *          void parameter list so that the compiler rejects calls that pass
 *          arguments
 */
void yac_comm_finalize(void);

#endif // COMMUNICATOR_H

