libxmp/libxmpf in Omni Compiler  1.3.4
xmp_reduce_tca.c File Reference
#include <cuda_runtime.h>
#include <xmp_internal.h>
#include "tca-api.h"
#include <math.h>
Include dependency graph for xmp_reduce_tca.c:

Data Structures

struct  _XMP_tca_coll_info_type
 

Macros

#define _XMP_TCA_SYNC_MARK_SIZE   sizeof(unsigned long)
 
#define _XMP_TCA_CACHE_ALIGNED_STRIDE   64
 
#define _XMP_TCA_PIO_SYNC_MARK   255
 
#define _XMP_TCA_COLL_MAX   64
 
#define _XMP_TCA_ALLREDUCE_TAG   0x100
 
#define _XMP_TCA_DEVICE_TO_HOST_WAIT_SLOT   0
 
#define _XMP_TCA_HOST_TO_DEVICE_WAIT_SLOT   1
 
#define _XMP_TCA_ALLREDUCE_TCACOPY_LIMIT   8
 
#define CUDA_CHECK(cuda_call)
 
#define OP_FUNC_3OP(name, op, type_name, type)
 
#define TCA_TYPE_FUNCTIONS(name, type)
 

Typedefs

typedef struct _XMP_tca_coll_info_type _XMP_tca_coll_info_t
 
typedef void(* tca_op_func_3op_handler_t) (void *, void *, void *, int)
 
typedef tca_op_func_3op_handler_t tca_op_fn_3op_t
 

Enumerations

enum  {
  TCA_OP_INT8, TCA_OP_UINT8, TCA_OP_INT16, TCA_OP_UINT16,
  TCA_OP_INT32, TCA_OP_UINT32, TCA_OP_INT64, TCA_OP_UINT64,
  TCA_OP_FLOAT, TCA_OP_DOUBLE, TCA_OP_LONG_DOUBLE, TCA_OP_TYPE_MAX
}
 

Functions

void _XMP_reduce_tca_NODES_ENTIRE (_XMP_nodes_t *nodes, void *addr, int count, int datatype, int op)
 
void _XMP_reduce_tca_CLAUSE (void *data_addr, int count, int datatype, int op)
 

Variables

_XMP_tca_coll_info_t coll_info
 
int _XMP_tca_coll_info_flag = 0
 

Macro Definition Documentation

◆ _XMP_TCA_ALLREDUCE_TAG

#define _XMP_TCA_ALLREDUCE_TAG   0x100

◆ _XMP_TCA_ALLREDUCE_TCACOPY_LIMIT

#define _XMP_TCA_ALLREDUCE_TCACOPY_LIMIT   8

◆ _XMP_TCA_CACHE_ALIGNED_STRIDE

#define _XMP_TCA_CACHE_ALIGNED_STRIDE   64

◆ _XMP_TCA_COLL_MAX

#define _XMP_TCA_COLL_MAX   64

◆ _XMP_TCA_DEVICE_TO_HOST_WAIT_SLOT

#define _XMP_TCA_DEVICE_TO_HOST_WAIT_SLOT   0

◆ _XMP_TCA_HOST_TO_DEVICE_WAIT_SLOT

#define _XMP_TCA_HOST_TO_DEVICE_WAIT_SLOT   1

◆ _XMP_TCA_PIO_SYNC_MARK

#define _XMP_TCA_PIO_SYNC_MARK   255

◆ _XMP_TCA_SYNC_MARK_SIZE

#define _XMP_TCA_SYNC_MARK_SIZE   sizeof(unsigned long)

◆ CUDA_CHECK

#define CUDA_CHECK (   cuda_call)
Value:
do { \
cudaError_t status = cuda_call; \
if(status != cudaSuccess) { \
fprintf(stderr, "CUDA error in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString(status) ); \
exit(EXIT_FAILURE); \
} \
} while (0)

◆ OP_FUNC_3OP

#define OP_FUNC_3OP (   name,
  op,
  type_name,
  type 
)
Value:
static inline void tca_op_func_3op_##name##_##type_name(void *dst, void *src0, void *src1, int count) { \
int i; \
type *s0 = (type *)src0; \
type *s1 = (type *)src1; \
type *d = (type *)dst; \
for(i = 0; i < count; i++) { \
*(d++) = *(s0++) op *(s1++); \
} \
}

◆ TCA_TYPE_FUNCTIONS

#define TCA_TYPE_FUNCTIONS (   name,
  type 
)
Value:
[TCA_OP_INT8] = tca_op_func_##type##_##name##_int8, \
[TCA_OP_UINT8] = tca_op_func_##type##_##name##_uint8, \
[TCA_OP_INT16] = tca_op_func_##type##_##name##_int16, \
[TCA_OP_UINT16] = tca_op_func_##type##_##name##_uint16, \
[TCA_OP_INT32] = tca_op_func_##type##_##name##_int32, \
[TCA_OP_UINT32] = tca_op_func_##type##_##name##_uint32, \
[TCA_OP_INT64] = tca_op_func_##type##_##name##_int64, \
[TCA_OP_UINT64] = tca_op_func_##type##_##name##_uint64, \
[TCA_OP_FLOAT] = tca_op_func_##type##_##name##_float, \
[TCA_OP_DOUBLE] = tca_op_func_##type##_##name##_double, \
[TCA_OP_LONG_DOUBLE] = tca_op_func_##type##_##name##_long_double \

Typedef Documentation

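◆ _XMP_tca_coll_info_t

typedef struct _XMP_tca_coll_info_type _XMP_tca_coll_info_t

◆ tca_op_fn_3op_t

typedef tca_op_func_3op_handler_t tca_op_fn_3op_t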

◆ tca_op_func_3op_handler_t

typedef void(* tca_op_func_3op_handler_t) (void *, void *, void *, int)

Enumeration Type Documentation

◆ anonymous enum

anonymous enum
Enumerator
TCA_OP_INT8 
TCA_OP_UINT8 
TCA_OP_INT16 
TCA_OP_UINT16 
TCA_OP_INT32 
TCA_OP_UINT32 
TCA_OP_INT64 
TCA_OP_UINT64 
TCA_OP_FLOAT 
TCA_OP_DOUBLE 
TCA_OP_LONG_DOUBLE 
TCA_OP_TYPE_MAX 
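193 enum {
194  TCA_OP_INT8,
195  TCA_OP_UINT8,
196  TCA_OP_INT16,
197  TCA_OP_UINT16,
198  TCA_OP_INT32,
199  TCA_OP_UINT32,
200  TCA_OP_INT64,
201  TCA_OP_UINT64,
202  TCA_OP_FLOAT,
203  TCA_OP_DOUBLE,
204  TCA_OP_LONG_DOUBLE,
205  TCA_OP_TYPE_MAX
206 };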

Function Documentation

◆ _XMP_reduce_tca_CLAUSE()

void _XMP_reduce_tca_CLAUSE ( void *  data_addr,
int  count,
int  datatype,
int  op 
)
440 {
441  // Not implemented
442  _XMP_fatal("_XMP_reduce_tca_CLAUSE is not implemented.");
443 }
Here is the call graph for this function:

◆ _XMP_reduce_tca_NODES_ENTIRE()

void _XMP_reduce_tca_NODES_ENTIRE ( _XMP_nodes_t *  nodes,
void *  addr,
int  count,
int  datatype,
int  op 
)
418 {
419  if (count == 0) {
420  return; // FIXME not good implementation
421  }
422  if (!nodes->is_member) {
423  return;
424  }
425  if (!_XMP_tca_coll_info_flag) {
426  init_coll_info();
427  }
428 
429  MPI_Comm mpi_comm = *((MPI_Comm *)nodes->comm);
430 
431  int id = get_coll_id(dev_addr, count, datatype, op, mpi_comm);
432  if (!coll_info.flag[id]) {
433  _XMP_reduce_init_tca(dev_addr, count, datatype, op, mpi_comm, id);
434  }
435 
436  _XMP_reduce_do_tca(dev_addr, count, datatype, op, mpi_comm, id);
437 }

Variable Documentation

◆ _XMP_tca_coll_info_flag

int _XMP_tca_coll_info_flag = 0

◆ coll_info

_XMP_tca_coll_info_t coll_info

_XMP_tca_coll_info_type::flag
_Bool flag[_XMP_TCA_COLL_MAX]
Definition: xmp_reduce_hybrid.c:30
coll_info
_XMP_tca_coll_info_t coll_info
Definition: xmp_reduce_tca.c:38
_XMP_nodes_type::is_member
int is_member
Definition: xmp_data_struct.h:46
TCA_OP_FLOAT
@ TCA_OP_FLOAT
Definition: xmp_reduce_tca.c:202
TCA_OP_TYPE_MAX
@ TCA_OP_TYPE_MAX
Definition: xmp_reduce_tca.c:205
TCA_OP_DOUBLE
@ TCA_OP_DOUBLE
Definition: xmp_reduce_tca.c:203
TCA_OP_LONG_DOUBLE
@ TCA_OP_LONG_DOUBLE
Definition: xmp_reduce_tca.c:204
TCA_OP_INT8
@ TCA_OP_INT8
Definition: xmp_reduce_tca.c:194
_XMP_tca_coll_info_flag
int _XMP_tca_coll_info_flag
Definition: xmp_reduce_tca.c:39
TCA_OP_INT32
@ TCA_OP_INT32
Definition: xmp_reduce_tca.c:198
_XMP_nodes_type::comm
_XMP_comm_t * comm
Definition: xmp_data_struct.h:53
_XMP_fatal
void _XMP_fatal(char *msg)
Definition: xmp_util.c:42
TCA_OP_UINT64
@ TCA_OP_UINT64
Definition: xmp_reduce_tca.c:201
TCA_OP_UINT16
@ TCA_OP_UINT16
Definition: xmp_reduce_tca.c:197
TCA_OP_INT16
@ TCA_OP_INT16
Definition: xmp_reduce_tca.c:196
TCA_OP_UINT32
@ TCA_OP_UINT32
Definition: xmp_reduce_tca.c:199
TCA_OP_INT64
@ TCA_OP_INT64
Definition: xmp_reduce_tca.c:200
TCA_OP_UINT8
@ TCA_OP_UINT8
Definition: xmp_reduce_tca.c:195