libxmp/libxmpf in Omni Compiler  1.3.4
xmp_reduce_gpu.c File Reference
#include <xmp_internal.h>
Include dependency graph for xmp_reduce_gpu.c:

Functions

void _XMP_reduce_gpu_NODES_ENTIRE (_XMP_nodes_t *nodes, void *addr, int count, int datatype, int op)
 
void _XMP_reduce_gpu_FLMM_NODES_ENTIRE (_XMP_nodes_t *nodes, void *addr, int count, int datatype, int op, int num_locs,...)
 
void _XMP_reduce_gpu_CLAUSE (void *data_addr, int count, int datatype, int op)
 
void _XMP_reduce_gpu_FLMM_CLAUSE (void *data_addr, int count, int datatype, int op, int num_locs,...)
 

Function Documentation

◆ _XMP_reduce_gpu_CLAUSE()

void _XMP_reduce_gpu_CLAUSE ( void *  data_addr,
int  count,
int  datatype,
int  op 
)
144  {
145  // setup information
146  MPI_Datatype mpi_datatype = MPI_DATATYPE_NULL;
147  size_t datatype_size = 0;
148  MPI_Op mpi_op;
149  _XMP_setup_reduce_type(&mpi_datatype, &datatype_size, datatype);
150  _XMP_setup_reduce_op(&mpi_op, op);
151 
152  size_t size = datatype_size * count;
153  void *host_buf = _XMP_alloc(size);
154 
155  // copy dev to host
156  //e = cudaMemcpy(host_buf, dev_addr, size, cudaMemcpyDeviceToHost);
157  _XACC_memory_read(host_buf, dev_addr, 0, size, _XACC_QUEUE_NULL, true);
158 
159  // reduce
160  MPI_Allreduce(MPI_IN_PLACE, host_buf, count, mpi_datatype, mpi_op, *((MPI_Comm *)(_XMP_get_execution_nodes())->comm));
161 
162  // copy host to dev
163  //e = cudaMemcpy(dev_addr, host_buf, size, cudaMemcpyHostToDevice);
164  _XACC_memory_write(dev_addr, 0, host_buf, size, _XACC_QUEUE_NULL, true);
165 
166  _XMP_free(host_buf);
167 }

◆ _XMP_reduce_gpu_FLMM_CLAUSE()

void _XMP_reduce_gpu_FLMM_CLAUSE ( void *  data_addr,
int  count,
int  datatype,
int  op,
int  num_locs,
  ... 
)

◆ _XMP_reduce_gpu_FLMM_NODES_ENTIRE()

void _XMP_reduce_gpu_FLMM_NODES_ENTIRE ( _XMP_nodes_t nodes,
void *  addr,
int  count,
int  datatype,
int  op,
int  num_locs,
  ... 
)

◆ _XMP_reduce_gpu_NODES_ENTIRE()

void _XMP_reduce_gpu_NODES_ENTIRE ( _XMP_nodes_t nodes,
void *  addr,
int  count,
int  datatype,
int  op 
)
113 {
114  if (count == 0) {
115  return; // FIXME not good implementation
116  }
117  if (!nodes->is_member) {
118  return;
119  }
120 
121  // setup information
122  MPI_Datatype mpi_datatype = MPI_DATATYPE_NULL;
123  size_t datatype_size = 0;
124  MPI_Op mpi_op;
125  _XMP_setup_reduce_type(&mpi_datatype, &datatype_size, datatype);
126  _XMP_setup_reduce_op(&mpi_op, op);
127 
128  size_t size = datatype_size * count;
129  void *host_buf = _XMP_alloc(size);
130 
131  // copy dev to host
132  //e = cudaMemcpy(host_buf, dev_addr, size, cudaMemcpyDeviceToHost);
133  _XACC_memory_read(host_buf, dev_addr, 0, size, _XACC_QUEUE_NULL, true);
134 
135  MPI_Allreduce(MPI_IN_PLACE, host_buf, count, mpi_datatype, mpi_op, *((MPI_Comm *)nodes->comm));
136 
137  // copy host to dev
138  //e = cudaMemcpy(dev_addr, host_buf, size, cudaMemcpyHostToDevice);
139  _XACC_memory_write(dev_addr, 0, host_buf, size, _XACC_QUEUE_NULL, true);
140 
141  _XMP_free(host_buf);
142 }
_XMP_nodes_type::is_member
int is_member
Definition: xmp_data_struct.h:46
_XMP_alloc
void * _XMP_alloc(size_t size)
Definition: xmp_util.c:21
_XMP_setup_reduce_type
void _XMP_setup_reduce_type(MPI_Datatype *mpi_datatype, size_t *datatype_size, int datatype)
Definition: xmp_reduce.c:13
_XACC_memory_read
void _XACC_memory_read(void *addr, _XACC_memory_t memory, size_t memory_offset, size_t size, _XACC_queue_t queue, bool is_blocking)
Definition: xacc_util_cl.c:73
_XACC_memory_write
void _XACC_memory_write(_XACC_memory_t memory, size_t memory_offset, void *addr, size_t size, _XACC_queue_t queue, bool is_blocking)
Definition: xacc_util_cl.c:81
_XMP_free
void _XMP_free(void *p)
Definition: xmp_util.c:37
_XMP_nodes_type::comm
_XMP_comm_t * comm
Definition: xmp_data_struct.h:53
_XMP_get_execution_nodes
void * _XMP_get_execution_nodes(void)
Definition: xmp_nodes_stack.c:46