libxmp/libxmpf in Omni Compiler  1.3.4
xmp_reduce_acc.c File Reference
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include "xmp_internal.h"
Include dependency graph for xmp_reduce_acc.c:

Macros

#define MPI_PORTABLE_PLATFORM_H
 

Functions

void _XMP_reduce_gpu_NODES_ENTIRE (_XMP_nodes_t *nodes, void *addr, int count, int datatype, int op)
 
void _XMP_reduce_gpu_CLAUSE (void *data_addr, int count, int datatype, int op)
 
void _XMP_reduce_acc_NODES_ENTIRE (_XMP_nodes_t *nodes, void *data_addr, int count, int datatype, int op)
 
void _XMP_reduce_acc_FLMM_NODES_ENTIRE (_XMP_nodes_t *nodes, void *addr, int count, int datatype, int op, int num_locs,...)
 
void _XMP_reduce_acc_CLAUSE (void *data_addr, int count, int datatype, int op)
 
void _XMP_reduce_acc_FLMM_CLAUSE (void *data_addr, int count, int datatype, int op, int num_locs,...)
 

Macro Definition Documentation

◆ MPI_PORTABLE_PLATFORM_H

#define MPI_PORTABLE_PLATFORM_H

Function Documentation

◆ _XMP_reduce_acc_CLAUSE()

void _XMP_reduce_acc_CLAUSE ( void *  data_addr,
int  count,
int  datatype,
int  op 
)
69 {
70  set_comm_mode();
71 
72  if(comm_mode >= 1){
73  _XMP_reduce_CLAUSE(data_addr, count, datatype, op);
74  }else{
75 #ifdef _XMP_TCA
76  //
77 #else
78  _XMP_reduce_gpu_CLAUSE(data_addr, count, datatype, op);
79 #endif
80 
81  }
82 }

◆ _XMP_reduce_acc_FLMM_CLAUSE()

void _XMP_reduce_acc_FLMM_CLAUSE ( void *  data_addr,
int  count,
int  datatype,
int  op,
int  num_locs,
  ... 
)
85 {
86  _XMP_fatal("_XMP_reduce_acc_FLMM_CLAUSE is unimplemented");
87 }
Here is the call graph for this function:

◆ _XMP_reduce_acc_FLMM_NODES_ENTIRE()

void _XMP_reduce_acc_FLMM_NODES_ENTIRE ( _XMP_nodes_t nodes,
void *  addr,
int  count,
int  datatype,
int  op,
int  num_locs,
  ... 
)
64 {
65  _XMP_fatal("_XMP_reduce_acc_FLMM_NODES_ENTIRE is unimplemented");
66 }
Here is the call graph for this function:

◆ _XMP_reduce_acc_NODES_ENTIRE()

void _XMP_reduce_acc_NODES_ENTIRE ( _XMP_nodes_t nodes,
void *  data_addr,
int  count,
int  datatype,
int  op 
)
44 {
45  set_comm_mode();
46 
47  if(comm_mode >= 1){
48  _XMP_reduce_NODES_ENTIRE(nodes, data_addr, count, datatype, op);
49  }else{
50 #ifdef _XMP_TCA
51  set_hybrid_comm();
52  if (enable_hybrid) {
53  _XMP_reduce_hybrid_NODES_ENTIRE(nodes, data_addr, count, datatype, op);
54  } else {
55  _XMP_reduce_tca_NODES_ENTIRE(nodes, data_addr, count, datatype, op);
56  }
57 #else
58  _XMP_reduce_gpu_NODES_ENTIRE(nodes, data_addr, count, datatype, op);
59 #endif
60  }
61 }

◆ _XMP_reduce_gpu_CLAUSE()

void _XMP_reduce_gpu_CLAUSE ( void *  data_addr,
int  count,
int  datatype,
int  op 
)
144  {
145  // setup information
146  MPI_Datatype mpi_datatype = MPI_DATATYPE_NULL;
147  size_t datatype_size = 0;
148  MPI_Op mpi_op;
149  _XMP_setup_reduce_type(&mpi_datatype, &datatype_size, datatype);
150  _XMP_setup_reduce_op(&mpi_op, op);
151 
152  size_t size = datatype_size * count;
153  void *host_buf = _XMP_alloc(size);
154 
155  // copy dev to host
156  //e = cudaMemcpy(host_buf, dev_addr, size, cudaMemcpyDeviceToHost);
157  _XACC_memory_read(host_buf, dev_addr, 0, size, _XACC_QUEUE_NULL, true);
158 
159  // reduce
160  MPI_Allreduce(MPI_IN_PLACE, host_buf, count, mpi_datatype, mpi_op, *((MPI_Comm *)(_XMP_get_execution_nodes())->comm));
161 
162  // copy host to dev
163  //e = cudaMemcpy(dev_addr, host_buf, size, cudaMemcpyHostToDevice);
164  _XACC_memory_write(dev_addr, 0, host_buf, size, _XACC_QUEUE_NULL, true);
165 
166  _XMP_free(host_buf);
167 }

◆ _XMP_reduce_gpu_NODES_ENTIRE()

void _XMP_reduce_gpu_NODES_ENTIRE ( _XMP_nodes_t nodes,
void *  addr,
int  count,
int  datatype,
int  op 
)
113 {
114  if (count == 0) {
115  return; // FIXME not good implementation
116  }
117  if (!nodes->is_member) {
118  return;
119  }
120 
121  // setup information
122  MPI_Datatype mpi_datatype = MPI_DATATYPE_NULL;
123  size_t datatype_size = 0;
124  MPI_Op mpi_op;
125  _XMP_setup_reduce_type(&mpi_datatype, &datatype_size, datatype);
126  _XMP_setup_reduce_op(&mpi_op, op);
127 
128  size_t size = datatype_size * count;
129  void *host_buf = _XMP_alloc(size);
130 
131  // copy dev to host
132  //e = cudaMemcpy(host_buf, dev_addr, size, cudaMemcpyDeviceToHost);
133  _XACC_memory_read(host_buf, dev_addr, 0, size, _XACC_QUEUE_NULL, true);
134 
135  MPI_Allreduce(MPI_IN_PLACE, host_buf, count, mpi_datatype, mpi_op, *((MPI_Comm *)nodes->comm));
136 
137  // copy host to dev
138  //e = cudaMemcpy(dev_addr, host_buf, size, cudaMemcpyHostToDevice);
139  _XACC_memory_write(dev_addr, 0, host_buf, size, _XACC_QUEUE_NULL, true);
140 
141  _XMP_free(host_buf);
142 }
_XMP_reduce_hybrid_NODES_ENTIRE
void _XMP_reduce_hybrid_NODES_ENTIRE(_XMP_nodes_t *nodes, void *addr, int count, int datatype, int op)
Definition: xmp_reduce_hybrid.c:437
_XMP_nodes_type::is_member
int is_member
Definition: xmp_data_struct.h:46
_XMP_alloc
void * _XMP_alloc(size_t size)
Definition: xmp_util.c:21
_XMP_setup_reduce_type
void _XMP_setup_reduce_type(MPI_Datatype *mpi_datatype, size_t *datatype_size, int datatype)
Definition: xmp_reduce.c:13
_XACC_memory_read
void _XACC_memory_read(void *addr, _XACC_memory_t memory, size_t memory_offset, size_t size, _XACC_queue_t queue, bool is_blocking)
Definition: xacc_util_cl.c:73
_XACC_memory_write
void _XACC_memory_write(_XACC_memory_t memory, size_t memory_offset, void *addr, size_t size, _XACC_queue_t queue, bool is_blocking)
Definition: xacc_util_cl.c:81
_XMP_reduce_gpu_NODES_ENTIRE
void _XMP_reduce_gpu_NODES_ENTIRE(_XMP_nodes_t *nodes, void *addr, int count, int datatype, int op)
Definition: xmp_reduce_gpu.c:112
_XMP_reduce_NODES_ENTIRE
void _XMP_reduce_NODES_ENTIRE(void *nodes, void *addr, int count, int datatype, int op)
_XMP_free
void _XMP_free(void *p)
Definition: xmp_util.c:37
_XMP_reduce_CLAUSE
void _XMP_reduce_CLAUSE(void *data_addr, int count, int datatype, int op)
Definition: xmp_reduce.c:351
_XMP_reduce_gpu_CLAUSE
void _XMP_reduce_gpu_CLAUSE(void *data_addr, int count, int datatype, int op)
Definition: xmp_reduce_gpu.c:144
_XMP_nodes_type::comm
_XMP_comm_t * comm
Definition: xmp_data_struct.h:53
_XMP_fatal
void _XMP_fatal(char *msg)
Definition: xmp_util.c:42
_XMP_reduce_tca_NODES_ENTIRE
void _XMP_reduce_tca_NODES_ENTIRE(_XMP_nodes_t *nodes, void *addr, int count, int datatype, int op)
Definition: xmp_reduce_tca.c:417
_XMP_get_execution_nodes
void * _XMP_get_execution_nodes(void)
Definition: xmp_nodes_stack.c:46