libxmp/libxmpf in Omni Compiler  1.3.4
xmp_reflect.c File Reference
#include "xmp_internal.h"
#include <string.h>
#include <stdio.h>
#include <math.h>
Include dependency graph for xmp_reflect.c:

Functions

void _XMP_reflect_pcopy_sched_dim (_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type)
 
int _XMP_get_owner_pos (_XMP_array_t *a, int dim, int index)
 
void _XMP_reflect_pack_dim (_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
 
void _XMP_reflect_async_cardinal (_XMP_array_t *a, int async_id)
 
void _XMP_reflect_async_ordinal (_XMP_array_t *a, int async_id)
 
void xmp_dbg_printf (char *fmt,...)
 
void _XMP_set_reflect__ (_XMP_array_t *a, int dim, int lwidth, int uwidth, int is_periodic)
 
void _XMP_reflect__ (_XMP_array_t *a)
 
void _XMP_reflect_async__ (_XMP_array_t *a, int async_id)
 

Variables

int _xmp_reflect_pack_flag
 

Function Documentation

◆ _XMP_get_owner_pos()

/**
 * Compute the position of the node that owns global index `index` of
 * dimension `dim` of array `a`.
 *
 * The position is derived from the template dimension that `dim` is aligned
 * with:
 *  - _XMP_N_ALIGN_BLOCK:  (index + align_subscript - template lower bound)
 *                         divided by the template's par_chunk_width.
 *  - _XMP_N_ALIGN_GBLOCK: the first i such that
 *                         mapping_array[i] <= index + align_subscript < mapping_array[i+1].
 *
 * @param a     array descriptor
 * @param dim   array dimension to look at
 * @param index global index in that dimension
 * @return the position (callers in this file treat it as 0-origin). For any
 *         other align manner, or when no GBLOCK interval matches,
 *         _XMP_fatal() is called; -1 is returned only if that call returns.
 */
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
{

  int align_offset = a->info[dim].align_subscript;

  int tdim = a->info[dim].align_template_index;
  int tlb = a->align_template->info[tdim].ser_lower;
  int chunk = a->align_template->chunk[tdim].par_chunk_width;

  int pos;
  switch (a->info[dim].align_manner){

  case _XMP_N_ALIGN_BLOCK:
    pos = (index + align_offset - tlb) / chunk;
    return pos;

  case _XMP_N_ALIGN_GBLOCK:
    {
      int tpos = index + align_offset; // tlb is not subtracted because the mapping_array is 1-origin.
      long long *m = a->align_template->chunk[tdim].mapping_array;
      int np = a->align_template->chunk[tdim].onto_nodes_info->size;
      // Reads m[0..np]; assumes mapping_array holds np + 1 boundaries -- TODO confirm.
      for (int i = 0; i < np; i++){
        if (m[i] <= tpos && tpos < m[i+1]){
          return i;
        }
      }
    }
  }

  // Reached for unsupported align manners and for GBLOCK indices that fall
  // outside every interval.
  _XMP_fatal("cannot calculate position");
  return -1;
}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _XMP_reflect__()

/**
 * Synchronous reflect: update the shadow regions of array `a`.
 *
 * The widths and periodic flags to use come from the file-level state
 * (_xmp_lwidth, _xmp_uwidth, _xmp_is_periodic) that _XMP_set_reflect__()
 * fills in. When _xmp_set_reflect_flag is not set, the full declared shadow
 * widths of every dimension are used, non-periodic.
 *
 * For each dimension with a normal shadow and a non-zero width, the cached
 * schedule is (re)built when it is uninitialized or its width/periodic
 * settings differ, then the four persistent requests are started and waited
 * for, with pack/unpack around them when _xmp_reflect_pack_flag is set.
 *
 * The file-level reflect state is cleared before returning.
 *
 * @param a array descriptor; nothing is done (except clearing the flag) when
 *          the array is not allocated on this node.
 */
void _XMP_reflect__(_XMP_array_t *a)
{

  int is_ordinal = 1;

  //_XMP_RETURN_IF_SINGLE;
  if (!a->is_allocated){
    _xmp_set_reflect_flag = 0;
    return;
  }

  // No explicit widths were given: fall back to the declared shadow sizes.
  if (!_xmp_set_reflect_flag){
    for (int i = 0; i < a->dim; i++){
      _XMP_array_info_t *ai = &(a->info[i]);
      _xmp_lwidth[i] = ai->shadow_size_lo;
      _xmp_uwidth[i] = ai->shadow_size_hi;
      _xmp_is_periodic[i] = 0;
    }
  }

  _XMP_TSTART(t0);
  for (int i = 0; i < a->dim; i++){

    _XMP_array_info_t *ai = &(a->info[i]);

    if (ai->shadow_type == _XMP_N_SHADOW_NONE){
      continue;
    }
    else if (ai->shadow_type == _XMP_N_SHADOW_NORMAL){

      _XMP_reflect_sched_t *reflect = ai->reflect_sched;

      if (_xmp_lwidth[i] || _xmp_uwidth[i]){

        _XMP_ASSERT(reflect);

        // Rebuild the schedule only when it is missing or stale.
        if (!reflect->reflect_is_initialized ||
            _xmp_lwidth[i] != reflect->lo_width ||
            _xmp_uwidth[i] != reflect->hi_width ||
            _xmp_is_periodic[i] != reflect->is_periodic){

          if (_xmp_reflect_pack_flag){
            _XMP_reflect_pcopy_sched_dim(a, i, _xmp_lwidth[i], _xmp_uwidth[i], _xmp_is_periodic[i], _XMP_COMM_REFLECT);
          }
          else {
            _XMP_reflect_normal_sched_dim(a, i, _xmp_lwidth[i], _xmp_uwidth[i], _xmp_is_periodic[i]);
          }

          // Must stay after the scheduling call: the pcopy scheduler compares
          // the requested widths against the previously cached ones.
          reflect->reflect_is_initialized = 1;
          reflect->lo_width = _xmp_lwidth[i];
          reflect->hi_width = _xmp_uwidth[i];
          reflect->is_periodic = _xmp_is_periodic[i];
        }

        if (_xmp_reflect_pack_flag && reflect->req[0] != MPI_REQUEST_NULL){
          _XMP_TSTART(t0);
          _XMP_reflect_pack_dim(a, i, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic, _XMP_COMM_REFLECT);
          _XMP_TEND(xmptiming_.t_copy, t0);
        }

        _XMP_TSTART(t0);
        if (reflect->req[0] != MPI_REQUEST_NULL) // if req[0] isn't null, any others shouldn't be null.
          MPI_Startall(4, reflect->req);
        _XMP_TEND2(xmptiming_.t_comm, xmptiming_.tdim_comm[i], t0);

        // Ordinal mode: finish this dimension before moving to the next one.
        if (is_ordinal){
          _XMP_TSTART(t0);
          MPI_Waitall(4, reflect->req, MPI_STATUSES_IGNORE);
          _XMP_TEND2(xmptiming_.t_wait, xmptiming_.tdim_wait[i], t0);
          if (_xmp_reflect_pack_flag && reflect->req[0] != MPI_REQUEST_NULL){
            _XMP_TSTART(t0);
            _XMP_reflect_unpack_dim(a, i, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);
            _XMP_TEND(xmptiming_.t_copy, t0);
          }
        }

      }

    }
    else { /* _XMP_N_SHADOW_FULL */
      // NOTE(review): reconstructed call; confirm the argument list against
      // the declaration _XMP_reflect_shadow_FULL(array_addr, array_desc, array_index).
      _XMP_reflect_shadow_FULL(a->array_addr_p, a, i);
    }

  }
  _XMP_TEND(xmptiming_.t_sched, t0);

  // t0 = MPI_Wtime();
  if (!is_ordinal)
    _XMP_reflect_wait(a, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);
  // t_wait = t_wait + (MPI_Wtime() - t0);

  // Clear the per-call reflect state so it cannot affect the next reflect.
  _xmp_set_reflect_flag = 0;
  for (int i = 0; i < a->dim; i++){
    _xmp_lwidth[i] = 0;
    _xmp_uwidth[i] = 0;
    _xmp_is_periodic[i] = 0;
  }

}

◆ _XMP_reflect_async__()

/**
 * Asynchronous reflect: start the shadow update of array `a` and register
 * its requests with the current asynchronous communication.
 *
 * Widths and periodic flags come from the file-level state set by
 * _XMP_set_reflect__(); when _xmp_set_reflect_flag is not set, the declared
 * shadow widths of every dimension are used (full shadows are rejected with
 * _XMP_fatal() on that path).
 *
 * If exactly one dimension has a non-zero width the cardinal scheme is used,
 * otherwise the ordinal scheme. The file-level reflect state is cleared
 * before returning, including when no dimension needs a reflect.
 *
 * @param a        array descriptor; nothing is done (except clearing the
 *                 flag) when the array is not allocated on this node.
 * @param async_id passed through to the cardinal/ordinal implementation
 */
void _XMP_reflect_async__(_XMP_array_t *a, int async_id)
{

  int is_ordinal = 1;

  //_XMP_RETURN_IF_SINGLE;
  if (!a->is_allocated){
    _xmp_set_reflect_flag = 0;
    return;
  }

  // No explicit widths were given: fall back to the declared shadow sizes.
  if (!_xmp_set_reflect_flag){
    for (int i = 0; i < a->dim; i++){
      _XMP_array_info_t *ai = &(a->info[i]);
      if (ai->shadow_type == _XMP_N_SHADOW_FULL){
        _XMP_fatal("asynchronous reflect for full shadow not supported.");
      }
      _xmp_lwidth[i] = ai->shadow_size_lo;
      _xmp_uwidth[i] = ai->shadow_size_hi;
      _xmp_is_periodic[i] = 0;
    }
  }

  // Count the dimensions that actually have something to reflect.
  int reflect_ndims = 0;
  for (int i = 0; i < a->dim; i++){
    if (_xmp_lwidth[i] || _xmp_uwidth[i]){
      reflect_ndims++;
    }
  }

  // With no dimension to reflect nothing is started, but control still falls
  // through to the reset below: returning early here would leave
  // _xmp_set_reflect_flag and the periodic flags set for the next reflect.
  if (reflect_ndims == 1 || (reflect_ndims > 0 && !is_ordinal)){
    _XMP_reflect_async_cardinal(a, async_id);
  }
  else if (reflect_ndims > 0){
    _XMP_reflect_async_ordinal(a, async_id);
  }

  // Clear the per-call reflect state so it cannot affect the next reflect.
  _xmp_set_reflect_flag = 0;
  for (int i = 0; i < a->dim; i++){
    _xmp_lwidth[i] = 0;
    _xmp_uwidth[i] = 0;
    _xmp_is_periodic[i] = 0;
  }

}

◆ _XMP_reflect_async_cardinal()

/**
 * Asynchronous reflect, one dimension at a time ("cardinal" directions).
 *
 * For every dimension with a normal shadow and a non-zero requested width,
 * the per-dimension schedule is rebuilt if its cached width/periodic settings
 * differ, its four persistent requests are appended to the current
 * asynchronous communication, and the requests are started. Completion is
 * not waited for here.
 *
 * Calls _XMP_fatal() when the request table of the current asynchronous
 * communication would exceed _XMP_MAX_ASYNC_REQS.
 *
 * @param a        array descriptor
 * @param async_id not referenced in this function body
 */
void _XMP_reflect_async_cardinal(_XMP_array_t *a, int async_id)
{

  _XMP_async_comm_t *async = _XMP_get_current_async();
  // New requests are appended after the ones already registered.
  MPI_Request *reqs = &async->reqs[async->nreqs];
  int nreqs = 0;

  _XMP_TSTART(t0);
  for (int i = 0; i < a->dim; i++){

    _XMP_array_info_t *ai = &(a->info[i]);

    if (ai->shadow_type == _XMP_N_SHADOW_NONE){
      continue;
    }
    else if (ai->shadow_type == _XMP_N_SHADOW_NORMAL){

      _XMP_reflect_sched_t *reflect = ai->reflect_sched;

      if (_xmp_lwidth[i] || _xmp_uwidth[i]){

        _XMP_ASSERT(reflect);

        // Rebuild the schedule only when it is missing or stale.
        if (reflect->is_periodic == -1 /* not set yet */ ||
            _xmp_lwidth[i] != reflect->lo_width ||
            _xmp_uwidth[i] != reflect->hi_width ||
            _xmp_is_periodic[i] != reflect->is_periodic){

          reflect->lo_width = _xmp_lwidth[i];
          reflect->hi_width = _xmp_uwidth[i];
          reflect->is_periodic = _xmp_is_periodic[i];

          _XMP_reflect_normal_sched_dim(a, i, _xmp_lwidth[i], _xmp_uwidth[i], _xmp_is_periodic[i]);

        }

        if (async->nreqs + nreqs + 4 > _XMP_MAX_ASYNC_REQS){
          _XMP_fatal("too many arrays in an asynchronous reflect");
        }
        memcpy(&reqs[nreqs], reflect->req, 4 * sizeof(MPI_Request));
        nreqs += 4;

        _XMP_TSTART(t0);
        if (reflect->req[0] != MPI_REQUEST_NULL) // if req[0] isn't null, any others shouldn't be null.
          MPI_Startall(4, reflect->req);
        _XMP_TEND2(xmptiming_.t_comm, xmptiming_.tdim_comm[i], t0);

      }

    }
    else { /* _XMP_N_SHADOW_FULL */
      // NOTE(review): reconstructed call; confirm the argument list against
      // the declaration _XMP_reflect_shadow_FULL(array_addr, array_desc, array_index).
      _XMP_reflect_shadow_FULL(a->array_addr_p, a, i);
    }

  }
  _XMP_TEND(xmptiming_.t_sched, t0);

  async->nreqs += nreqs;

}
Here is the call graph for this function:

◆ _XMP_reflect_async_ordinal()

/**
 * Asynchronous reflect over all neighbor directions at once ("ordinal").
 *
 * A per-array schedule (a->async_reflect) is created on first use and reused
 * as long as the requested lower/upper widths and periodic flags of every
 * dimension are unchanged. Otherwise the old datatypes and requests are freed
 * and _XMP_reflect_sched_dir() is called for every non-zero direction vector
 * whose components lie in {-1, 0, 1}, limited per dimension by whether a
 * lower/upper width was requested.
 *
 * The schedule's requests are then appended to the current asynchronous
 * communication and started. Completion is not waited for here.
 *
 * Calls _XMP_fatal() when the request table of the current asynchronous
 * communication would exceed _XMP_MAX_ASYNC_REQS.
 *
 * @param a        array descriptor
 * @param async_id not referenced in this function body
 */
void _XMP_reflect_async_ordinal(_XMP_array_t *a, int async_id)
{

  int n = a->dim;
  _XMP_async_reflect_t *async_reflect;

  _Bool reusable_sched = false;

  if (!a->async_reflect){
    // Capacity: two slots for each of the (3^n - 1) non-zero directions.
    // Integer arithmetic keeps the count exact (no floating-point pow()).
    int ndirs = 1;
    for (int i = 0; i < n; i++){
      ndirs *= 3;
    }
    int max_nreqs = (ndirs - 1) * 2;

    async_reflect = (_XMP_async_reflect_t *)_XMP_alloc(sizeof(_XMP_async_reflect_t));
    async_reflect->datatype = (MPI_Datatype *)_XMP_alloc(sizeof(MPI_Datatype) * max_nreqs);
    async_reflect->reqs = (MPI_Request *)_XMP_alloc(sizeof(MPI_Request) * max_nreqs);
    for (int i = 0; i < max_nreqs; i++){
      async_reflect->datatype[i] = MPI_DATATYPE_NULL;
      async_reflect->reqs[i] = MPI_REQUEST_NULL;
    }
    async_reflect->nreqs = 0;
    a->async_reflect = async_reflect;
  }
  else {
    // An existing schedule is reusable only if every dimension matches.
    reusable_sched = true;
    async_reflect = a->async_reflect;
    for (int i = 0; i < n; i++){
      if (async_reflect->lwidth[i] != _xmp_lwidth[i] ||
          async_reflect->uwidth[i] != _xmp_uwidth[i] ||
          async_reflect->is_periodic[i] != _xmp_is_periodic[i]){
        reusable_sched = false;
        break;
      }
    }
  }

  if (!reusable_sched){

    // Per-dimension direction range; dimensions beyond n stay at [0, 0].
    int lb[_XMP_N_MAX_DIM] = { 0 };
    int ub[_XMP_N_MAX_DIM] = { 0 };

    for (int i = 0; i < n; i++){
      async_reflect->lwidth[i] = _xmp_lwidth[i];
      async_reflect->uwidth[i] = _xmp_uwidth[i];
      async_reflect->is_periodic[i] = _xmp_is_periodic[i];

      if (_xmp_lwidth[i] > 0) lb[i] = -1;
      if (_xmp_uwidth[i] > 0) ub[i] = 1;
    }

    // Release whatever the previous schedule created.
    for (int i = 0; i < async_reflect->nreqs; i++){
      if (async_reflect->datatype[i] != MPI_DATATYPE_NULL)
        MPI_Type_free(&async_reflect->datatype[i]);
      if (async_reflect->reqs[i] != MPI_REQUEST_NULL)
        MPI_Request_free(&async_reflect->reqs[i]);
    }
    async_reflect->nreqs = 0;

    // The nest is written out for seven dimensions, so it relies on
    // _XMP_N_MAX_DIM being at least 7.
    int ishadow[_XMP_N_MAX_DIM];
    for (ishadow[0] = lb[0]; ishadow[0] <= ub[0]; ishadow[0]++){
    for (ishadow[1] = lb[1]; ishadow[1] <= ub[1]; ishadow[1]++){
    for (ishadow[2] = lb[2]; ishadow[2] <= ub[2]; ishadow[2]++){
    for (ishadow[3] = lb[3]; ishadow[3] <= ub[3]; ishadow[3]++){
    for (ishadow[4] = lb[4]; ishadow[4] <= ub[4]; ishadow[4]++){
    for (ishadow[5] = lb[5]; ishadow[5] <= ub[5]; ishadow[5]++){
    for (ishadow[6] = lb[6]; ishadow[6] <= ub[6]; ishadow[6]++){

      // When ishadow > 0, upper shadow is to be updated, and vice versa.

      // Skip the all-zero vector: it is the array's own region.
      int nnzero = 0;
      for (int i = 0; i < n; i++){
        if (ishadow[i] != 0) nnzero++;
      }
      if (nnzero == 0) continue;

      _XMP_reflect_sched_dir(a, ishadow, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);

    }}}}}}}

  }

  _XMP_async_comm_t *async = _XMP_get_current_async();
  // New requests are appended after the ones already registered.
  MPI_Request *reqs = &async->reqs[async->nreqs];

  // copy to async
  if (async->nreqs + async_reflect->nreqs > _XMP_MAX_ASYNC_REQS){
    _XMP_fatal("too many arrays in an asynchronous reflect");
  }
  memcpy(reqs, async_reflect->reqs, async_reflect->nreqs * sizeof(MPI_Request));

  async->nreqs += async_reflect->nreqs;

  _XMP_TSTART(t0);
  MPI_Startall(async_reflect->nreqs, reqs);
  _XMP_TEND(xmptiming_.t_start, t0);

}
Here is the call graph for this function:

◆ _XMP_reflect_pack_dim()

/**
 * Pack the boundary data of dimension `i` of array `a` into the contiguous
 * communication buffers of that dimension's reflect schedule.
 *
 * For _XMP_COMM_REFLECT nothing is packed for the last dimension of a
 * Fortran-ordered array or the first dimension of a C-ordered array; the
 * function returns immediately in those cases.
 *
 * Source/destination selection:
 *  - _XMP_COMM_REDUCE_SHADOW: lo/hi_recv_array -> lo/hi_recv_buf, with
 *    lo_rank / hi_rank as the peers that decide whether packing is needed.
 *  - otherwise:               lo/hi_send_array -> lo/hi_send_buf, with
 *    hi_rank (lower part) and lo_rank (upper part) as the peers.
 *
 * A side is packed only if its width is non-zero and its peer is not
 * MPI_PROC_NULL, and only for normal shadows.
 *
 * @param a                array descriptor
 * @param i                dimension to pack
 * @param lwidth           per-dimension lower widths (lwidth[i] is used)
 * @param uwidth           per-dimension upper widths (uwidth[i] is used)
 * @param is_periodic      not referenced in this function body
 * @param shadow_comm_type _XMP_COMM_REFLECT or _XMP_COMM_REDUCE_SHADOW
 */
void _XMP_reflect_pack_dim(_XMP_array_t *a, int i, int *lwidth, int *uwidth,
                           int *is_periodic, int shadow_comm_type)
{

  char *pack_dst_lo, *pack_src_lo; int dst_lo;
  char *pack_dst_hi, *pack_src_hi; int dst_hi;

  if (shadow_comm_type == _XMP_COMM_REFLECT){
    if (a->order == MPI_ORDER_FORTRAN){ /* for XMP/F */
      if (i == a->dim - 1) return;
    }
    else if (a->order == MPI_ORDER_C){ /* for XMP/C */
      if (i == 0) return;
    }
    else {
      _XMP_fatal("cannot determin the base language.");
    }
  }

  _XMP_array_info_t *ai = &(a->info[i]);
  _XMP_reflect_sched_t *reflect = ai->reflect_sched;

  if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){
    pack_dst_lo = (char *)reflect->lo_recv_buf;
    pack_src_lo = (char *)reflect->lo_recv_array;
    dst_lo = reflect->lo_rank;
    pack_dst_hi = (char *)reflect->hi_recv_buf;
    pack_src_hi = (char *)reflect->hi_recv_array;
    dst_hi = reflect->hi_rank;
  }
  else {
    pack_dst_lo = (char *)reflect->lo_send_buf;
    pack_src_lo = (char *)reflect->lo_send_array;
    dst_lo = reflect->hi_rank;
    pack_dst_hi = (char *)reflect->hi_send_buf;
    pack_src_hi = (char *)reflect->hi_send_array;
    dst_hi = reflect->lo_rank;
  }


  if (ai->shadow_type == _XMP_N_SHADOW_NORMAL){

    // for lower reflect
    if (lwidth[i] && dst_lo != MPI_PROC_NULL){
      _XMP_pack_vector(pack_dst_lo, pack_src_lo,
                       reflect->count, lwidth[i] * reflect->blocklength,
                       reflect->stride);
    }

    // for upper reflect
    if (uwidth[i] && dst_hi != MPI_PROC_NULL){
      _XMP_pack_vector(pack_dst_hi, pack_src_hi,
                       reflect->count, uwidth[i] * reflect->blocklength,
                       reflect->stride);
    }

  }

}
Here is the call graph for this function:

◆ _XMP_reflect_pcopy_sched_dim()

/**
 * Build (or refresh) the pack-and-copy communication schedule of dimension
 * `target_dim` of array `adesc` and create its persistent MPI requests.
 *
 * Steps, in order:
 *  1. Return early if both widths are zero, or if the dimension is mapped
 *     onto a single node and the reflect is not periodic.
 *  2. Determine the lower/upper neighbor ranks (wrapping around at the ends).
 *  3. Unless a previous pcopy schedule with identical widths/periodicity can
 *     be reused, compute count/blocklength/stride of the boundary slab, the
 *     send/receive base addresses inside the array, and the communication
 *     buffers. For _XMP_COMM_REFLECT on the last dimension (Fortran order) or
 *     the first dimension (C order) the array memory itself is used as the
 *     buffer; otherwise buffers are allocated.
 *  4. Free the old persistent requests and create new ones
 *     (reflect->req for reflect, reflect->req_reduce for reduce-shadow, where
 *     the send/receive roles of the buffers are swapped).
 *
 * Calls _XMP_fatal() if a requested width exceeds the declared shadow width
 * or the array order is neither Fortran nor C.
 *
 * @param adesc            array descriptor
 * @param target_dim       dimension to schedule
 * @param lwidth           lower reflect width
 * @param uwidth           upper reflect width
 * @param is_periodic      non-zero for a periodic reflect
 * @param shadow_comm_type _XMP_COMM_REFLECT or _XMP_COMM_REDUCE_SHADOW
 */
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim,
                                  int lwidth, int uwidth, int is_periodic,
                                  int shadow_comm_type)
{

  if (lwidth == 0 && uwidth == 0) return;

  _XMP_array_info_t *ai = &(adesc->info[target_dim]);
  _XMP_array_info_t *ainfo = adesc->info;

  // NOTE(review): the listing this was taken from skips original lines
  // 475-476 at this point; check the upstream source for what they contain.

  if (lwidth > ai->shadow_size_lo || uwidth > ai->shadow_size_hi){
    _XMP_fatal("reflect width is larger than shadow width.");
  }

  _XMP_reflect_sched_t *reflect = ai->reflect_sched;

  int target_tdim = ai->align_template_index;
  _XMP_nodes_info_t *ni = adesc->align_template->chunk[target_tdim].onto_nodes_info;

  if (ni->size == 1 && !is_periodic) return;

  int ndims = adesc->dim;

  // 0-origin
  int my_pos = ni->rank;
  int lb_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_lower);
  int ub_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_upper);

  // Neighbors wrap around at both ends; for the non-periodic case the
  // wrapped ranks are replaced by MPI_PROC_NULL further below.
  int lo_pos = (my_pos == lb_pos) ? ub_pos : my_pos - 1;
  int hi_pos = (my_pos == ub_pos) ? lb_pos : my_pos + 1;

  MPI_Comm *comm = adesc->align_template->onto_nodes->comm;
  int my_rank = adesc->align_template->onto_nodes->comm_rank;

  int lo_rank = my_rank + (lo_pos - my_pos) * ni->multiplier;
  int hi_rank = my_rank + (hi_pos - my_pos) * ni->multiplier;

  int count = 0, blocklength = 0;
  long long stride = 0;

  int type_size = adesc->type_size;
  void *array_addr = adesc->array_addr_p;

  void *lo_send_array = NULL, *lo_recv_array = NULL;
  void *hi_send_array = NULL, *hi_recv_array = NULL;

  void *lo_send_buf = NULL;
  void *lo_recv_buf = NULL;
  void *hi_send_buf = NULL;
  void *hi_recv_buf = NULL;

  int lo_buf_size = 0;
  int hi_buf_size = 0;

  // Reuse as much of the previous pcopy schedule as possible when the
  // widths and periodicity are unchanged.
  if (reflect->prev_pcopy_sched_type &&
      lwidth == reflect->lo_width &&
      uwidth == reflect->hi_width &&
      is_periodic == reflect->is_periodic){
    if ((adesc->order == MPI_ORDER_FORTRAN && target_dim != ndims - 1) ||
        (adesc->order == MPI_ORDER_C && target_dim != 0)){
      // Buffers are separately allocated for this dimension: keep them and
      // only recreate the requests.
      goto init_comm;
    }
    else if (reflect->prev_pcopy_sched_type != shadow_comm_type){
      // The communication type changed on the dimension whose buffers may
      // alias the array: keep the slab geometry, redo addresses and buffers.
      count = reflect->count;
      blocklength = reflect->blocklength;
      stride = reflect->stride;
      goto alloc_buf;
    }
  }

  //
  // setup data_type
  //

  if (adesc->order == MPI_ORDER_FORTRAN){ /* for XMP/F */

    count = 1;
    blocklength = type_size;
    stride = ainfo[0].alloc_size * type_size;

    for (int i = ndims - 2; i >= target_dim; i--){
      count *= ainfo[i+1].alloc_size;
    }

    for (int i = 1; i <= target_dim; i++){
      blocklength *= ainfo[i-1].alloc_size;
      stride *= ainfo[i].alloc_size;
    }

  }
  else if (adesc->order == MPI_ORDER_C){ /* for XMP/C */

    count = 1;
    blocklength = type_size;
    stride = ainfo[ndims-1].alloc_size * type_size;

    for (int i = 1; i <= target_dim; i++){
      count *= ainfo[i-1].alloc_size;
    }

    for (int i = ndims - 2; i >= target_dim; i--){
      blocklength *= ainfo[i+1].alloc_size;
      stride *= ainfo[i].alloc_size;
    }

  }
  else {
    _XMP_fatal("cannot determin the base language.");
  }

  //
  // calculate base address
  //

 alloc_buf:

  // for lower reflect

  if (lwidth){

    lo_send_array = array_addr;
    lo_recv_array = array_addr;

    for (int i = 0; i < ndims; i++) {

      int lb_send, lb_recv;
      unsigned long long dim_acc;

      if (i == target_dim) {
        lb_send = ainfo[i].local_upper - lwidth + 1;
        lb_recv = ainfo[i].shadow_size_lo - lwidth;
      }
      else {
        // Note: including shadow area
        lb_send = 0;
        lb_recv = 0;
      }

      dim_acc = ainfo[i].dim_acc;

      lo_send_array = (void *)((char *)lo_send_array + lb_send * dim_acc * type_size);
      lo_recv_array = (void *)((char *)lo_recv_array + lb_recv * dim_acc * type_size);

    }

  }

  // for upper reflect

  if (uwidth){

    hi_send_array = array_addr;
    hi_recv_array = array_addr;

    for (int i = 0; i < ndims; i++) {

      int lb_send, lb_recv;
      unsigned long long dim_acc;

      if (i == target_dim) {
        lb_send = ainfo[i].local_lower;
        lb_recv = ainfo[i].local_upper + 1;
      }
      else {
        // Note: including shadow area
        lb_send = 0;
        lb_recv = 0;
      }

      dim_acc = ainfo[i].dim_acc;

      hi_send_array = (void *)((char *)hi_send_array + lb_send * dim_acc * type_size);
      hi_recv_array = (void *)((char *)hi_recv_array + lb_recv * dim_acc * type_size);

    }

  }

  //
  // Allocate buffers
  //

  // The old buffers must not be freed when they point into the array itself,
  // which is the case for a previous _XMP_COMM_REFLECT schedule on the last
  // (Fortran) / first (C) dimension.
  if (reflect->prev_pcopy_sched_type == _XMP_COMM_REFLECT &&
      ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
       (adesc->order == MPI_ORDER_C && target_dim == 0))){
    ;
  }
  else {
    _XMP_free(reflect->lo_send_buf);
    _XMP_free(reflect->lo_recv_buf);
    _XMP_free(reflect->hi_send_buf);
    _XMP_free(reflect->hi_recv_buf);
  }

  // for lower reflect

  if (lwidth){

    lo_buf_size = lwidth * blocklength * count;

    if (shadow_comm_type == _XMP_COMM_REFLECT &&
        ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
         (adesc->order == MPI_ORDER_C && target_dim == 0))){
      // Communicate directly from/to the array, no separate buffer.
      lo_send_buf = lo_send_array;
      lo_recv_buf = lo_recv_array;
    }
    else {
      _XMP_TSTART(t0);
      lo_send_buf = _XMP_alloc(lo_buf_size);
      lo_recv_buf = _XMP_alloc(lo_buf_size);
      _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
    }

  }

  // for upper reflect

  if (uwidth){

    hi_buf_size = uwidth * blocklength * count;

    if (shadow_comm_type == _XMP_COMM_REFLECT &&
        ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
         (adesc->order == MPI_ORDER_C && target_dim == 0))){
      // Communicate directly from/to the array, no separate buffer.
      hi_send_buf = hi_send_array;
      hi_recv_buf = hi_recv_array;
    }
    else {
      _XMP_TSTART(t0);
      hi_send_buf = _XMP_alloc(hi_buf_size);
      hi_recv_buf = _XMP_alloc(hi_buf_size);
      _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
    }

  }

  //
  // cache schedule
  //

  reflect->count = count;
  reflect->blocklength = blocklength;
  reflect->stride = stride;

  reflect->lo_send_array = lo_send_array;
  reflect->lo_recv_array = lo_recv_array;
  reflect->hi_send_array = hi_send_array;
  reflect->hi_recv_array = hi_recv_array;

  reflect->lo_send_buf = lo_send_buf;
  reflect->lo_recv_buf = lo_recv_buf;
  reflect->hi_send_buf = hi_send_buf;
  reflect->hi_recv_buf = hi_recv_buf;

  //
  // initialize communication
  //

  int src, dst;

 init_comm:

  if (!is_periodic && my_pos == lb_pos){ // no periodic
    lo_rank = MPI_PROC_NULL;
  }

  if (!is_periodic && my_pos == ub_pos){ // no periodic
    hi_rank = MPI_PROC_NULL;
  }

  // Recomputed from the cached geometry because the init_comm shortcut
  // skips the code that sets the local count/blocklength.
  lo_buf_size = lwidth * reflect->blocklength * reflect->count;
  hi_buf_size = uwidth * reflect->blocklength * reflect->count;

  // for lower shadow

  if (lwidth){
    src = lo_rank;
    dst = hi_rank;
  }
  else {
    src = MPI_PROC_NULL;
    dst = MPI_PROC_NULL;
  }

  if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){
    if (reflect->req_reduce[0] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[0]);
    }

    if (reflect->req_reduce[1] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[1]);
    }

    // Reverse direction of a reflect: the receive buffer is sent back to
    // `src` and the send buffer receives from `dst`.
    MPI_Send_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src,
                  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[0]);
    MPI_Recv_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst,
                  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[1]);
  }
  else {
    if (reflect->req[0] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[0]);
    }

    if (reflect->req[1] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[1]);
    }

    MPI_Recv_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src,
                  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[0]);
    MPI_Send_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst,
                  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[1]);
  }

  // for upper shadow

  if (uwidth){
    src = hi_rank;
    dst = lo_rank;
  }
  else {
    src = MPI_PROC_NULL;
    dst = MPI_PROC_NULL;
  }

  if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){
    if (reflect->req_reduce[2] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[2]);
    }

    if (reflect->req_reduce[3] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[3]);
    }

    MPI_Send_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src,
                  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[2]);
    MPI_Recv_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst,
                  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[3]);
  }
  else {
    if (reflect->req[2] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[2]);
    }

    if (reflect->req[3] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[3]);
    }

    MPI_Recv_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src,
                  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[2]);
    MPI_Send_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst,
                  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[3]);
  }

  reflect->prev_pcopy_sched_type = shadow_comm_type;

  reflect->lo_rank = lo_rank;
  reflect->hi_rank = hi_rank;

}
Here is the call graph for this function:

◆ _XMP_set_reflect__()

/**
 * Record the reflect widths and periodic flag to use for dimension `dim` in
 * the next reflect call, and mark that explicit settings were given.
 *
 * The values are stored in the file-level arrays _xmp_lwidth, _xmp_uwidth and
 * _xmp_is_periodic; `dim` is used as the index without a range check.
 *
 * @param a           not referenced in this function body
 * @param dim         dimension index
 * @param lwidth      lower reflect width
 * @param uwidth      upper reflect width
 * @param is_periodic non-zero for a periodic reflect
 */
void _XMP_set_reflect__(_XMP_array_t *a, int dim, int lwidth, int uwidth,
                        int is_periodic)
{
  _xmp_set_reflect_flag = 1;
  _xmp_lwidth[dim] = lwidth;
  _xmp_uwidth[dim] = uwidth;
  _xmp_is_periodic[dim] = is_periodic;
}

◆ xmp_dbg_printf()

void xmp_dbg_printf ( char *  fmt,
  ... 
)
38 {
39  char buf[512];
40  va_list args;
41 
42  va_start(args,fmt);
43  vsprintf(buf,fmt,args);
44  va_end(args);
45 
46  printf("[%d] %s",_XMP_world_rank, buf);
47  fflush(stdout);
48 }

Variable Documentation

◆ _xmp_reflect_pack_flag

int _xmp_reflect_pack_flag
_XMP_array_info_type::align_subscript
long long align_subscript
Definition: xmp_data_struct.h:246
_XMP_reflect_sched_type::is_periodic
int is_periodic
Definition: xmp_data_struct.h:126
_XMP_async_reflect_type::uwidth
int uwidth[_XMP_N_MAX_DIM]
Definition: xmp_data_struct.h:183
_XMP_nodes_info_type::size
int size
Definition: xmp_data_struct.h:32
_XMP_nodes_info_type
Definition: xmp_data_struct.h:31
_XMP_template_type::info
_XMP_template_info_t info[1]
Definition: xmp_data_struct.h:115
_XMP_MAX_ASYNC_REQS
#define _XMP_MAX_ASYNC_REQS
Definition: xmp_data_struct.h:472
_XMP_array_info_type::align_template_index
int align_template_index
Definition: xmp_data_struct.h:260
_XMP_async_reflect_type::is_periodic
_Bool is_periodic[_XMP_N_MAX_DIM]
Definition: xmp_data_struct.h:184
_XMP_template_info_type::ser_lower
long long ser_lower
Definition: xmp_data_struct.h:72
_XMP_alloc
void * _XMP_alloc(size_t size)
Definition: xmp_util.c:21
_XMP_array_info_type
Definition: xmp_data_struct.h:194
_XMP_template_type::chunk
_XMP_template_chunk_t * chunk
Definition: xmp_data_struct.h:112
_XMP_async_reflect_type::reqs
MPI_Request * reqs
Definition: xmp_data_struct.h:187
_XMP_async_reflect_type::datatype
MPI_Datatype * datatype
Definition: xmp_data_struct.h:186
_XMP_reflect_async_cardinal
void _XMP_reflect_async_cardinal(_XMP_array_t *a, int async_id)
Definition: xmp_reflect.c:991
_XMP_array_info_type::ser_upper
int ser_upper
Definition: xmp_data_struct.h:200
_XMP_get_owner_pos
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
Definition: xmp_reflect.c:1688
_XMP_reflect_sched_type::lo_recv_array
void * lo_recv_array
Definition: xmp_data_struct.h:137
_XMP_reflect_sched_type::lo_recv_buf
void * lo_recv_buf
Definition: xmp_data_struct.h:134
_XMP_array_info_type::shadow_type
int shadow_type
Definition: xmp_data_struct.h:248
_XMP_reflect_sched_type
Definition: xmp_data_struct.h:119
_XMP_TEND
#define _XMP_TEND(t, t0)
Definition: xmp_internal.h:748
_XMP_template_chunk_type::onto_nodes_info
_XMP_nodes_info_t * onto_nodes_info
Definition: xmp_data_struct.h:94
_XMP_reflect_sched_type::req
MPI_Request req[4]
Definition: xmp_data_struct.h:131
_XMP_reflect_sched_type::hi_rank
int hi_rank
Definition: xmp_data_struct.h:143
_XMP_nodes_type::comm_rank
int comm_rank
Definition: xmp_data_struct.h:52
_XMP_reflect_sched_type::reflect_is_initialized
int reflect_is_initialized
Definition: xmp_data_struct.h:121
_XMP_async_reflect_type::nreqs
int nreqs
Definition: xmp_data_struct.h:189
_XMP_template_chunk_type::par_chunk_width
unsigned long long par_chunk_width
Definition: xmp_data_struct.h:86
_XMP_COMM_REDUCE_SHADOW
#define _XMP_COMM_REDUCE_SHADOW
Definition: xmp_constant.h:137
_XMP_world_rank
int _XMP_world_rank
Definition: xmp_world.c:9
_XMP_reflect_sched_type::hi_send_buf
void * hi_send_buf
Definition: xmp_data_struct.h:135
_XMP_template_type::onto_nodes
_XMP_nodes_t * onto_nodes
Definition: xmp_data_struct.h:111
_XMP_COMM_REFLECT
#define _XMP_COMM_REFLECT
Definition: xmp_constant.h:136
_XMP_array_info_type::shadow_size_lo
int shadow_size_lo
Definition: xmp_data_struct.h:249
_XMP_TSTART
#define _XMP_TSTART(t0)
Definition: xmp_internal.h:747
_XMP_array_type::align_template
_XMP_template_t * align_template
Definition: xmp_data_struct.h:312
_XMP_array_info_type::align_manner
int align_manner
Definition: xmp_data_struct.h:197
_XMP_reflect_async_ordinal
void _XMP_reflect_async_ordinal(_XMP_array_t *a, int async_id)
Definition: xmp_reflect.c:1055
_XMP_get_current_async
_XMP_async_comm_t * _XMP_get_current_async()
Definition: xmp_async.c:205
_XMP_N_SHADOW_FULL
#define _XMP_N_SHADOW_FULL
Definition: xmp_constant.h:66
_XMP_reflect_sched_type::blocklength
int blocklength
Definition: xmp_data_struct.h:140
_XMP_array_type::async_reflect
_XMP_async_reflect_t * async_reflect
Definition: xmp_data_struct.h:296
_XMP_array_info_type::ser_lower
int ser_lower
Definition: xmp_data_struct.h:199
_XMP_reflect_sched_type::stride
long long stride
Definition: xmp_data_struct.h:141
_XMP_reflect_sched_type::hi_width
int hi_width
Definition: xmp_data_struct.h:125
_XMP_pack_vector
void _XMP_pack_vector(char *restrict dst, char *restrict src, int count, int blocklength, long stride)
Definition: xmp_pack_vector.c:9
_XMP_async_comm::nreqs
int nreqs
Definition: xmp_data_struct.h:460
_XMP_array_type::order
int order
Definition: xmp_data_struct.h:276
_XMP_reflect_pack_dim
void _XMP_reflect_pack_dim(_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
Definition: xmp_reflect.c:1768
_XMP_array_info_type::local_lower
int local_lower
Definition: xmp_data_struct.h:209
_XMP_N_MPI_TAG_REFLECT_HI
#define _XMP_N_MPI_TAG_REFLECT_HI
Definition: xmp_constant.h:12
_XMP_array_info_type::shadow_size_hi
int shadow_size_hi
Definition: xmp_data_struct.h:250
_XMP_reflect_sched_type::lo_rank
int lo_rank
Definition: xmp_data_struct.h:143
_XMP_async_comm::reqs
MPI_Request * reqs
Definition: xmp_data_struct.h:464
_XMP_async_comm
Definition: xmp_data_struct.h:458
_XMP_nodes_info_type::rank
int rank
Definition: xmp_data_struct.h:35
_XMP_array_type::type_size
size_t type_size
Definition: xmp_data_struct.h:274
_XMP_array_info_type::alloc_size
int alloc_size
Definition: xmp_data_struct.h:212
_XMP_N_ALIGN_BLOCK
#define _XMP_N_ALIGN_BLOCK
Definition: xmp_constant.h:37
_XMP_array_type::info
_XMP_array_info_t info[1]
Definition: xmp_data_struct.h:313
_XMP_reflect_sched_type::hi_recv_array
void * hi_recv_array
Definition: xmp_data_struct.h:138
_XMP_reflect_sched_type::req_reduce
MPI_Request req_reduce[4]
Definition: xmp_data_struct.h:132
_XMP_free
void _XMP_free(void *p)
Definition: xmp_util.c:37
_XMP_ASSERT
#define _XMP_ASSERT(_flag)
Definition: xmp_internal.h:34
_XMP_reflect_sched_type::lo_send_array
void * lo_send_array
Definition: xmp_data_struct.h:137
_XMP_nodes_info_type::multiplier
int multiplier
Definition: xmp_data_struct.h:37
_XMP_reflect_pcopy_sched_dim
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type)
Definition: xmp_reflect.c:468
_XMP_array_type::array_addr_p
void * array_addr_p
Definition: xmp_data_struct.h:279
_XMP_nodes_type::comm
_XMP_comm_t * comm
Definition: xmp_data_struct.h:53
_XMP_array_type::dim
int dim
Definition: xmp_data_struct.h:272
_XMP_fatal
void _XMP_fatal(char *msg)
Definition: xmp_util.c:42
_XMP_reflect_sched_type::prev_pcopy_sched_type
int prev_pcopy_sched_type
Definition: xmp_data_struct.h:123
_XMP_array_info_type::dim_acc
unsigned long long dim_acc
Definition: xmp_data_struct.h:242
_XMP_array_info_type::reflect_sched
_XMP_reflect_sched_t * reflect_sched
Definition: xmp_data_struct.h:252
_XMP_N_MPI_TAG_REFLECT_LO
#define _XMP_N_MPI_TAG_REFLECT_LO
Definition: xmp_constant.h:11
_XMP_reflect_sched_type::count
int count
Definition: xmp_data_struct.h:140
_XMP_async_reflect_type::lwidth
int lwidth[_XMP_N_MAX_DIM]
Definition: xmp_data_struct.h:183
_XMP_reflect_sched_type::lo_send_buf
void * lo_send_buf
Definition: xmp_data_struct.h:134
_XMP_N_MAX_DIM
#define _XMP_N_MAX_DIM
Definition: xmp_constant.h:6
_XMP_reflect_sched_type::hi_recv_buf
void * hi_recv_buf
Definition: xmp_data_struct.h:135
_XMP_N_SHADOW_NORMAL
#define _XMP_N_SHADOW_NORMAL
Definition: xmp_constant.h:65
_XMP_TEND2
#define _XMP_TEND2(t, tt, t0)
Definition: xmp_internal.h:749
_XMP_N_SHADOW_NONE
#define _XMP_N_SHADOW_NONE
Definition: xmp_constant.h:64
_XMP_reflect_sched_type::hi_send_array
void * hi_send_array
Definition: xmp_data_struct.h:138
_XMP_reflect_shadow_FULL
void _XMP_reflect_shadow_FULL(void *array_addr, void *array_desc, int array_index)
_XMP_N_ALIGN_GBLOCK
#define _XMP_N_ALIGN_GBLOCK
Definition: xmp_constant.h:40
_XMP_template_chunk_type::mapping_array
long long * mapping_array
Definition: xmp_data_struct.h:88
_xmp_reflect_pack_flag
int _xmp_reflect_pack_flag
Definition: xmp_pack_vector.c:298
_XMP_array_type::is_allocated
_Bool is_allocated
Definition: xmp_data_struct.h:270
_XMP_reflect_sched_type::lo_width
int lo_width
Definition: xmp_data_struct.h:125
_XMP_async_reflect_type
Definition: xmp_data_struct.h:181
_XMP_array_info_type::is_shadow_comm_member
_Bool is_shadow_comm_member
Definition: xmp_data_struct.h:195
_XMP_array_info_type::local_upper
int local_upper
Definition: xmp_data_struct.h:210