libxmp/libxmpf in Omni Compiler  1.3.4
xmp_reduce_shadow.c File Reference
#include "xmp_internal.h"
Include dependency graph for xmp_reduce_shadow.c:

Functions

void _XMP_reflect_pcopy_sched_dim (_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type)
 
void _XMP_reflect_pack_dim (_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
 
void _XMP_sum_vector (int type, char *restrict dst, char *restrict src, int count, int blocklength, long stride)
 
int _XMP_get_owner_pos (_XMP_array_t *a, int dim, int index)
 
void _XMP_set_reduce_shadow__ (_XMP_array_t *a, int dim, int lwidth, int uwidth, int is_periodic)
 
void _XMP_reduce_shadow__ (_XMP_array_t *a)
 
void _XMP_reduce_shadow_wait (_XMP_array_t *a)
 
void _XMP_reduce_shadow_sum (_XMP_array_t *a)
 

Function Documentation

◆ _XMP_get_owner_pos()

/**
 * Find which node position owns global index `index` of array dimension `dim`.
 *
 * @param a      array descriptor
 * @param dim    array dimension being queried
 * @param index  global index along that dimension
 * @return the position found for BLOCK or GBLOCK alignment; for any other
 *         alignment manner (or a GBLOCK index that falls in no range)
 *         _XMP_fatal() is called and -1 is returned if it comes back.
 */
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
{
  /* The template fields read here are wider than int; they are deliberately
     held in int locals so that all arithmetic below is done in int. */
  int subscript = a->info[dim].align_subscript;
  int t_index = a->info[dim].align_template_index;
  int t_lower = a->align_template->info[t_index].ser_lower;
  int width = a->align_template->chunk[t_index].par_chunk_width;
  int manner = a->info[dim].align_manner;

  if (manner == _XMP_N_ALIGN_BLOCK){
    /* Uniform chunks: the position is the template offset divided by the chunk width. */
    return (index + subscript - t_lower) / width;
  }

  if (manner == _XMP_N_ALIGN_GBLOCK){
    /* The template lower bound is not subtracted because the mapping_array is 1-origin. */
    int target = index + subscript;
    long long *bounds = a->align_template->chunk[t_index].mapping_array;
    int nnodes = a->align_template->chunk[t_index].onto_nodes_info->size;

    /* Node p owns the half-open range [bounds[p], bounds[p+1]). */
    int p = 0;
    while (p < nnodes){
      if (bounds[p] <= target && target < bounds[p+1]){
        return p;
      }
      p++;
    }
  }

  _XMP_fatal("cannot calculate position");
  return -1;
}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _XMP_reduce_shadow__()

/**
 * Execute (or, in asynchronous mode, start) a reduce_shadow on array `a`.
 *
 * Widths and periodicity come from the file-level arrays _xmp_lwidth,
 * _xmp_uwidth and _xmp_is_periodic. If _XMP_set_reduce_shadow__() has not been
 * called since the last reduce_shadow, they default to the full declared
 * shadow widths with no periodicity.
 *
 * For each dimension that has a normal shadow and a non-zero width:
 *   1. the schedule is rebuilt if it was never initialized for reduce_shadow
 *      or if the widths/periodicity differ from the cached ones;
 *   2. the shadow area is packed into the communication buffers;
 *   3. the four persistent requests in req_reduce[] are started.
 *
 * In synchronous mode the function then waits for the communication and adds
 * the received data. In asynchronous mode the requests are appended to the
 * current async descriptor instead.
 *
 * The per-call width settings are cleared before returning.
 *
 * @param a  array descriptor; if it is not allocated the call is a no-op
 *           apart from clearing _xmp_set_reduce_shadow_flag.
 */
void _XMP_reduce_shadow__(_XMP_array_t *a)
{

  _XMP_async_comm_t *async = NULL;
  MPI_Request *reqs = NULL;
  int nreqs = 0;

  if (xmp_is_async()){
    async = _XMP_get_current_async();
    // New requests are appended after the ones already registered.
    reqs = &async->reqs[async->nreqs];
  }

  //_XMP_RETURN_IF_SINGLE;
  if (!a->is_allocated){
    _xmp_set_reduce_shadow_flag = 0;
    return;
  }

  // No explicit width clause: use the declared shadow widths, non-periodic.
  if (!_xmp_set_reduce_shadow_flag){
    for (int i = 0; i < a->dim; i++){
      _XMP_array_info_t *ai = &(a->info[i]);
      _xmp_lwidth[i] = ai->shadow_size_lo;
      _xmp_uwidth[i] = ai->shadow_size_hi;
      _xmp_is_periodic[i] = 0;
    }
  }

  for (int i = 0; i < a->dim; i++){

    _XMP_array_info_t *ai = &(a->info[i]);

    if (ai->shadow_type == _XMP_N_SHADOW_NONE){
      continue;
    }
    else if (ai->shadow_type == _XMP_N_SHADOW_NORMAL){

      _XMP_reflect_sched_t *shadow_sched = ai->reflect_sched;

      if (_xmp_lwidth[i] || _xmp_uwidth[i]){

        _XMP_ASSERT(shadow_sched);

        // Rebuild the schedule only when it is missing or stale.
        if (!shadow_sched->reduce_is_initialized ||
            _xmp_lwidth[i] != shadow_sched->lo_width ||
            _xmp_uwidth[i] != shadow_sched->hi_width ||
            _xmp_is_periodic[i] != shadow_sched->is_periodic){

          _XMP_reflect_pcopy_sched_dim(a, i, _xmp_lwidth[i], _xmp_uwidth[i], _xmp_is_periodic[i], _XMP_COMM_REDUCE_SHADOW);

          shadow_sched->reduce_is_initialized = 1;
          shadow_sched->lo_width = _xmp_lwidth[i];
          shadow_sched->hi_width = _xmp_uwidth[i];
          shadow_sched->is_periodic = _xmp_is_periodic[i];

        }

        _XMP_reflect_pack_dim(a, i, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic, _XMP_COMM_REDUCE_SHADOW);

        if (shadow_sched->req_reduce[0] != MPI_REQUEST_NULL) // if req[0] isn't null, any others shouldn't be null.
          MPI_Startall(4, shadow_sched->req_reduce);

        if (xmp_is_async()){
          if (async->nreqs + nreqs + 4 > _XMP_MAX_ASYNC_REQS){
            _XMP_fatal("too many arrays in an asynchronous reflect/reduce_shadow");
          }
          memcpy(&reqs[nreqs], shadow_sched->req_reduce, 4 * sizeof(MPI_Request));
          nreqs += 4;
        }

      }

    }
    else { /* _XMP_N_SHADOW_FULL */
      // not supported yet
      //_XMP_reduce_shadow_shadow_FULL(a->array_addr_p, a, i);
    }

  }

  if (!xmp_is_async()){
    // Synchronous mode: complete the communication, then accumulate the
    // received contributions.
    // NOTE(review): these two calls were missing from the listing (its lines
    // 108-109) and are restored from the page's cross-references to
    // _XMP_reduce_shadow_wait / _XMP_reduce_shadow_sum - confirm against the
    // repository source.
    _XMP_reduce_shadow_wait(a);
    _XMP_reduce_shadow_sum(a);
  }
  else {
    // Asynchronous mode: record the array and the started requests in the
    // async descriptor; presumably completion and summation happen when the
    // async operation is waited on - confirm in xmp_async.c.
    async->a = a;
    async->nreqs += nreqs;
    async->type = _XMP_COMM_REDUCE_SHADOW;
  }

  // Per-call settings do not carry over to the next reduce_shadow.
  _xmp_set_reduce_shadow_flag = 0;
  for (int i = 0; i < a->dim; i++){
    _xmp_lwidth[i] = 0;
    _xmp_uwidth[i] = 0;
    _xmp_is_periodic[i] = 0;
  }

}
Here is the call graph for this function:

◆ _XMP_reduce_shadow_sum()

/**
 * Combine the data received by a reduce_shadow into array `a`.
 *
 * For every dimension that has a schedule and a normal shadow,
 * _XMP_sum_vector() is called with the array section (lo_send_array /
 * hi_send_array) as destination and the matching buffer (lo_send_buf /
 * hi_send_buf) as source. For reduce_shadow those "send" buffers are the ones
 * the persistent receives in _XMP_reflect_pcopy_sched_dim() write into.
 *
 * The schedule stores blocklength and stride in bytes, so both are divided by
 * the element size before being passed on.
 *
 * @param a  array descriptor whose reduce_shadow communication has completed
 */
void _XMP_reduce_shadow_sum(_XMP_array_t *a)
{
  int type_size = a->type_size;

  for (int i = 0; i < a->dim; i++){

    _XMP_array_info_t *ai = &(a->info[i]);
    _XMP_reflect_sched_t *shadow_sched = ai->reflect_sched;

    if (!shadow_sched) continue;

    int lwidth = shadow_sched->lo_width;
    int uwidth = shadow_sched->hi_width;
    int is_periodic = shadow_sched->is_periodic;

    if (ai->shadow_type == _XMP_N_SHADOW_NORMAL){

      int target_dim = ai->align_template_index;
      int my_pos = a->align_template->chunk[target_dim].onto_nodes_info->rank;
      int lb_pos = _XMP_get_owner_pos(a, i, ai->ser_lower);
      int ub_pos = _XMP_get_owner_pos(a, i, ai->ser_upper);

      // for lower reduce_shadow
      // Without periodicity, the owner of the upper bound has no upper
      // neighbour and therefore received nothing to add.
      // NOTE(review): the head of this call ("_XMP_sum_vector(a->type,") was
      // missing from the listing and is restored from the _XMP_sum_vector
      // prototype - confirm against the repository source.
      if (lwidth && (is_periodic || my_pos != ub_pos)){
        _XMP_sum_vector(a->type,
                        (char *)shadow_sched->lo_send_array,
                        (char *)shadow_sched->lo_send_buf,
                        shadow_sched->count, lwidth * shadow_sched->blocklength / type_size,
                        shadow_sched->stride / type_size);
      }

      // for upper reduce_shadow
      // Symmetric case: the owner of the lower bound has no lower neighbour.
      if (uwidth && (is_periodic || my_pos != lb_pos)){
        _XMP_sum_vector(a->type,
                        (char *)shadow_sched->hi_send_array,
                        (char *)shadow_sched->hi_send_buf,
                        shadow_sched->count, uwidth * shadow_sched->blocklength / type_size,
                        shadow_sched->stride / type_size);
      }

    }

  }

}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _XMP_reduce_shadow_wait()

/**
 * Wait for the reduce_shadow requests of every dimension of `a`.
 *
 * A dimension is waited on only if it has a schedule, at least one of the
 * cached widths is non-zero, and its shadow is of the normal kind. Dimensions
 * with a full shadow are skipped (nothing is done for them).
 *
 * @param a  array descriptor
 */
void _XMP_reduce_shadow_wait(_XMP_array_t *a)
{
  for (int d = 0; d < a->dim; d++){

    _XMP_reflect_sched_t *sched = a->info[d].reflect_sched;

    /* No schedule was ever built for this dimension. */
    if (sched == NULL) continue;

    /* Both widths zero: nothing to wait for. */
    if (sched->lo_width == 0 && sched->hi_width == 0) continue;

    if (a->info[d].shadow_type == _XMP_N_SHADOW_NORMAL){
      MPI_Waitall(4, sched->req_reduce, MPI_STATUSES_IGNORE);
    }

  }
}

◆ _XMP_reflect_pack_dim()

/**
 * Pack the data of dimension `i` of array `a` into its communication buffers.
 *
 * For reflect, the lo/hi "send" array sections are packed into the send
 * buffers. For reduce_shadow, the lo/hi "recv" array sections are packed into
 * the recv buffers instead. A side is packed only when its width is non-zero
 * and the rank it is associated with is not MPI_PROC_NULL.
 *
 * For reflect, nothing is packed for the last dimension of a Fortran-ordered
 * array or the first dimension of a C-ordered array.
 *
 * @param a                 array descriptor
 * @param i                 dimension to pack
 * @param lwidth            per-dimension lower widths (indexed by i)
 * @param uwidth            per-dimension upper widths (indexed by i)
 * @param is_periodic       per-dimension periodic flags (not read here)
 * @param shadow_comm_type  _XMP_COMM_REFLECT or _XMP_COMM_REDUCE_SHADOW
 */
void _XMP_reflect_pack_dim(_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
{
  if (shadow_comm_type == _XMP_COMM_REFLECT){
    const int order = a->order;
    if (order != MPI_ORDER_FORTRAN && order != MPI_ORDER_C){
      _XMP_fatal("cannot determin the base language.");
    }
    else if (i == (order == MPI_ORDER_FORTRAN ? a->dim - 1 : 0)){
      /* XMP/F: last dimension, XMP/C: first dimension - no packing needed. */
      return;
    }
  }

  _XMP_array_info_t *info = &(a->info[i]);
  _XMP_reflect_sched_t *sched = info->reflect_sched;

  /* Select which sections/buffers are packed and which neighbour rank gates
     each side; reduce_shadow uses the recv side, everything else the send side. */
  const int reducing = (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW);

  char *lo_dst = (char *)(reducing ? sched->lo_recv_buf : sched->lo_send_buf);
  char *lo_src = (char *)(reducing ? sched->lo_recv_array : sched->lo_send_array);
  int lo_peer = reducing ? sched->lo_rank : sched->hi_rank;

  char *hi_dst = (char *)(reducing ? sched->hi_recv_buf : sched->hi_send_buf);
  char *hi_src = (char *)(reducing ? sched->hi_recv_array : sched->hi_send_array);
  int hi_peer = reducing ? sched->hi_rank : sched->lo_rank;

  if (info->shadow_type != _XMP_N_SHADOW_NORMAL) return;

  /* lower side */
  if (lwidth[i] && lo_peer != MPI_PROC_NULL){
    _XMP_pack_vector(lo_dst, lo_src,
                     sched->count, lwidth[i] * sched->blocklength,
                     sched->stride);
  }

  /* upper side */
  if (uwidth[i] && hi_peer != MPI_PROC_NULL){
    _XMP_pack_vector(hi_dst, hi_src,
                     sched->count, uwidth[i] * sched->blocklength,
                     sched->stride);
  }
}
Here is the call graph for this function:

◆ _XMP_reflect_pcopy_sched_dim()

/*
 * _XMP_reflect_pcopy_sched_dim
 *
 * Build (or refresh) the schedule cached in adesc->info[target_dim].reflect_sched
 * for a shadow exchange along one array dimension, and create the persistent
 * MPI requests for it.
 *
 *   adesc             array descriptor
 *   target_dim        array dimension the schedule is for
 *   lwidth, uwidth    lower / upper widths to exchange; must not exceed the
 *                     declared shadow sizes (fatal error otherwise)
 *   is_periodic       non-zero to wrap around at the ends of the array
 *   shadow_comm_type  _XMP_COMM_REFLECT or _XMP_COMM_REDUCE_SHADOW; selects
 *                     req[] vs. req_reduce[] and the direction of the transfers
 *
 * Returns without touching the schedule when both widths are zero, or when the
 * dimension is mapped onto a single node and the exchange is not periodic.
 *
 * The listing below is the Doxygen rendering of the source, so each line carries
 * its original source line number.
 */
void _XMP_reflect_pcopy_sched_dim ( _XMP_array_t adesc,
int  target_dim,
int  lwidth,
int  uwidth,
int  is_periodic,
int  shadow_comm_type 
)
469  {
470 
471  if (lwidth == 0 && uwidth == 0) return;
472 
473  _XMP_array_info_t *ai = &(adesc->info[target_dim]);
474  _XMP_array_info_t *ainfo = adesc->info;
// NOTE(review): source lines 475-476 are absent from this listing; their content
// is not visible here - check the repository source.
477 
478  if (lwidth > ai->shadow_size_lo || uwidth > ai->shadow_size_hi){
479  _XMP_fatal("reflect width is larger than shadow width.");
480  }
481 
482  _XMP_reflect_sched_t *reflect = ai->reflect_sched;
483 
484  int target_tdim = ai->align_template_index;
485  _XMP_nodes_info_t *ni = adesc->align_template->chunk[target_tdim].onto_nodes_info;
486 
487  if (ni->size == 1 && !is_periodic) return;
488 
489  int ndims = adesc->dim;
490 
491  // 0-origin
492  int my_pos = ni->rank;
493  int lb_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_lower);
494  int ub_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_upper);
495 
// Neighbour positions wrap around between the owners of the array's lower and
// upper bounds; for the non-periodic case the wrapped ranks are replaced by
// MPI_PROC_NULL further down (after the init_comm label).
496  int lo_pos = (my_pos == lb_pos) ? ub_pos : my_pos - 1;
497  int hi_pos = (my_pos == ub_pos) ? lb_pos : my_pos + 1;
498 
499  MPI_Comm *comm = adesc->align_template->onto_nodes->comm;
500  int my_rank = adesc->align_template->onto_nodes->comm_rank;
501 
// Convert the position delta along this node dimension into a rank delta in the
// template's node communicator.
502  int lo_rank = my_rank + (lo_pos - my_pos) * ni->multiplier;
503  int hi_rank = my_rank + (hi_pos - my_pos) * ni->multiplier;
504 
505  int count = 0, blocklength = 0;
506  long long stride = 0;
507 
508  int type_size = adesc->type_size;
509  void *array_addr = adesc->array_addr_p;
510 
511  void *lo_send_array = NULL, *lo_recv_array = NULL;
512  void *hi_send_array = NULL, *hi_recv_array = NULL;
513 
514  void *lo_send_buf = NULL;
515  void *lo_recv_buf = NULL;
516  void *hi_send_buf = NULL;
517  void *hi_recv_buf = NULL;
518 
519  int lo_buf_size = 0;
520  int hi_buf_size = 0;
521 
// Reuse of a previous schedule with identical widths and periodicity:
//  - for every dimension except the last one (Fortran order) / first one (C order),
//    everything cached stays valid and only the MPI requests are re-created;
//  - for that last/first dimension, if the previous schedule was built for the
//    other communication type, count/blocklength/stride are reused but the
//    addresses and buffers are set up again;
//  - otherwise fall through and recompute everything.
522  if (reflect->prev_pcopy_sched_type &&
523  lwidth == reflect->lo_width &&
524  uwidth == reflect->hi_width &&
525  is_periodic == reflect->is_periodic){
526  if ((adesc->order == MPI_ORDER_FORTRAN && target_dim != ndims - 1) ||
527  (adesc->order == MPI_ORDER_C && target_dim != 0)){
528  goto init_comm;
529  }
530  else if (reflect->prev_pcopy_sched_type != shadow_comm_type){
531  count = reflect->count;
532  blocklength = reflect->blocklength;
533  stride = reflect->stride;
534  goto alloc_buf;
535  }
536  }
537 
538  //
539  // setup data_type
540  //
541 
// count       = number of blocks,
// blocklength = bytes per block for a width of one element along target_dim,
// stride      = bytes between the starts of consecutive blocks.
542  if (adesc->order == MPI_ORDER_FORTRAN){ /* for XMP/F */
543 
544  count = 1;
545  blocklength = type_size;
546  stride = ainfo[0].alloc_size * type_size;
547 
548  for (int i = ndims - 2; i >= target_dim; i--){
549  count *= ainfo[i+1].alloc_size;
550  }
551 
552  for (int i = 1; i <= target_dim; i++){
553  blocklength *= ainfo[i-1].alloc_size;
554  stride *= ainfo[i].alloc_size;
555  }
556 
557  }
558  else if (adesc->order == MPI_ORDER_C){ /* for XMP/C */
559 
560  count = 1;
561  blocklength = type_size;
562  stride = ainfo[ndims-1].alloc_size * type_size;
563 
564  for (int i = 1; i <= target_dim; i++){
565  count *= ainfo[i-1].alloc_size;
566  }
567 
568  for (int i = ndims - 2; i >= target_dim; i--){
569  blocklength *= ainfo[i+1].alloc_size;
570  stride *= ainfo[i].alloc_size;
571  }
572 
573  }
574  else {
575  _XMP_fatal("cannot determin the base language.");
576  }
577 
578  //
579  // calculate base address
580  //
581 
582  alloc_buf:
583 
584  // for lower reflect
585 
// Send section: the last lwidth elements below local_upper + 1 along target_dim.
// Recv section: the last lwidth elements of the lower shadow area.
586  if (lwidth){
587 
588  lo_send_array = array_addr;
589  lo_recv_array = array_addr;
590 
591  for (int i = 0; i < ndims; i++) {
592 
593  int lb_send, lb_recv;
594  unsigned long long dim_acc;
595 
596  if (i == target_dim) {
597  lb_send = ainfo[i].local_upper - lwidth + 1;
598  lb_recv = ainfo[i].shadow_size_lo - lwidth;;
599  }
600  else {
601  // Note: including shadow area
602  lb_send = 0;
603  lb_recv = 0;
604  }
605 
606  dim_acc = ainfo[i].dim_acc;
607 
608  lo_send_array = (void *)((char *)lo_send_array + lb_send * dim_acc * type_size);
609  lo_recv_array = (void *)((char *)lo_recv_array + lb_recv * dim_acc * type_size);
610 
611  }
612 
613  }
614 
615  // for upper reflect
616 
// Send section: starts at local_lower along target_dim.
// Recv section: starts right after local_upper, i.e. in the upper shadow area.
617  if (uwidth){
618 
619  hi_send_array = array_addr;
620  hi_recv_array = array_addr;
621 
622  for (int i = 0; i < ndims; i++) {
623 
624  int lb_send, lb_recv;
625  unsigned long long dim_acc;
626 
627  if (i == target_dim) {
628  lb_send = ainfo[i].local_lower;
629  lb_recv = ainfo[i].local_upper + 1;
630  }
631  else {
632  // Note: including shadow area
633  lb_send = 0;
634  lb_recv = 0;
635  }
636 
637  dim_acc = ainfo[i].dim_acc;
638 
639  hi_send_array = (void *)((char *)hi_send_array + lb_send * dim_acc * type_size);
640  hi_recv_array = (void *)((char *)hi_recv_array + lb_recv * dim_acc * type_size);
641 
642  }
643 
644  }
645 
646  //
647  // Allocate buffers
648  //
649 
// When the previous schedule was a reflect on the last (Fortran) / first (C)
// dimension, the cached buffer pointers alias the array itself (see the
// assignments below), so they must not be freed.
650  if (reflect->prev_pcopy_sched_type == _XMP_COMM_REFLECT &&
651  ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
652  (adesc->order == MPI_ORDER_C && target_dim == 0))){
653  ;
654  }
655  else {
656  _XMP_free(reflect->lo_send_buf);
657  _XMP_free(reflect->lo_recv_buf);
658  _XMP_free(reflect->hi_send_buf);
659  _XMP_free(reflect->hi_recv_buf);
660  }
661 
662  // for lower reflect
663 
664  if (lwidth){
665 
666  lo_buf_size = lwidth * blocklength * count;
667 
// Reflect on the last (Fortran) / first (C) dimension communicates directly
// from/to the array sections; every other case uses separately allocated buffers.
668  if (shadow_comm_type == _XMP_COMM_REFLECT &&
669  ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
670  (adesc->order == MPI_ORDER_C && target_dim == 0))){
671  lo_send_buf = lo_send_array;
672  lo_recv_buf = lo_recv_array;
673  }
674  else {
675  _XMP_TSTART(t0);
676  lo_send_buf = _XMP_alloc(lo_buf_size);
677  lo_recv_buf = _XMP_alloc(lo_buf_size);
678  _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
679  }
680 
681  }
682 
683  // for upper reflect
684 
685  if (uwidth){
686 
687  hi_buf_size = uwidth * blocklength * count;
688 
689  if (shadow_comm_type == _XMP_COMM_REFLECT &&
690  ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
691  (adesc->order == MPI_ORDER_C && target_dim == 0))){
692  hi_send_buf = hi_send_array;
693  hi_recv_buf = hi_recv_array;
694  }
695  else {
696  _XMP_TSTART(t0);
697  hi_send_buf = _XMP_alloc(hi_buf_size);
698  hi_recv_buf = _XMP_alloc(hi_buf_size);
699  _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
700  }
701 
702  }
703 
704  //
705  // cache schedule
706  //
707 
708  reflect->count = count;
709  reflect->blocklength = blocklength;
710  reflect->stride = stride;
711 
712  reflect->lo_send_array = lo_send_array;
713  reflect->lo_recv_array = lo_recv_array;
714  reflect->hi_send_array = hi_send_array;
715  reflect->hi_recv_array = hi_recv_array;
716 
717  reflect->lo_send_buf = lo_send_buf;
718  reflect->lo_recv_buf = lo_recv_buf;
719  reflect->hi_send_buf = hi_send_buf;
720  reflect->hi_recv_buf = hi_recv_buf;
721 
722  //
723  // initialize communication
724  //
725 
726  int src, dst;
727 
728  init_comm:
729 
// Without periodicity the nodes owning the array bounds have no neighbour on
// that side; MPI_PROC_NULL turns the corresponding transfers into no-ops.
730  if (!is_periodic && my_pos == lb_pos){ // no periodic
731  lo_rank = MPI_PROC_NULL;
732  }
733 
734  if (!is_periodic && my_pos == ub_pos){ // no periodic
735  hi_rank = MPI_PROC_NULL;
736  }
737 
// Sizes are recomputed from the cached schedule because this point is also
// reached through "goto init_comm", which skips the local computation above.
738  lo_buf_size = lwidth * reflect->blocklength * reflect->count;
739  hi_buf_size = uwidth * reflect->blocklength * reflect->count;
740 
741  // for lower shadow
742 
743  if (lwidth){
744  src = lo_rank;
745  dst = hi_rank;
746  }
747  else {
748  src = MPI_PROC_NULL;
749  dst = MPI_PROC_NULL;
750  }
751 
// reduce_shadow runs the reflect pattern backwards: the contents of the recv
// buffer are sent to src, and the contribution coming from dst is received into
// the send buffer. Existing persistent requests are freed before being re-created.
752  if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){
753  if (reflect->req_reduce[0] != MPI_REQUEST_NULL){
754  MPI_Request_free(&reflect->req_reduce[0]);
755  }
756 
757  if (reflect->req_reduce[1] != MPI_REQUEST_NULL){
758  MPI_Request_free(&reflect->req_reduce[1]);
759  }
760 
761  MPI_Send_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src,
762  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[0]);
763  MPI_Recv_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst,
764  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[1]);
765  }
766  else {
767  if (reflect->req[0] != MPI_REQUEST_NULL){
768  MPI_Request_free(&reflect->req[0]);
769  }
770 
771  if (reflect->req[1] != MPI_REQUEST_NULL){
772  MPI_Request_free(&reflect->req[1]);
773  }
774 
775  MPI_Recv_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src,
776  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[0]);
777  MPI_Send_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst,
778  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[1]);
779  }
780 
781  // for upper shadow
782 
783  if (uwidth){
784  src = hi_rank;
785  dst = lo_rank;
786  }
787  else {
788  src = MPI_PROC_NULL;
789  dst = MPI_PROC_NULL;
790  }
791 
792  if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){
793  if (reflect->req_reduce[2] != MPI_REQUEST_NULL){
794  MPI_Request_free(&reflect->req_reduce[2]);
795  }
796 
797  if (reflect->req_reduce[3] != MPI_REQUEST_NULL){
798  MPI_Request_free(&reflect->req_reduce[3]);
799  }
800 
801  MPI_Send_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src,
802  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[2]);
803  MPI_Recv_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst,
804  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[3]);
805  }
806  else {
807  if (reflect->req[2] != MPI_REQUEST_NULL){
808  MPI_Request_free(&reflect->req[2]);
809  }
810 
811  if (reflect->req[3] != MPI_REQUEST_NULL){
812  MPI_Request_free(&reflect->req[3]);
813  }
814 
815  MPI_Recv_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src,
816  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[2]);
817  MPI_Send_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst,
818  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[3]);
819  }
820 
// Remember which communication type the cached schedule was built for; the reuse
// test near the top of the function depends on it.
821  reflect->prev_pcopy_sched_type = shadow_comm_type;
822 
823  reflect->lo_rank = lo_rank;
824  reflect->hi_rank = hi_rank;
825 
826 }
Here is the call graph for this function:

◆ _XMP_set_reduce_shadow__()

/**
 * Record the widths and periodicity to use for dimension `dim` in the next
 * reduce_shadow, and flag that explicit settings are present.
 *
 * The values are stored in the file-level arrays _xmp_lwidth, _xmp_uwidth and
 * _xmp_is_periodic; _XMP_reduce_shadow__() consumes and then clears them.
 *
 * @param a            array descriptor (not read here)
 * @param dim          dimension the settings apply to
 * @param lwidth       lower width
 * @param uwidth       upper width
 * @param is_periodic  non-zero for a periodic reduce_shadow
 */
void _XMP_set_reduce_shadow__(_XMP_array_t *a, int dim, int lwidth, int uwidth, int is_periodic)
{
  _xmp_is_periodic[dim] = is_periodic;
  _xmp_uwidth[dim] = uwidth;
  _xmp_lwidth[dim] = lwidth;

  /* Tells _XMP_reduce_shadow__() not to fall back to the declared shadow widths. */
  _xmp_set_reduce_shadow_flag = 1;
}

◆ _XMP_sum_vector()

void _XMP_sum_vector ( int  type,
char *restrict  dst,
char *restrict  src,
int  count,
int  blocklength,
long  stride 
)
102  {
103 
104  if (_xmp_omp_num_procs > 1 && count > 8 * _xmp_omp_num_procs){
105 
106  switch (type){
107 
108  case _XMP_N_TYPE_SHORT:
109 #pragma omp parallel for
110  _XMP_SUM_VECTOR(short);
111  break;
112 
114 #pragma omp parallel for
115  _XMP_SUM_VECTOR(unsigned short);
116  break;
117 
118  case _XMP_N_TYPE_INT:
119 #pragma omp parallel for
120  _XMP_SUM_VECTOR(int);
121  break;
122 
124 #pragma omp parallel for
125  _XMP_SUM_VECTOR(unsigned int);
126  break;
127 
128  case _XMP_N_TYPE_LONG:
129 #pragma omp parallel for
130  _XMP_SUM_VECTOR(long);
131  break;
132 
134 #pragma omp parallel for
135  _XMP_SUM_VECTOR(unsigned long);
136  break;
137 
139 #pragma omp parallel for
140  _XMP_SUM_VECTOR(long long);
141  break;
142 
144 #pragma omp parallel for
145  _XMP_SUM_VECTOR(unsigned long long);
146  break;
147 
148  case _XMP_N_TYPE_FLOAT:
149 #pragma omp parallel for
150  _XMP_SUM_VECTOR(float);
151  break;
152 
153  case _XMP_N_TYPE_DOUBLE:
154 #pragma omp parallel for
155  _XMP_SUM_VECTOR(double);
156  break;
157 
159 #pragma omp parallel for
160  _XMP_SUM_VECTOR(long double);
161  break;
162 
163 #ifdef __STD_IEC_559_COMPLEX__
164 
165  case _XMP_N_TYPE_FLOAT_IMAGINARY:
166 #pragma omp parallel for
167  _XMP_SUM_VECTOR(float imaginary);
168  break;
169 
171 #pragma omp parallel for
172  _XMP_SUM_VECTOR(float complex);
173  break;
174 
175  case _XMP_N_TYPE_DOUBLE_IMAGINARY:
176 #pragma omp parallel for
177  _XMP_SUM_VECTOR(double imaginary);
178  break;
179 
181 #pragma omp parallel for
182  _XMP_SUM_VECTOR(double complex);
183  break;
184 
185  case _XMP_N_TYPE_LONG_DOUBLE_IMAGINARY:
186 #pragma omp parallel for
187  _XMP_SUM_VECTOR(long double imaginary);
188  break;
189 
191 #pragma omp parallel for
192  _XMP_SUM_VECTOR(long double complex);
193  break;
194 
195 #endif
196 
197  case _XMP_N_TYPE_BOOL:
198  case _XMP_N_TYPE_CHAR:
201  default:
202  _XMP_fatal("_XMP_sum_vector: array arguments must be of a numerical type");
203  break;
204  }
205 
206  }
207  else {
208 
209  switch (type){
210 
211  case _XMP_N_TYPE_SHORT:
212  _XMP_SUM_VECTOR(short);
213  break;
214 
216  _XMP_SUM_VECTOR(unsigned short);
217  break;
218 
219  case _XMP_N_TYPE_INT:
220  _XMP_SUM_VECTOR(int);
221  break;
222 
224  _XMP_SUM_VECTOR(unsigned int);
225  break;
226 
227  case _XMP_N_TYPE_LONG:
228  _XMP_SUM_VECTOR(long);
229  break;
230 
232  _XMP_SUM_VECTOR(unsigned long);
233  break;
234 
236  _XMP_SUM_VECTOR(long long);
237  break;
238 
240  _XMP_SUM_VECTOR(unsigned long long);
241  break;
242 
243  case _XMP_N_TYPE_FLOAT:
244  _XMP_SUM_VECTOR(float);
245  break;
246 
247  case _XMP_N_TYPE_DOUBLE:
248  _XMP_SUM_VECTOR(double);
249  break;
250 
252  _XMP_SUM_VECTOR(long double);
253  break;
254 
255 #ifdef __STD_IEC_559_COMPLEX__
256 
257  case _XMP_N_TYPE_FLOAT_IMAGINARY:
258  _XMP_SUM_VECTOR(float imaginary);
259  break;
260 
262  _XMP_SUM_VECTOR(float complex);
263  break;
264 
265  case _XMP_N_TYPE_DOUBLE_IMAGINARY:
266  _XMP_SUM_VECTOR(double imaginary);
267  break;
268 
270  _XMP_SUM_VECTOR(double complex);
271  break;
272 
273  case _XMP_N_TYPE_LONG_DOUBLE_IMAGINARY:
274  _XMP_SUM_VECTOR(long double imaginary);
275  break;
276 
278  _XMP_SUM_VECTOR(long double complex);
279  break;
280 
281 #endif
282 
283  case _XMP_N_TYPE_BOOL:
284  case _XMP_N_TYPE_CHAR:
287  default:
288  _XMP_fatal("_XMP_sum_vector: array arguments must be of a numerical type");
289  break;
290  }
291 
292  }
293 
294 }
Here is the call graph for this function:
Here is the caller graph for this function:
_XMP_array_info_type::align_subscript
long long align_subscript
Definition: xmp_data_struct.h:246
_XMP_reflect_sched_type::is_periodic
int is_periodic
Definition: xmp_data_struct.h:126
_XMP_get_owner_pos
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
Definition: xmp_reflect.c:1688
_XMP_nodes_info_type::size
int size
Definition: xmp_data_struct.h:32
_XMP_nodes_info_type
Definition: xmp_data_struct.h:31
_XMP_N_TYPE_BOOL
#define _XMP_N_TYPE_BOOL
Definition: xmp_constant.h:80
_XMP_template_type::info
_XMP_template_info_t info[1]
Definition: xmp_data_struct.h:115
_XMP_MAX_ASYNC_REQS
#define _XMP_MAX_ASYNC_REQS
Definition: xmp_data_struct.h:472
_XMP_N_TYPE_INT
#define _XMP_N_TYPE_INT
Definition: xmp_constant.h:85
_XMP_array_info_type::align_template_index
int align_template_index
Definition: xmp_data_struct.h:260
_XMP_template_info_type::ser_lower
long long ser_lower
Definition: xmp_data_struct.h:72
xmp_is_async
_Bool xmp_is_async()
Definition: xmp_async.c:20
_XMP_alloc
void * _XMP_alloc(size_t size)
Definition: xmp_util.c:21
_XMP_array_info_type
Definition: xmp_data_struct.h:194
_XMP_reduce_shadow_sum
void _XMP_reduce_shadow_sum(_XMP_array_t *a)
Definition: xmp_reduce_shadow.c:153
_XMP_template_type::chunk
_XMP_template_chunk_t * chunk
Definition: xmp_data_struct.h:112
_XMP_N_TYPE_DOUBLE
#define _XMP_N_TYPE_DOUBLE
Definition: xmp_constant.h:92
_XMP_array_info_type::ser_upper
int ser_upper
Definition: xmp_data_struct.h:200
_XMP_get_owner_pos
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
Definition: xmp_reflect.c:1688
_XMP_reflect_sched_type::lo_recv_array
void * lo_recv_array
Definition: xmp_data_struct.h:137
_XMP_reduce_shadow_wait
void _XMP_reduce_shadow_wait(_XMP_array_t *a)
Definition: xmp_reduce_shadow.c:127
_XMP_N_TYPE_DOUBLE_COMPLEX
#define _XMP_N_TYPE_DOUBLE_COMPLEX
Definition: xmp_constant.h:102
_XMP_reflect_sched_type::lo_recv_buf
void * lo_recv_buf
Definition: xmp_data_struct.h:134
_XMP_array_info_type::shadow_type
int shadow_type
Definition: xmp_data_struct.h:248
_XMP_N_TYPE_LONG_DOUBLE
#define _XMP_N_TYPE_LONG_DOUBLE
Definition: xmp_constant.h:93
_XMP_reflect_sched_type
Definition: xmp_data_struct.h:119
_XMP_async_comm::type
int type
Definition: xmp_data_struct.h:463
_XMP_template_chunk_type::onto_nodes_info
_XMP_nodes_info_t * onto_nodes_info
Definition: xmp_data_struct.h:94
_XMP_reflect_sched_type::req
MPI_Request req[4]
Definition: xmp_data_struct.h:131
_XMP_N_TYPE_NONBASIC
#define _XMP_N_TYPE_NONBASIC
Definition: xmp_constant.h:104
_XMP_reflect_sched_type::hi_rank
int hi_rank
Definition: xmp_data_struct.h:143
_XMP_nodes_type::comm_rank
int comm_rank
Definition: xmp_data_struct.h:52
_xmp_omp_num_procs
int _xmp_omp_num_procs
Definition: xmp_pack_vector.c:7
_XMP_template_chunk_type::par_chunk_width
unsigned long long par_chunk_width
Definition: xmp_data_struct.h:86
_XMP_COMM_REDUCE_SHADOW
#define _XMP_COMM_REDUCE_SHADOW
Definition: xmp_constant.h:137
_XMP_reflect_sched_type::hi_send_buf
void * hi_send_buf
Definition: xmp_data_struct.h:135
_XMP_N_TYPE_UNSIGNED_LONGLONG
#define _XMP_N_TYPE_UNSIGNED_LONGLONG
Definition: xmp_constant.h:90
_XMP_template_type::onto_nodes
_XMP_nodes_t * onto_nodes
Definition: xmp_data_struct.h:111
_XMP_COMM_REFLECT
#define _XMP_COMM_REFLECT
Definition: xmp_constant.h:136
_XMP_array_info_type::shadow_size_lo
int shadow_size_lo
Definition: xmp_data_struct.h:249
_XMP_TSTART
#define _XMP_TSTART(t0)
Definition: xmp_internal.h:747
_XMP_array_type::align_template
_XMP_template_t * align_template
Definition: xmp_data_struct.h:312
_XMP_N_TYPE_LONG
#define _XMP_N_TYPE_LONG
Definition: xmp_constant.h:87
_XMP_N_TYPE_SHORT
#define _XMP_N_TYPE_SHORT
Definition: xmp_constant.h:83
_XMP_N_TYPE_FLOAT_COMPLEX
#define _XMP_N_TYPE_FLOAT_COMPLEX
Definition: xmp_constant.h:101
_XMP_N_TYPE_FLOAT
#define _XMP_N_TYPE_FLOAT
Definition: xmp_constant.h:91
_XMP_N_TYPE_UNSIGNED_INT
#define _XMP_N_TYPE_UNSIGNED_INT
Definition: xmp_constant.h:86
_XMP_array_info_type::align_manner
int align_manner
Definition: xmp_data_struct.h:197
_XMP_get_current_async
_XMP_async_comm_t * _XMP_get_current_async()
Definition: xmp_async.c:205
_XMP_N_TYPE_UNSIGNED_LONG
#define _XMP_N_TYPE_UNSIGNED_LONG
Definition: xmp_constant.h:88
_XMP_N_SHADOW_FULL
#define _XMP_N_SHADOW_FULL
Definition: xmp_constant.h:66
_XMP_reflect_sched_type::blocklength
int blocklength
Definition: xmp_data_struct.h:140
_XMP_array_info_type::ser_lower
int ser_lower
Definition: xmp_data_struct.h:199
_XMP_reflect_sched_type::stride
long long stride
Definition: xmp_data_struct.h:141
_XMP_reflect_sched_type::hi_width
int hi_width
Definition: xmp_data_struct.h:125
_XMP_pack_vector
void _XMP_pack_vector(char *restrict dst, char *restrict src, int count, int blocklength, long stride)
Definition: xmp_pack_vector.c:9
_XMP_async_comm::nreqs
int nreqs
Definition: xmp_data_struct.h:460
_XMP_array_type::order
int order
Definition: xmp_data_struct.h:276
_XMP_array_info_type::local_lower
int local_lower
Definition: xmp_data_struct.h:209
_XMP_N_MPI_TAG_REFLECT_HI
#define _XMP_N_MPI_TAG_REFLECT_HI
Definition: xmp_constant.h:12
_XMP_array_info_type::shadow_size_hi
int shadow_size_hi
Definition: xmp_data_struct.h:250
_XMP_reflect_sched_type::lo_rank
int lo_rank
Definition: xmp_data_struct.h:143
_XMP_reflect_pcopy_sched_dim
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type)
Definition: xmp_reflect.c:468
_XMP_N_TYPE_UNSIGNED_SHORT
#define _XMP_N_TYPE_UNSIGNED_SHORT
Definition: xmp_constant.h:84
_XMP_N_TYPE_CHAR
#define _XMP_N_TYPE_CHAR
Definition: xmp_constant.h:81
_XMP_N_TYPE_UNSIGNED_CHAR
#define _XMP_N_TYPE_UNSIGNED_CHAR
Definition: xmp_constant.h:82
_XMP_async_comm::reqs
MPI_Request * reqs
Definition: xmp_data_struct.h:464
_XMP_async_comm
Definition: xmp_data_struct.h:458
_XMP_nodes_info_type::rank
int rank
Definition: xmp_data_struct.h:35
_XMP_array_type::type_size
size_t type_size
Definition: xmp_data_struct.h:274
_XMP_array_info_type::alloc_size
int alloc_size
Definition: xmp_data_struct.h:212
_XMP_N_ALIGN_BLOCK
#define _XMP_N_ALIGN_BLOCK
Definition: xmp_constant.h:37
_XMP_array_type::info
_XMP_array_info_t info[1]
Definition: xmp_data_struct.h:313
_XMP_reflect_sched_type::hi_recv_array
void * hi_recv_array
Definition: xmp_data_struct.h:138
_XMP_free
void _XMP_free(void *p)
Definition: xmp_util.c:37
_XMP_reflect_sched_type::req_reduce
MPI_Request req_reduce[4]
Definition: xmp_data_struct.h:132
_XMP_ASSERT
#define _XMP_ASSERT(_flag)
Definition: xmp_internal.h:34
_XMP_N_TYPE_LONG_DOUBLE_COMPLEX
#define _XMP_N_TYPE_LONG_DOUBLE_COMPLEX
Definition: xmp_constant.h:103
_XMP_reflect_sched_type::lo_send_array
void * lo_send_array
Definition: xmp_data_struct.h:137
_XMP_nodes_info_type::multiplier
int multiplier
Definition: xmp_data_struct.h:37
_XMP_sum_vector
void _XMP_sum_vector(int type, char *restrict dst, char *restrict src, int count, int blocklength, long stride)
Definition: xmp_pack_vector.c:101
_XMP_async_comm::a
_XMP_array_t * a
Definition: xmp_data_struct.h:467
_XMP_array_type::array_addr_p
void * array_addr_p
Definition: xmp_data_struct.h:279
_XMP_nodes_type::comm
_XMP_comm_t * comm
Definition: xmp_data_struct.h:53
_XMP_array_type::dim
int dim
Definition: xmp_data_struct.h:272
_XMP_fatal
void _XMP_fatal(char *msg)
Definition: xmp_util.c:42
_XMP_reflect_sched_type::prev_pcopy_sched_type
int prev_pcopy_sched_type
Definition: xmp_data_struct.h:123
_XMP_array_info_type::dim_acc
unsigned long long dim_acc
Definition: xmp_data_struct.h:242
_XMP_array_info_type::reflect_sched
_XMP_reflect_sched_t * reflect_sched
Definition: xmp_data_struct.h:252
_XMP_N_MPI_TAG_REFLECT_LO
#define _XMP_N_MPI_TAG_REFLECT_LO
Definition: xmp_constant.h:11
_XMP_reflect_sched_type::reduce_is_initialized
int reduce_is_initialized
Definition: xmp_data_struct.h:122
_XMP_N_TYPE_LONGLONG
#define _XMP_N_TYPE_LONGLONG
Definition: xmp_constant.h:89
_XMP_reflect_sched_type::count
int count
Definition: xmp_data_struct.h:140
_XMP_reflect_sched_type::lo_send_buf
void * lo_send_buf
Definition: xmp_data_struct.h:134
_XMP_reflect_sched_type::hi_recv_buf
void * hi_recv_buf
Definition: xmp_data_struct.h:135
_XMP_N_SHADOW_NORMAL
#define _XMP_N_SHADOW_NORMAL
Definition: xmp_constant.h:65
_XMP_TEND2
#define _XMP_TEND2(t, tt, t0)
Definition: xmp_internal.h:749
_XMP_N_SHADOW_NONE
#define _XMP_N_SHADOW_NONE
Definition: xmp_constant.h:64
_XMP_reflect_sched_type::hi_send_array
void * hi_send_array
Definition: xmp_data_struct.h:138
_XMP_array_type::type
int type
Definition: xmp_data_struct.h:273
_XMP_N_ALIGN_GBLOCK
#define _XMP_N_ALIGN_GBLOCK
Definition: xmp_constant.h:40
_XMP_template_chunk_type::mapping_array
long long * mapping_array
Definition: xmp_data_struct.h:88
_XMP_array_type::is_allocated
_Bool is_allocated
Definition: xmp_data_struct.h:270
_XMP_SUM_VECTOR
#define _XMP_SUM_VECTOR(_type)
Definition: xmp_pack_vector.c:94
_XMP_reflect_sched_type::lo_width
int lo_width
Definition: xmp_data_struct.h:125
_XMP_reflect_pack_dim
void _XMP_reflect_pack_dim(_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
Definition: xmp_reflect.c:1768
_XMP_array_info_type::is_shadow_comm_member
_Bool is_shadow_comm_member
Definition: xmp_data_struct.h:195
_XMP_array_info_type::local_upper
int local_upper
Definition: xmp_data_struct.h:210