libxmp/libxmpf in Omni Compiler
1.3.4
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type)
void _XMP_reflect_pack_dim(_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
void _XMP_sum_vector(int type, char *restrict dst, char *restrict src, int count, int blocklength, long stride)
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
void _XMP_set_reduce_shadow__(_XMP_array_t *a, int dim, int lwidth, int uwidth, int is_periodic)
void _XMP_reduce_shadow__(_XMP_array_t *a)
void _XMP_reduce_shadow_wait(_XMP_array_t *a)
void _XMP_reduce_shadow_sum(_XMP_array_t *a)
◆ _XMP_get_owner_pos()
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
1700 pos = (index + align_offset - tlb) / chunk;
1705 int tpos = index + align_offset;
1708 for (
int i = 0; i < np; i++){
1709 if (m[i] <= tpos && tpos < m[i+1]){
◆ _XMP_reduce_shadow__()
32 MPI_Request *reqs = NULL;
42 _xmp_set_reduce_shadow_flag = 0;
46 if (!_xmp_set_reduce_shadow_flag){
47 for (
int i = 0; i < a->
dim; i++){
51 _xmp_is_periodic[i] = 0;
55 for (
int i = 0; i < a->
dim; i++){
66 if (_xmp_lwidth[i] || _xmp_uwidth[i]){
71 _xmp_lwidth[i] != shadow_sched->
lo_width ||
72 _xmp_uwidth[i] != shadow_sched->
hi_width ||
78 shadow_sched->
lo_width = _xmp_lwidth[i];
79 shadow_sched->
hi_width = _xmp_uwidth[i];
86 if (shadow_sched->
req_reduce[0] != MPI_REQUEST_NULL)
91 _XMP_fatal(
"too many arrays in an asynchronous reflect/reduce_shadow");
93 memcpy(&reqs[nreqs], shadow_sched->
req_reduce, 4 *
sizeof(MPI_Request));
113 async->
nreqs += nreqs;
117 _xmp_set_reduce_shadow_flag = 0;
118 for (
int i = 0; i < a->
dim; i++){
121 _xmp_is_periodic[i] = 0;
◆ _XMP_reduce_shadow_sum()
157 for (
int i = 0; i < a->
dim; i++){
162 if (!shadow_sched)
continue;
164 int lwidth = shadow_sched->
lo_width;
165 int uwidth = shadow_sched->
hi_width;
176 if (lwidth && (is_periodic || my_pos != ub_pos)){
181 shadow_sched->
stride / type_size);
185 if (uwidth && (is_periodic || my_pos != lb_pos)){
190 shadow_sched->
stride / type_size);
◆ _XMP_reduce_shadow_wait()
129 for (
int i = 0; i < a->
dim; i++){
134 if (!shadow_sched)
continue;
136 int lwidth = shadow_sched->
lo_width;
137 int uwidth = shadow_sched->
hi_width;
139 if (!lwidth && !uwidth)
continue;
142 MPI_Waitall(4, shadow_sched->
req_reduce, MPI_STATUSES_IGNORE);
◆ _XMP_reflect_pack_dim()
void _XMP_reflect_pack_dim(_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
1772 char *pack_dst_lo, *pack_src_lo;
int dst_lo;
1773 char *pack_dst_hi, *pack_src_hi;
int dst_hi;
1776 if (a->
order == MPI_ORDER_FORTRAN){
1777 if (i == a->
dim - 1)
return;
1779 else if (a->
order == MPI_ORDER_C){
1783 _XMP_fatal(
"cannot determin the base language.");
1811 if (lwidth[i] && dst_lo != MPI_PROC_NULL){
1818 if (uwidth[i] && dst_hi != MPI_PROC_NULL){
◆ _XMP_reflect_pcopy_sched_dim()
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type)
471 if (lwidth == 0 && uwidth == 0)
return;
479 _XMP_fatal(
"reflect width is larger than shadow width.");
487 if (ni->
size == 1 && !is_periodic)
return;
489 int ndims = adesc->
dim;
492 int my_pos = ni->
rank;
496 int lo_pos = (my_pos == lb_pos) ? ub_pos : my_pos - 1;
497 int hi_pos = (my_pos == ub_pos) ? lb_pos : my_pos + 1;
502 int lo_rank = my_rank + (lo_pos - my_pos) * ni->
multiplier;
503 int hi_rank = my_rank + (hi_pos - my_pos) * ni->
multiplier;
505 int count = 0, blocklength = 0;
506 long long stride = 0;
511 void *lo_send_array = NULL, *lo_recv_array = NULL;
512 void *hi_send_array = NULL, *hi_recv_array = NULL;
514 void *lo_send_buf = NULL;
515 void *lo_recv_buf = NULL;
516 void *hi_send_buf = NULL;
517 void *hi_recv_buf = NULL;
526 if ((adesc->
order == MPI_ORDER_FORTRAN && target_dim != ndims - 1) ||
527 (adesc->
order == MPI_ORDER_C && target_dim != 0)){
531 count = reflect->
count;
542 if (adesc->
order == MPI_ORDER_FORTRAN){
545 blocklength = type_size;
548 for (
int i = ndims - 2; i >= target_dim; i--){
552 for (
int i = 1; i <= target_dim; i++){
558 else if (adesc->
order == MPI_ORDER_C){
561 blocklength = type_size;
562 stride = ainfo[ndims-1].
alloc_size * type_size;
564 for (
int i = 1; i <= target_dim; i++){
568 for (
int i = ndims - 2; i >= target_dim; i--){
575 _XMP_fatal(
"cannot determin the base language.");
588 lo_send_array = array_addr;
589 lo_recv_array = array_addr;
591 for (
int i = 0; i < ndims; i++) {
593 int lb_send, lb_recv;
594 unsigned long long dim_acc;
596 if (i == target_dim) {
608 lo_send_array = (
void *)((
char *)lo_send_array + lb_send * dim_acc * type_size);
609 lo_recv_array = (
void *)((
char *)lo_recv_array + lb_recv * dim_acc * type_size);
619 hi_send_array = array_addr;
620 hi_recv_array = array_addr;
622 for (
int i = 0; i < ndims; i++) {
624 int lb_send, lb_recv;
625 unsigned long long dim_acc;
627 if (i == target_dim) {
639 hi_send_array = (
void *)((
char *)hi_send_array + lb_send * dim_acc * type_size);
640 hi_recv_array = (
void *)((
char *)hi_recv_array + lb_recv * dim_acc * type_size);
651 ((adesc->
order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
652 (adesc->
order == MPI_ORDER_C && target_dim == 0))){
666 lo_buf_size = lwidth * blocklength * count;
669 ((adesc->
order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
670 (adesc->
order == MPI_ORDER_C && target_dim == 0))){
671 lo_send_buf = lo_send_array;
672 lo_recv_buf = lo_recv_array;
678 _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
687 hi_buf_size = uwidth * blocklength * count;
690 ((adesc->
order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
691 (adesc->
order == MPI_ORDER_C && target_dim == 0))){
692 hi_send_buf = hi_send_array;
693 hi_recv_buf = hi_recv_array;
699 _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
708 reflect->
count = count;
730 if (!is_periodic && my_pos == lb_pos){
731 lo_rank = MPI_PROC_NULL;
734 if (!is_periodic && my_pos == ub_pos){
735 hi_rank = MPI_PROC_NULL;
753 if (reflect->
req_reduce[0] != MPI_REQUEST_NULL){
757 if (reflect->
req_reduce[1] != MPI_REQUEST_NULL){
761 MPI_Send_init(reflect->
lo_recv_buf, lo_buf_size, MPI_BYTE, src,
763 MPI_Recv_init(reflect->
lo_send_buf, lo_buf_size, MPI_BYTE, dst,
767 if (reflect->
req[0] != MPI_REQUEST_NULL){
768 MPI_Request_free(&reflect->
req[0]);
771 if (reflect->
req[1] != MPI_REQUEST_NULL){
772 MPI_Request_free(&reflect->
req[1]);
775 MPI_Recv_init(reflect->
lo_recv_buf, lo_buf_size, MPI_BYTE, src,
777 MPI_Send_init(reflect->
lo_send_buf, lo_buf_size, MPI_BYTE, dst,
793 if (reflect->
req_reduce[2] != MPI_REQUEST_NULL){
797 if (reflect->
req_reduce[3] != MPI_REQUEST_NULL){
801 MPI_Send_init(reflect->
hi_recv_buf, hi_buf_size, MPI_BYTE, src,
803 MPI_Recv_init(reflect->
hi_send_buf, hi_buf_size, MPI_BYTE, dst,
807 if (reflect->
req[2] != MPI_REQUEST_NULL){
808 MPI_Request_free(&reflect->
req[2]);
811 if (reflect->
req[3] != MPI_REQUEST_NULL){
812 MPI_Request_free(&reflect->
req[3]);
815 MPI_Recv_init(reflect->
hi_recv_buf, hi_buf_size, MPI_BYTE, src,
817 MPI_Send_init(reflect->
hi_send_buf, hi_buf_size, MPI_BYTE, dst,
◆ _XMP_set_reduce_shadow__()
void _XMP_set_reduce_shadow__(_XMP_array_t *a, int dim, int lwidth, int uwidth, int is_periodic)
21 _xmp_set_reduce_shadow_flag = 1;
22 _xmp_lwidth[dim] = lwidth;
23 _xmp_uwidth[dim] = uwidth;
24 _xmp_is_periodic[dim] = is_periodic;
◆ _XMP_sum_vector()
void _XMP_sum_vector(int type, char *restrict dst, char *restrict src, int count, int blocklength, long stride)
109 #pragma omp parallel for
114 #pragma omp parallel for
119 #pragma omp parallel for
124 #pragma omp parallel for
129 #pragma omp parallel for
134 #pragma omp parallel for
139 #pragma omp parallel for
144 #pragma omp parallel for
149 #pragma omp parallel for
154 #pragma omp parallel for
159 #pragma omp parallel for
163 #ifdef __STD_IEC_559_COMPLEX__
165 case _XMP_N_TYPE_FLOAT_IMAGINARY:
166 #pragma omp parallel for
171 #pragma omp parallel for
175 case _XMP_N_TYPE_DOUBLE_IMAGINARY:
176 #pragma omp parallel for
181 #pragma omp parallel for
185 case _XMP_N_TYPE_LONG_DOUBLE_IMAGINARY:
186 #pragma omp parallel for
191 #pragma omp parallel for
202 _XMP_fatal(
"_XMP_sum_vector: array arguments must be of a numerical type");
255 #ifdef __STD_IEC_559_COMPLEX__
257 case _XMP_N_TYPE_FLOAT_IMAGINARY:
265 case _XMP_N_TYPE_DOUBLE_IMAGINARY:
273 case _XMP_N_TYPE_LONG_DOUBLE_IMAGINARY:
288 _XMP_fatal(
"_XMP_sum_vector: array arguments must be of a numerical type");
long long align_subscript
Definition: xmp_data_struct.h:246
int is_periodic
Definition: xmp_data_struct.h:126
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
Definition: xmp_reflect.c:1688
int size
Definition: xmp_data_struct.h:32
Definition: xmp_data_struct.h:31
#define _XMP_N_TYPE_BOOL
Definition: xmp_constant.h:80
_XMP_template_info_t info[1]
Definition: xmp_data_struct.h:115
#define _XMP_MAX_ASYNC_REQS
Definition: xmp_data_struct.h:472
#define _XMP_N_TYPE_INT
Definition: xmp_constant.h:85
int align_template_index
Definition: xmp_data_struct.h:260
long long ser_lower
Definition: xmp_data_struct.h:72
_Bool xmp_is_async()
Definition: xmp_async.c:20
void * _XMP_alloc(size_t size)
Definition: xmp_util.c:21
Definition: xmp_data_struct.h:194
void _XMP_reduce_shadow_sum(_XMP_array_t *a)
Definition: xmp_reduce_shadow.c:153
_XMP_template_chunk_t * chunk
Definition: xmp_data_struct.h:112
#define _XMP_N_TYPE_DOUBLE
Definition: xmp_constant.h:92
int ser_upper
Definition: xmp_data_struct.h:200
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
Definition: xmp_reflect.c:1688
void * lo_recv_array
Definition: xmp_data_struct.h:137
void _XMP_reduce_shadow_wait(_XMP_array_t *a)
Definition: xmp_reduce_shadow.c:127
#define _XMP_N_TYPE_DOUBLE_COMPLEX
Definition: xmp_constant.h:102
void * lo_recv_buf
Definition: xmp_data_struct.h:134
int shadow_type
Definition: xmp_data_struct.h:248
#define _XMP_N_TYPE_LONG_DOUBLE
Definition: xmp_constant.h:93
Definition: xmp_data_struct.h:119
int type
Definition: xmp_data_struct.h:463
_XMP_nodes_info_t * onto_nodes_info
Definition: xmp_data_struct.h:94
MPI_Request req[4]
Definition: xmp_data_struct.h:131
#define _XMP_N_TYPE_NONBASIC
Definition: xmp_constant.h:104
int hi_rank
Definition: xmp_data_struct.h:143
int comm_rank
Definition: xmp_data_struct.h:52
int _xmp_omp_num_procs
Definition: xmp_pack_vector.c:7
unsigned long long par_chunk_width
Definition: xmp_data_struct.h:86
#define _XMP_COMM_REDUCE_SHADOW
Definition: xmp_constant.h:137
void * hi_send_buf
Definition: xmp_data_struct.h:135
#define _XMP_N_TYPE_UNSIGNED_LONGLONG
Definition: xmp_constant.h:90
_XMP_nodes_t * onto_nodes
Definition: xmp_data_struct.h:111
#define _XMP_COMM_REFLECT
Definition: xmp_constant.h:136
int shadow_size_lo
Definition: xmp_data_struct.h:249
#define _XMP_TSTART(t0)
Definition: xmp_internal.h:747
_XMP_template_t * align_template
Definition: xmp_data_struct.h:312
#define _XMP_N_TYPE_LONG
Definition: xmp_constant.h:87
#define _XMP_N_TYPE_SHORT
Definition: xmp_constant.h:83
#define _XMP_N_TYPE_FLOAT_COMPLEX
Definition: xmp_constant.h:101
#define _XMP_N_TYPE_FLOAT
Definition: xmp_constant.h:91
#define _XMP_N_TYPE_UNSIGNED_INT
Definition: xmp_constant.h:86
int align_manner
Definition: xmp_data_struct.h:197
_XMP_async_comm_t * _XMP_get_current_async()
Definition: xmp_async.c:205
#define _XMP_N_TYPE_UNSIGNED_LONG
Definition: xmp_constant.h:88
#define _XMP_N_SHADOW_FULL
Definition: xmp_constant.h:66
int blocklength
Definition: xmp_data_struct.h:140
int ser_lower
Definition: xmp_data_struct.h:199
long long stride
Definition: xmp_data_struct.h:141
int hi_width
Definition: xmp_data_struct.h:125
void _XMP_pack_vector(char *restrict dst, char *restrict src, int count, int blocklength, long stride)
Definition: xmp_pack_vector.c:9
int nreqs
Definition: xmp_data_struct.h:460
int order
Definition: xmp_data_struct.h:276
int local_lower
Definition: xmp_data_struct.h:209
#define _XMP_N_MPI_TAG_REFLECT_HI
Definition: xmp_constant.h:12
int shadow_size_hi
Definition: xmp_data_struct.h:250
int lo_rank
Definition: xmp_data_struct.h:143
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type)
Definition: xmp_reflect.c:468
#define _XMP_N_TYPE_UNSIGNED_SHORT
Definition: xmp_constant.h:84
#define _XMP_N_TYPE_CHAR
Definition: xmp_constant.h:81
#define _XMP_N_TYPE_UNSIGNED_CHAR
Definition: xmp_constant.h:82
MPI_Request * reqs
Definition: xmp_data_struct.h:464
Definition: xmp_data_struct.h:458
int rank
Definition: xmp_data_struct.h:35
size_t type_size
Definition: xmp_data_struct.h:274
int alloc_size
Definition: xmp_data_struct.h:212
#define _XMP_N_ALIGN_BLOCK
Definition: xmp_constant.h:37
_XMP_array_info_t info[1]
Definition: xmp_data_struct.h:313
void * hi_recv_array
Definition: xmp_data_struct.h:138
void _XMP_free(void *p)
Definition: xmp_util.c:37
MPI_Request req_reduce[4]
Definition: xmp_data_struct.h:132
#define _XMP_ASSERT(_flag)
Definition: xmp_internal.h:34
#define _XMP_N_TYPE_LONG_DOUBLE_COMPLEX
Definition: xmp_constant.h:103
void * lo_send_array
Definition: xmp_data_struct.h:137
int multiplier
Definition: xmp_data_struct.h:37
void _XMP_sum_vector(int type, char *restrict dst, char *restrict src, int count, int blocklength, long stride)
Definition: xmp_pack_vector.c:101
_XMP_array_t * a
Definition: xmp_data_struct.h:467
void * array_addr_p
Definition: xmp_data_struct.h:279
_XMP_comm_t * comm
Definition: xmp_data_struct.h:53
int dim
Definition: xmp_data_struct.h:272
void _XMP_fatal(char *msg)
Definition: xmp_util.c:42
int prev_pcopy_sched_type
Definition: xmp_data_struct.h:123
unsigned long long dim_acc
Definition: xmp_data_struct.h:242
_XMP_reflect_sched_t * reflect_sched
Definition: xmp_data_struct.h:252
#define _XMP_N_MPI_TAG_REFLECT_LO
Definition: xmp_constant.h:11
int reduce_is_initialized
Definition: xmp_data_struct.h:122
#define _XMP_N_TYPE_LONGLONG
Definition: xmp_constant.h:89
int count
Definition: xmp_data_struct.h:140
void * lo_send_buf
Definition: xmp_data_struct.h:134
void * hi_recv_buf
Definition: xmp_data_struct.h:135
#define _XMP_N_SHADOW_NORMAL
Definition: xmp_constant.h:65
#define _XMP_TEND2(t, tt, t0)
Definition: xmp_internal.h:749
#define _XMP_N_SHADOW_NONE
Definition: xmp_constant.h:64
void * hi_send_array
Definition: xmp_data_struct.h:138
int type
Definition: xmp_data_struct.h:273
#define _XMP_N_ALIGN_GBLOCK
Definition: xmp_constant.h:40
long long * mapping_array
Definition: xmp_data_struct.h:88
_Bool is_allocated
Definition: xmp_data_struct.h:270
#define _XMP_SUM_VECTOR(_type)
Definition: xmp_pack_vector.c:94
int lo_width
Definition: xmp_data_struct.h:125
void _XMP_reflect_pack_dim(_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
Definition: xmp_reflect.c:1768
_Bool is_shadow_comm_member
Definition: xmp_data_struct.h:195
int local_upper
Definition: xmp_data_struct.h:210