libxmp/libxmpf in Omni Compiler
1.3.4
|
#include "xmp_internal.h"
#include <string.h>
#include <stdio.h>
#include <math.h>
|
void | _XMP_reflect_pcopy_sched_dim (_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type) |
|
int | _XMP_get_owner_pos (_XMP_array_t *a, int dim, int index) |
|
void | _XMP_reflect_pack_dim (_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type) |
|
void | _XMP_reflect_async_cardinal (_XMP_array_t *a, int async_id) |
|
void | _XMP_reflect_async_ordinal (_XMP_array_t *a, int async_id) |
|
void | xmp_dbg_printf (char *fmt,...) |
|
void | _XMP_set_reflect__ (_XMP_array_t *a, int dim, int lwidth, int uwidth, int is_periodic) |
|
void | _XMP_reflect__ (_XMP_array_t *a) |
|
void | _XMP_reflect_async__ (_XMP_array_t *a, int async_id) |
|
◆ _XMP_get_owner_pos()
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
1700 pos = (index + align_offset - tlb) / chunk;
1705 int tpos = index + align_offset;
1708 for (
int i = 0; i < np; i++){
1709 if (m[i] <= tpos && tpos < m[i+1]){
◆ _XMP_reflect__()
126 _xmp_set_reflect_flag = 0;
130 if (!_xmp_set_reflect_flag){
131 for (
int i = 0; i < a->
dim; i++){
135 _xmp_is_periodic[i] = 0;
140 for (
int i = 0; i < a->
dim; i++){
151 if (_xmp_lwidth[i] || _xmp_uwidth[i]){
175 _xmp_lwidth[i] != reflect->
lo_width ||
176 _xmp_uwidth[i] != reflect->
hi_width ||
183 _XMP_reflect_normal_sched_dim(a, i, _xmp_lwidth[i], _xmp_uwidth[i], _xmp_is_periodic[i]);
199 if (reflect->
req[0] != MPI_REQUEST_NULL)
200 MPI_Startall(4, reflect->
req);
201 _XMP_TEND2(xmptiming_.t_comm, xmptiming_.tdim_comm[i], t0);
205 MPI_Waitall(4, reflect->
req, MPI_STATUSES_IGNORE);
206 _XMP_TEND2(xmptiming_.t_wait, xmptiming_.tdim_wait[i], t0);
209 _XMP_reflect_unpack_dim(a, i, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);
226 _XMP_reflect_wait(a, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);
229 _xmp_set_reflect_flag = 0;
230 for (
int i = 0; i < a->
dim; i++){
233 _xmp_is_periodic[i] = 0;
◆ _XMP_reflect_async__()
void _XMP_reflect_async__(_XMP_array_t *a, int async_id)
948 _xmp_set_reflect_flag = 0;
952 if (!_xmp_set_reflect_flag){
953 for (
int i = 0; i < a->
dim; i++){
956 _XMP_fatal(
"asynchronous reflect for full shadow not supported.");
960 _xmp_is_periodic[i] = 0;
964 int reflect_ndims = 0;
965 for (
int i = 0; i < a->
dim; i++){
966 if (_xmp_lwidth[i] || _xmp_uwidth[i]){
971 if (reflect_ndims == 0){
974 else if (reflect_ndims == 1 || !is_ordinal){
981 _xmp_set_reflect_flag = 0;
982 for (
int i = 0; i < a->
dim; i++){
985 _xmp_is_periodic[i] = 0;
◆ _XMP_reflect_async_cardinal()
void _XMP_reflect_async_cardinal(_XMP_array_t *a, int async_id)
995 MPI_Request *reqs = &async->
reqs[async->
nreqs];
999 for (
int i = 0; i < a->
dim; i++){
1010 if (_xmp_lwidth[i] || _xmp_uwidth[i]){
1015 _xmp_lwidth[i] != reflect->
lo_width ||
1016 _xmp_uwidth[i] != reflect->
hi_width ||
1019 reflect->
lo_width = _xmp_lwidth[i];
1020 reflect->
hi_width = _xmp_uwidth[i];
1023 _XMP_reflect_normal_sched_dim(a, i, _xmp_lwidth[i], _xmp_uwidth[i], _xmp_is_periodic[i]);
1028 _XMP_fatal(
"too many arrays in an asynchronous reflect");
1030 memcpy(&reqs[nreqs], reflect->
req, 4 *
sizeof(MPI_Request));
1034 if (reflect->
req[0] != MPI_REQUEST_NULL)
1035 MPI_Startall(4, reflect->
req);
1036 _XMP_TEND2(xmptiming_.t_comm, xmptiming_.tdim_comm[i], t0);
1048 async->
nreqs += nreqs;
◆ _XMP_reflect_async_ordinal()
void _XMP_reflect_async_ordinal(_XMP_array_t *a, int async_id)
1060 _Bool reusable_sched =
false;
1063 int max_nreqs = (pow(3, n) - 1) * 2;
1065 async_reflect->
datatype = (MPI_Datatype *)
_XMP_alloc(
sizeof(MPI_Datatype) * max_nreqs);
1066 async_reflect->
reqs = (MPI_Request *)
_XMP_alloc(
sizeof(MPI_Request) * max_nreqs);
1067 for (
int i = 0; i < max_nreqs; i++){
1068 async_reflect->
datatype[i] = MPI_DATATYPE_NULL;
1069 async_reflect->
reqs[i] = MPI_REQUEST_NULL;
1071 async_reflect->
nreqs = 0;
1075 reusable_sched =
true;
1077 for (
int i = 0; i < n; i++){
1078 if (async_reflect->
lwidth[i] != _xmp_lwidth[i] ||
1079 async_reflect->
uwidth[i] != _xmp_uwidth[i] ||
1080 async_reflect->
is_periodic[i] != _xmp_is_periodic[i]){
1081 reusable_sched =
false;
1087 if (!reusable_sched){
1092 for (
int i = 0; i < n; i++){
1093 async_reflect->
lwidth[i] = _xmp_lwidth[i];
1094 async_reflect->
uwidth[i] = _xmp_uwidth[i];
1095 async_reflect->
is_periodic[i] = _xmp_is_periodic[i];
1097 if (_xmp_lwidth[i] > 0) lb[i] = -1;
1098 if (_xmp_uwidth[i] > 0) ub[i] = 1;
1101 for (
int i = 0; i < async_reflect->
nreqs; i++){
1102 if (async_reflect->
datatype[i] != MPI_DATATYPE_NULL)
1103 MPI_Type_free(&async_reflect->
datatype[i]);
1104 if (async_reflect->
reqs[i] != MPI_REQUEST_NULL)
1105 MPI_Request_free(&async_reflect->
reqs[i]);
1107 async_reflect->
nreqs = 0;
1110 for (ishadow[0] = lb[0]; ishadow[0] <= ub[0]; ishadow[0]++){
1111 for (ishadow[1] = lb[1]; ishadow[1] <= ub[1]; ishadow[1]++){
1112 for (ishadow[2] = lb[2]; ishadow[2] <= ub[2]; ishadow[2]++){
1113 for (ishadow[3] = lb[3]; ishadow[3] <= ub[3]; ishadow[3]++){
1114 for (ishadow[4] = lb[4]; ishadow[4] <= ub[4]; ishadow[4]++){
1115 for (ishadow[5] = lb[5]; ishadow[5] <= ub[5]; ishadow[5]++){
1116 for (ishadow[6] = lb[6]; ishadow[6] <= ub[6]; ishadow[6]++){
1121 for (
int i = 0; i < n; i++){
1122 if (ishadow[i] != 0) nnzero++;
1124 if (nnzero == 0)
continue;
1126 _XMP_reflect_sched_dir(a, ishadow, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);
1133 MPI_Request *reqs = &async->
reqs[async->
nreqs];
1137 _XMP_fatal(
"too many arrays in an asynchronous reflect");
1139 memcpy(reqs, async_reflect->
reqs, async_reflect->
nreqs *
sizeof(MPI_Request));
1144 MPI_Startall(async_reflect->
nreqs, reqs);
◆ _XMP_reflect_pack_dim()
void _XMP_reflect_pack_dim(_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
1772 char *pack_dst_lo, *pack_src_lo;
int dst_lo;
1773 char *pack_dst_hi, *pack_src_hi;
int dst_hi;
1776 if (a->
order == MPI_ORDER_FORTRAN){
1777 if (i == a->
dim - 1)
return;
1779 else if (a->
order == MPI_ORDER_C){
1783 _XMP_fatal(
"cannot determin the base language.");
1811 if (lwidth[i] && dst_lo != MPI_PROC_NULL){
1818 if (uwidth[i] && dst_hi != MPI_PROC_NULL){
◆ _XMP_reflect_pcopy_sched_dim()
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type)
471 if (lwidth == 0 && uwidth == 0)
return;
479 _XMP_fatal(
"reflect width is larger than shadow width.");
487 if (ni->
size == 1 && !is_periodic)
return;
489 int ndims = adesc->
dim;
492 int my_pos = ni->
rank;
496 int lo_pos = (my_pos == lb_pos) ? ub_pos : my_pos - 1;
497 int hi_pos = (my_pos == ub_pos) ? lb_pos : my_pos + 1;
502 int lo_rank = my_rank + (lo_pos - my_pos) * ni->
multiplier;
503 int hi_rank = my_rank + (hi_pos - my_pos) * ni->
multiplier;
505 int count = 0, blocklength = 0;
506 long long stride = 0;
511 void *lo_send_array = NULL, *lo_recv_array = NULL;
512 void *hi_send_array = NULL, *hi_recv_array = NULL;
514 void *lo_send_buf = NULL;
515 void *lo_recv_buf = NULL;
516 void *hi_send_buf = NULL;
517 void *hi_recv_buf = NULL;
526 if ((adesc->
order == MPI_ORDER_FORTRAN && target_dim != ndims - 1) ||
527 (adesc->
order == MPI_ORDER_C && target_dim != 0)){
531 count = reflect->
count;
542 if (adesc->
order == MPI_ORDER_FORTRAN){
545 blocklength = type_size;
548 for (
int i = ndims - 2; i >= target_dim; i--){
552 for (
int i = 1; i <= target_dim; i++){
558 else if (adesc->
order == MPI_ORDER_C){
561 blocklength = type_size;
562 stride = ainfo[ndims-1].
alloc_size * type_size;
564 for (
int i = 1; i <= target_dim; i++){
568 for (
int i = ndims - 2; i >= target_dim; i--){
575 _XMP_fatal(
"cannot determin the base language.");
588 lo_send_array = array_addr;
589 lo_recv_array = array_addr;
591 for (
int i = 0; i < ndims; i++) {
593 int lb_send, lb_recv;
594 unsigned long long dim_acc;
596 if (i == target_dim) {
608 lo_send_array = (
void *)((
char *)lo_send_array + lb_send * dim_acc * type_size);
609 lo_recv_array = (
void *)((
char *)lo_recv_array + lb_recv * dim_acc * type_size);
619 hi_send_array = array_addr;
620 hi_recv_array = array_addr;
622 for (
int i = 0; i < ndims; i++) {
624 int lb_send, lb_recv;
625 unsigned long long dim_acc;
627 if (i == target_dim) {
639 hi_send_array = (
void *)((
char *)hi_send_array + lb_send * dim_acc * type_size);
640 hi_recv_array = (
void *)((
char *)hi_recv_array + lb_recv * dim_acc * type_size);
651 ((adesc->
order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
652 (adesc->
order == MPI_ORDER_C && target_dim == 0))){
666 lo_buf_size = lwidth * blocklength * count;
669 ((adesc->
order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
670 (adesc->
order == MPI_ORDER_C && target_dim == 0))){
671 lo_send_buf = lo_send_array;
672 lo_recv_buf = lo_recv_array;
678 _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
687 hi_buf_size = uwidth * blocklength * count;
690 ((adesc->
order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
691 (adesc->
order == MPI_ORDER_C && target_dim == 0))){
692 hi_send_buf = hi_send_array;
693 hi_recv_buf = hi_recv_array;
699 _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
708 reflect->
count = count;
730 if (!is_periodic && my_pos == lb_pos){
731 lo_rank = MPI_PROC_NULL;
734 if (!is_periodic && my_pos == ub_pos){
735 hi_rank = MPI_PROC_NULL;
753 if (reflect->
req_reduce[0] != MPI_REQUEST_NULL){
757 if (reflect->
req_reduce[1] != MPI_REQUEST_NULL){
761 MPI_Send_init(reflect->
lo_recv_buf, lo_buf_size, MPI_BYTE, src,
763 MPI_Recv_init(reflect->
lo_send_buf, lo_buf_size, MPI_BYTE, dst,
767 if (reflect->
req[0] != MPI_REQUEST_NULL){
768 MPI_Request_free(&reflect->
req[0]);
771 if (reflect->
req[1] != MPI_REQUEST_NULL){
772 MPI_Request_free(&reflect->
req[1]);
775 MPI_Recv_init(reflect->
lo_recv_buf, lo_buf_size, MPI_BYTE, src,
777 MPI_Send_init(reflect->
lo_send_buf, lo_buf_size, MPI_BYTE, dst,
793 if (reflect->
req_reduce[2] != MPI_REQUEST_NULL){
797 if (reflect->
req_reduce[3] != MPI_REQUEST_NULL){
801 MPI_Send_init(reflect->
hi_recv_buf, hi_buf_size, MPI_BYTE, src,
803 MPI_Recv_init(reflect->
hi_send_buf, hi_buf_size, MPI_BYTE, dst,
807 if (reflect->
req[2] != MPI_REQUEST_NULL){
808 MPI_Request_free(&reflect->
req[2]);
811 if (reflect->
req[3] != MPI_REQUEST_NULL){
812 MPI_Request_free(&reflect->
req[3]);
815 MPI_Recv_init(reflect->
hi_recv_buf, hi_buf_size, MPI_BYTE, src,
817 MPI_Send_init(reflect->
hi_send_buf, hi_buf_size, MPI_BYTE, dst,
◆ _XMP_set_reflect__()
void _XMP_set_reflect__(_XMP_array_t *a, int dim, int lwidth, int uwidth, int is_periodic)
65 _xmp_set_reflect_flag = 1;
66 _xmp_lwidth[dim] = lwidth;
67 _xmp_uwidth[dim] = uwidth;
68 _xmp_is_periodic[dim] = is_periodic;
◆ xmp_dbg_printf()
void xmp_dbg_printf(char *fmt, ...)
43 vsprintf(buf,fmt,args);
◆ _xmp_reflect_pack_flag
int _xmp_reflect_pack_flag |
long long align_subscript
Definition: xmp_data_struct.h:246
int is_periodic
Definition: xmp_data_struct.h:126
int uwidth[_XMP_N_MAX_DIM]
Definition: xmp_data_struct.h:183
int size
Definition: xmp_data_struct.h:32
Definition: xmp_data_struct.h:31
_XMP_template_info_t info[1]
Definition: xmp_data_struct.h:115
#define _XMP_MAX_ASYNC_REQS
Definition: xmp_data_struct.h:472
int align_template_index
Definition: xmp_data_struct.h:260
_Bool is_periodic[_XMP_N_MAX_DIM]
Definition: xmp_data_struct.h:184
long long ser_lower
Definition: xmp_data_struct.h:72
void * _XMP_alloc(size_t size)
Definition: xmp_util.c:21
Definition: xmp_data_struct.h:194
_XMP_template_chunk_t * chunk
Definition: xmp_data_struct.h:112
MPI_Request * reqs
Definition: xmp_data_struct.h:187
MPI_Datatype * datatype
Definition: xmp_data_struct.h:186
void _XMP_reflect_async_cardinal(_XMP_array_t *a, int async_id)
Definition: xmp_reflect.c:991
int ser_upper
Definition: xmp_data_struct.h:200
int _XMP_get_owner_pos(_XMP_array_t *a, int dim, int index)
Definition: xmp_reflect.c:1688
void * lo_recv_array
Definition: xmp_data_struct.h:137
void * lo_recv_buf
Definition: xmp_data_struct.h:134
int shadow_type
Definition: xmp_data_struct.h:248
Definition: xmp_data_struct.h:119
#define _XMP_TEND(t, t0)
Definition: xmp_internal.h:748
_XMP_nodes_info_t * onto_nodes_info
Definition: xmp_data_struct.h:94
MPI_Request req[4]
Definition: xmp_data_struct.h:131
int hi_rank
Definition: xmp_data_struct.h:143
int comm_rank
Definition: xmp_data_struct.h:52
int reflect_is_initialized
Definition: xmp_data_struct.h:121
int nreqs
Definition: xmp_data_struct.h:189
unsigned long long par_chunk_width
Definition: xmp_data_struct.h:86
#define _XMP_COMM_REDUCE_SHADOW
Definition: xmp_constant.h:137
int _XMP_world_rank
Definition: xmp_world.c:9
void * hi_send_buf
Definition: xmp_data_struct.h:135
_XMP_nodes_t * onto_nodes
Definition: xmp_data_struct.h:111
#define _XMP_COMM_REFLECT
Definition: xmp_constant.h:136
int shadow_size_lo
Definition: xmp_data_struct.h:249
#define _XMP_TSTART(t0)
Definition: xmp_internal.h:747
_XMP_template_t * align_template
Definition: xmp_data_struct.h:312
int align_manner
Definition: xmp_data_struct.h:197
void _XMP_reflect_async_ordinal(_XMP_array_t *a, int async_id)
Definition: xmp_reflect.c:1055
_XMP_async_comm_t * _XMP_get_current_async()
Definition: xmp_async.c:205
#define _XMP_N_SHADOW_FULL
Definition: xmp_constant.h:66
int blocklength
Definition: xmp_data_struct.h:140
_XMP_async_reflect_t * async_reflect
Definition: xmp_data_struct.h:296
int ser_lower
Definition: xmp_data_struct.h:199
long long stride
Definition: xmp_data_struct.h:141
int hi_width
Definition: xmp_data_struct.h:125
void _XMP_pack_vector(char *restrict dst, char *restrict src, int count, int blocklength, long stride)
Definition: xmp_pack_vector.c:9
int nreqs
Definition: xmp_data_struct.h:460
int order
Definition: xmp_data_struct.h:276
void _XMP_reflect_pack_dim(_XMP_array_t *a, int i, int *lwidth, int *uwidth, int *is_periodic, int shadow_comm_type)
Definition: xmp_reflect.c:1768
int local_lower
Definition: xmp_data_struct.h:209
#define _XMP_N_MPI_TAG_REFLECT_HI
Definition: xmp_constant.h:12
int shadow_size_hi
Definition: xmp_data_struct.h:250
int lo_rank
Definition: xmp_data_struct.h:143
MPI_Request * reqs
Definition: xmp_data_struct.h:464
Definition: xmp_data_struct.h:458
int rank
Definition: xmp_data_struct.h:35
size_t type_size
Definition: xmp_data_struct.h:274
int alloc_size
Definition: xmp_data_struct.h:212
#define _XMP_N_ALIGN_BLOCK
Definition: xmp_constant.h:37
_XMP_array_info_t info[1]
Definition: xmp_data_struct.h:313
void * hi_recv_array
Definition: xmp_data_struct.h:138
MPI_Request req_reduce[4]
Definition: xmp_data_struct.h:132
void _XMP_free(void *p)
Definition: xmp_util.c:37
#define _XMP_ASSERT(_flag)
Definition: xmp_internal.h:34
void * lo_send_array
Definition: xmp_data_struct.h:137
int multiplier
Definition: xmp_data_struct.h:37
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type)
Definition: xmp_reflect.c:468
void * array_addr_p
Definition: xmp_data_struct.h:279
_XMP_comm_t * comm
Definition: xmp_data_struct.h:53
int dim
Definition: xmp_data_struct.h:272
void _XMP_fatal(char *msg)
Definition: xmp_util.c:42
int prev_pcopy_sched_type
Definition: xmp_data_struct.h:123
unsigned long long dim_acc
Definition: xmp_data_struct.h:242
_XMP_reflect_sched_t * reflect_sched
Definition: xmp_data_struct.h:252
#define _XMP_N_MPI_TAG_REFLECT_LO
Definition: xmp_constant.h:11
int count
Definition: xmp_data_struct.h:140
int lwidth[_XMP_N_MAX_DIM]
Definition: xmp_data_struct.h:183
void * lo_send_buf
Definition: xmp_data_struct.h:134
#define _XMP_N_MAX_DIM
Definition: xmp_constant.h:6
void * hi_recv_buf
Definition: xmp_data_struct.h:135
#define _XMP_N_SHADOW_NORMAL
Definition: xmp_constant.h:65
#define _XMP_TEND2(t, tt, t0)
Definition: xmp_internal.h:749
#define _XMP_N_SHADOW_NONE
Definition: xmp_constant.h:64
void * hi_send_array
Definition: xmp_data_struct.h:138
void _XMP_reflect_shadow_FULL(void *array_addr, void *array_desc, int array_index)
#define _XMP_N_ALIGN_GBLOCK
Definition: xmp_constant.h:40
long long * mapping_array
Definition: xmp_data_struct.h:88
int _xmp_reflect_pack_flag
Definition: xmp_pack_vector.c:298
_Bool is_allocated
Definition: xmp_data_struct.h:270
int lo_width
Definition: xmp_data_struct.h:125
Definition: xmp_data_struct.h:181
_Bool is_shadow_comm_member
Definition: xmp_data_struct.h:195
int local_upper
Definition: xmp_data_struct.h:210