libxmp/libxmpf in Omni Compiler
1.3.4
|
#include "xmp.h"
#include "xmp_internal.h"
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
|
void | _XMP_align_local_idx (long long int global_idx, int *local_idx, _XMP_array_t *array, int array_axis, int *rank) |
|
int | check_template (_XMP_template_t *dst_t, _XMP_template_t *src_t) |
|
void | xmp_transpose (void *dst_p, void *src_p, int opt) |
|
void | xmpf_transpose (void *dst_p, void *src_p, int opt) |
|
void | xmp_matmul (void *x_p, void *a_p, void *b_p) |
|
void | xmpf_matmul (void *x_p, void *a_p, void *b_p) |
|
void | xmp_gather (void *x_d, void *a_d,...) |
|
void | xmpf_gather (void *x_p, void *a_p, _XMP_array_t **idx_array) |
|
void | xmp_scatter (void *x_d, void *a_d,...) |
|
void | xmpf_scatter (void *x_p, void *a_p, _XMP_array_t **idx_array) |
|
void | xmp_pack (void *v_p, void *a_p, void *m_p) |
|
void | xmp_pack_mask (void *v_p, void *a_p, void *m_p) |
|
void | xmp_pack_nomask (void *v_p, void *a_p) |
|
void | xmpf_pack (void *v_p, void *a_p, void *m_p) |
|
void | xmpf_pack_mask (void *v_p, void *a_p, void *m_p) |
|
void | xmpf_pack_nomask (void *v_p, void *a_p) |
|
void | xmp_unpack (void *a_p, void *v_p, void *m_p) |
|
void | xmp_unpack_mask (void *a_p, void *v_p, void *m_p) |
|
void | xmp_unpack_nomask (void *a_p, void *v_p) |
|
void | xmpf_unpack (void *a_p, void *v_p, void *m_p) |
|
void | xmpf_unpack_mask (void *a_p, void *v_p, void *m_p) |
|
void | xmpf_unpack_nomask (void *a_p, void *v_p) |
|
void | _XMP_atomic_define_0 (void *dst_desc, size_t dst_offset, int value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_define_1 (void *dst_desc, size_t dst_offset, int image0, int value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_define_2 (void *dst_desc, size_t dst_offset, int image0, int image1, int value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_define_3 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_define_4 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_define_5 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_define_6 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int image5, int value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_define_7 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int image5, int image6, int value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_ref_0 (void *dst_desc, size_t dst_offset, int *value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_ref_1 (void *dst_desc, size_t dst_offset, int image, int *value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_ref_2 (void *dst_desc, size_t dst_offset, int image0, int image1, int *value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_ref_3 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int *value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_ref_4 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int *value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_ref_5 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int *value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_ref_6 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int image5, int *value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
void | _XMP_atomic_ref_7 (void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int image5, int image6, int *value, void *src_desc, size_t src_offset, size_t elmt_size) |
|
◆ MPI_PORTABLE_PLATFORM_H
#define MPI_PORTABLE_PLATFORM_H |
◆ _XMP_align_local_idx()
void _XMP_align_local_idx(long long int global_idx, int *local_idx, _XMP_array_t *array, int array_axis, int *rank)
1366 long long tbase = template->info[template_index].ser_lower;
1368 int irank, idiv, imod;
1374 *local_idx = global_idx + offset - base;
1389 idiv = offset/n_info->size;
1390 imod = offset%n_info->size;
1391 *rank = (global_idx + offset - base) % n_info->size;
1392 *local_idx = (global_idx + offset - base) / n_info->size;
1394 *local_idx = *local_idx - (idiv + 1);
1396 *local_idx = *local_idx - idiv;
1403 idiv = (offset/w)/n_info->size;
1404 int imod1 = (offset/w)%n_info->size;
1405 int imod2 = offset%w;
1406 int off = global_idx + offset - base;
1407 *local_idx = (off / (n_info->size*w)) * w + off%w;
1409 *rank=(off/w)% (n_info->size);
1411 if (imod1 == *rank ){
1412 *local_idx = *local_idx - idiv*w-imod2;
1413 }
else if (imod1 > *rank){
1414 *local_idx = *local_idx - (idiv+1)*w;
1416 }
else if (imod1 == 0){
1417 if (imod1 == *rank ){
1418 *local_idx = *local_idx - idiv*w -imod2;
1420 *local_idx = *local_idx - idiv*w;
1429 for(
int i=1;i<(n_info->
size+1);i++){
1430 if(global_idx + offset < chunk->mapping_array[i]+ (base - tbase)){
1436 for(
int i=1;i<n_info->size+1;i++){
1437 if(offset < chunk->mapping_array[i]+(base-tbase)){
1439 idiv = offset - (chunk->
mapping_array[i-1] + (base - tbase) - base);
1443 if (*rank == irank){
1444 *local_idx = *local_idx - idiv;
1451 *local_idx=global_idx - base;
1455 _XMP_fatal(
"_XMP_: unknown chunk dist_manner");
◆ _XMP_atomic_define_0()
void _XMP_atomic_define_0(void *dst_desc, size_t dst_offset, int value, void *src_desc, size_t src_offset, size_t elmt_size)
6334 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6344 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_define_1()
void _XMP_atomic_define_1(void *dst_desc, size_t dst_offset, int image0, int value, void *src_desc, size_t src_offset, size_t elmt_size)
6352 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6362 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_define_2()
void _XMP_atomic_define_2(void *dst_desc, size_t dst_offset, int image0, int image1, int value, void *src_desc, size_t src_offset, size_t elmt_size)
6370 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6381 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_define_3()
void _XMP_atomic_define_3(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int value, void *src_desc, size_t src_offset, size_t elmt_size)
6389 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6402 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_define_4()
void _XMP_atomic_define_4(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int value, void *src_desc, size_t src_offset, size_t elmt_size)
6410 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6424 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_define_5()
void _XMP_atomic_define_5(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int value, void *src_desc, size_t src_offset, size_t elmt_size)
6432 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6447 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_define_6()
void _XMP_atomic_define_6(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int image5, int value, void *src_desc, size_t src_offset, size_t elmt_size)
6456 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6472 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_define_7()
void _XMP_atomic_define_7(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int image5, int image6, int value, void *src_desc, size_t src_offset, size_t elmt_size)
6481 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6498 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_ref_0()
void _XMP_atomic_ref_0(void *dst_desc, size_t dst_offset, int *value, void *src_desc, size_t src_offset, size_t elmt_size)
6506 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6517 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_ref_1()
void _XMP_atomic_ref_1(void *dst_desc, size_t dst_offset, int image, int *value, void *src_desc, size_t src_offset, size_t elmt_size)
6525 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6535 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_ref_2()
void _XMP_atomic_ref_2(void *dst_desc, size_t dst_offset, int image0, int image1, int *value, void *src_desc, size_t src_offset, size_t elmt_size)
6543 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6555 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_ref_3()
void _XMP_atomic_ref_3(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int *value, void *src_desc, size_t src_offset, size_t elmt_size)
6563 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6576 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_ref_4()
void _XMP_atomic_ref_4(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int *value, void *src_desc, size_t src_offset, size_t elmt_size)
6584 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6598 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_ref_5()
void _XMP_atomic_ref_5(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int *value, void *src_desc, size_t src_offset, size_t elmt_size)
6607 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6622 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_ref_6()
void _XMP_atomic_ref_6(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int image5, int *value, void *src_desc, size_t src_offset, size_t elmt_size)
6631 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6647 #elif _XMP_MPI3_ONESIDED
◆ _XMP_atomic_ref_7()
void _XMP_atomic_ref_7(void *dst_desc, size_t dst_offset, int image0, int image1, int image2, int image3, int image4, int image5, int image6, int *value, void *src_desc, size_t src_offset, size_t elmt_size)
6656 #if defined(_XMP_GASNET) || defined(_XMP_FJRDMA) || defined(_XMP_MPI3_ONESIDED) || defined(_XMP_UTOFU)
6673 #elif _XMP_MPI3_ONESIDED
◆ check_template()
1220 if(dst_t == src_t)
return 1;
1222 if(dst_t->
dim != src_t->
dim ||
1226 for(i=0; i<dst_t->
dim; i++){
◆ xmp_gather()
void xmp_gather(void *x_d, void *a_d, ...)
4777 va_start( valst, a_d );
4778 for(i=0;i<a_p->
dim;i++){
4780 idx_array[i] = idx_p;
4784 xmp_gather_kernel(x_d, a_d, idx_array);
◆ xmp_matmul()
void xmp_matmul(void *x_p, void *a_p, void *b_p)
3802 if(x_d->
dim != 2 || a_d->
dim != 2 || b_d->
dim != 2){
3803 _XMP_fatal(
"xmp_matmul: argument dimension is not 2");
3807 _XMP_fatal(
"xmp_matmul: argument type is not match");
3813 _XMP_fatal(
"xmp_matmul: argument is not distributed");
3825 for(i=0; i<x_d->
dim; i++){
3835 for(i=0; i<x_d->
dim; i++){
3876 }
else if(same_align == 2){
3900 if(same_nodes && !duplicate && same_align == 1){
3901 xmp_matmul_allgather(x_d, a_d, b_d, dist_dim);
3902 }
else if(xmpf_running && same_nodes && !duplicate && same_align == 2){
3903 xmp_matmul_blockf(x_d, a_d, b_d);
3904 }
else if(!xmpf_running && same_nodes && !duplicate && same_align == 2){
3905 xmp_matmul_blockc(x_d, a_d, b_d);
3907 xmp_matmul_no_opt(x_d, a_d, b_d);
◆ xmp_pack()
void xmp_pack(void *v_p, void *a_p, void *m_p)
5737 MPI_Request *com_req1;
5751 int *,
int *,
int *,
int *, int,
void *);
5753 int *,
int *,
void *);
5759 struct Listindx *next;
5765 struct Listindx *
head;
5766 struct Listindx *
tail;
5769 struct Listinfo *listinfo1;
5770 struct Listinfo *listinfo2;
5781 xmp_pack_recv_info = xmp_pack_unpack_array_v;
5782 xmp_pack_send_info = xmp_pack_unpack_array_a;
5794 _XMP_fatal(
"xmp_pack: 1st argument dimension is not 1");
5799 if(a_d->
dim != m_d->
dim){
5800 _XMP_fatal(
"xmp_pack: 2nd and 3rd argument dimension is not match");
5806 _XMP_fatal(
"xmp_pack: 1st and 2nd argument type is not match");
5812 _XMP_fatal(
"xmp_pack: argument is not distributed");
5815 _XMP_fatal(
"xmp_pack: argument is not distributed");
5835 listinfo1 = (
struct Listinfo*)
_XMP_alloc(
sizeof(
struct Listinfo)*size);
5836 listinfo2 = (
struct Listinfo*)
_XMP_alloc(
sizeof(
struct Listinfo)*size);
5837 for(i=0;i<size;i++){
5838 listinfo1[i].num = 0;
5839 listinfo1[i].head = NULL;
5840 listinfo1[i].tail = NULL;
5841 listinfo2[i].num = 0;
5842 listinfo2[i].head = NULL;
5843 listinfo2[i].tail = NULL;
5846 for(i=0; i<a_d->
dim; i++){
5853 for(i=0; i<a_d->
dim; i++){
5857 maskflag = (
int*)
_XMP_alloc(masknum*
sizeof(
int));
5858 pickindx = (
int*)
_XMP_alloc(masknum*
sizeof(
int));
5860 com_req1 = (MPI_Request*)
_XMP_alloc(size*
sizeof(MPI_Request));
5863 for(i=0;i<masknum;i++){
5867 for(i=0;i<size;i++){
5868 com_req1[i] = MPI_REQUEST_NULL;
5875 xmp_pack_unpack_dim(m_d, m_d->
dim, offset, &wkcount, maskflag, lindx2);
5877 xmp_pack_unpack_dim(m_d, -1, offset, &wkcount, maskflag, lindx2);
5883 MPI_Allreduce(MPI_IN_PLACE, maskflag, masknum, MPI_INT, MPI_SUM, *comm);
5885 for(i=0;i<masknum;i++){
5893 for(i=0;i<masknum;i++){
5895 pickindx[i]=icounter++;
5906 xmp_pack_recv_info(a_d, v_d, a_d->
dim, offset, &wkcount, lindx2, pickindx,
5907 &vcount, packflag,(
void *)listinfo2);
5909 xmp_pack_recv_info(a_d, v_d, -1, offset, &wkcount, lindx2, pickindx,
5910 &vcount, packflag,(
void *)listinfo2);
5917 xmp_pack_send_info(a_d, v_d, a_d->
dim, offset, lindx2, pickindx,
5920 xmp_pack_send_info(a_d, v_d, -1, offset, lindx2, pickindx,
5926 for(i=0;i<size;i++){
5927 if(listinfo1[i].num > 0){
5928 p = listinfo1[i].head;
5929 for(j=0;j<listinfo1[i].num;j++){
5939 for(i=0;i<size;i++){
5940 if(listinfo1[i].num > 0){
5941 p = listinfo1[i].head;
5942 for(j=0;j<listinfo1[i].num;j++){
5948 MPI_Isend(buf+dtoffset, a_d->
type_size*listinfo1[i].num, MPI_BYTE,
5949 i, 99, *comm, &com_req1[comcount]);
5950 dtoffset += listinfo1[i].num*a_d->
type_size;
5956 for(i=0;i<size;i++){
5957 if(listinfo2[i].num > 0){
5960 MPI_Recv(buf2, v_d->
type_size*listinfo2[i].num, MPI_BYTE, i, 99, *comm, &istatus);
5962 p = listinfo2[i].head;
5963 for(j=0;j<listinfo2[i].num;j++){
5975 MPI_Waitall(comcount, com_req1, MPI_STATUSES_IGNORE);
5980 duplicate_copy(v_d);
◆ xmp_pack_mask()
void xmp_pack_mask(void *v_p, void *a_p, void *m_p)
◆ xmp_pack_nomask()
void xmp_pack_nomask(void *v_p, void *a_p)
◆ xmp_scatter()
void xmp_scatter(void *x_d, void *a_d, ...)
5014 va_start( valst, a_d );
5015 for(i=0;i<x_p->
dim;i++){
5017 idx_array[i] = idx_p;
5021 xmp_scatter_kernel(x_d, a_d, idx_array);
◆ xmp_transpose()
void xmp_transpose(void *dst_p, void *src_p, int opt)
1256 int dst_alloc_size[2];
1257 int src_alloc_size[2];
1264 if(dst_d->
dim != 2 || src_d->
dim != 2){
1265 _XMP_fatal(
"xmp_transpose: argument dimension is not 2");
1269 _XMP_fatal(
"xmp_transpose: argument type is not match");
1273 _XMP_fatal(
"xmp_transpose: argument is not distributed");
1282 dst_alloc_size[0] = 0;
1283 dst_alloc_size[1] = 0;
1289 src_alloc_size[0] = 0;
1290 src_alloc_size[1] = 0;
1300 for(i=0; i<dst_d->
dim; i++){
1320 for(i=0; i<src_d->
dim; i++){
1348 unsigned long long w;
1379 printf(
"rank%d: nodes %d: template %d: align %d: regular %d\n",
1380 i, same_nodes, same_template, same_align, regular);
1393 if(same_nodes && same_template && !same_align && dist_num == 1 &&
1407 for(j=src_d->
info[1].
local_lower; j<=src_d->info[1].local_upper; j++){
1409 for(i=src_d->
info[0].
local_lower; i<=src_d->info[0].local_upper; i++){
1411 memcpy(dst_array_p+(dj*dst_alloc_size[0]+di)*dst_d->
type_size,
1412 src_array_p+(j*src_alloc_size[0]+i)*src_d->
type_size,
1419 for(i=src_d->
info[0].
local_lower; i<=src_d->info[0].local_upper; i++){
1421 for(j=src_d->
info[1].
local_lower; j<=src_d->info[1].local_upper; j++){
1423 memcpy(dst_array_p+(di*dst_alloc_size[1]+dj)*dst_d->
type_size,
1424 src_array_p+(i*src_alloc_size[1]+j)*src_d->
type_size,
1431 show_array(src_d, NULL);
1432 show_array(dst_d, NULL);
1436 else if(xmpf_running && same_nodes && same_align && regular && !duplicate &&
1442 xmp_transpose_original(dst_d, src_d, opt);
1446 else if(same_nodes && same_align && regular && !duplicate && dist_num == 1){
1449 xmp_transpose_alltoall(dst_d, src_d, opt, dist_dim);
1460 xmp_transpose_no_opt(dst_d, src_d, opt);
◆ xmp_unpack()
void xmp_unpack(void *a_p, void *v_p, void *m_p)
6040 MPI_Request *com_req2;
6054 int *,
int *,
int *,
int *, int,
void *);
6056 int *,
int *,
void *);
6061 struct Listindx *next;
6066 struct Listindx *
head;
6067 struct Listindx *
tail;
6069 struct Listinfo *listinfo1;
6070 struct Listinfo *listinfo2;
6080 xmp_unpack_send_info = xmp_pack_unpack_array_v;
6081 xmp_unpack_recv_info = xmp_pack_unpack_array_a;
6093 _XMP_fatal(
"xmp_unpack: 2st argument dimension is not 1");
6098 if(a_d->
dim != m_d->
dim){
6099 _XMP_fatal(
"xmp_unpack: 1nd and 3rd argument dimension is not match");
6105 _XMP_fatal(
"xmp_unpack: 1st and 2nd argument type is not match");
6111 _XMP_fatal(
"xmp_unpack: argument is not distributed");
6114 _XMP_fatal(
"xmp_unpack: argument is not distributed");
6134 listinfo1 = (
struct Listinfo*)
_XMP_alloc(
sizeof(
struct Listinfo)*size);
6135 listinfo2 = (
struct Listinfo*)
_XMP_alloc(
sizeof(
struct Listinfo)*size);
6136 for(i=0;i<size;i++){
6137 listinfo1[i].num = 0;
6138 listinfo1[i].head = NULL;
6139 listinfo1[i].tail = NULL;
6140 listinfo2[i].num = 0;
6141 listinfo2[i].head = NULL;
6142 listinfo2[i].tail = NULL;
6145 for(i=0; i<a_d->
dim; i++){
6152 for(i=0; i<a_d->
dim; i++){
6155 for(i=0; i<v_d->
dim; i++){
6158 maskflag = (
int*)
_XMP_alloc(masknum*
sizeof(
int));
6159 pickindx = (
int*)
_XMP_alloc(masknum*
sizeof(
int));
6161 com_req2 = (MPI_Request*)
_XMP_alloc(size*
sizeof(MPI_Request));
6164 for(i=0;i<masknum;i++){
6168 for(i=0;i<size;i++){
6169 com_req2[i] = MPI_REQUEST_NULL;
6176 xmp_pack_unpack_dim(m_d, m_d->
dim, offset, &wkcount, maskflag, lindx2);
6178 xmp_pack_unpack_dim(m_d, -1, offset, &wkcount, maskflag, lindx2);
6184 MPI_Allreduce(MPI_IN_PLACE, maskflag, masknum, MPI_INT, MPI_SUM, *comm);
6186 for(i=0;i<masknum;i++){
6194 for(i=0;i<masknum;i++){
6196 pickindx[i]=icounter++;
6207 xmp_unpack_send_info(a_d, v_d, a_d->
dim, offset, &wkcount, lindx2, pickindx,
6208 &vcount, packflag, listinfo2);
6210 xmp_unpack_send_info(a_d, v_d, -1, offset, &wkcount, lindx2, pickindx,
6211 &vcount, packflag, listinfo2);
6218 xmp_unpack_recv_info(a_d, v_d, a_d->
dim, offset, lindx2, pickindx,
6221 xmp_unpack_recv_info(a_d, v_d, -1, offset, lindx2, pickindx,
6227 for(i=0;i<size;i++){
6228 if(listinfo2[i].num > 0){
6229 p = listinfo2[i].head;
6230 for(j=0;j<listinfo2[i].num;j++){
6240 for(i=0;i<size;i++){
6241 if(listinfo2[i].num > 0){
6242 p = listinfo2[i].head;
6243 for(j=0;j<listinfo2[i].num;j++){
6249 MPI_Isend(buf+dtoffset, v_d->
type_size*listinfo2[i].num, MPI_BYTE,
6250 i, 99, *comm, &com_req2[comcount]);
6251 dtoffset += listinfo2[i].num*v_d->
type_size;
6257 for(i=0;i<size;i++){
6258 if(listinfo1[i].num > 0){
6261 MPI_Recv(buf2, a_d->
type_size*listinfo1[i].num, MPI_BYTE, i, 99, *comm, &istatus);
6263 p = listinfo1[i].head;
6264 for(j=0;j<listinfo1[i].num;j++){
6275 MPI_Waitall(comcount, com_req2, MPI_STATUSES_IGNORE);
6280 duplicate_copy(a_d);
◆ xmp_unpack_mask()
void xmp_unpack_mask(void *a_p, void *v_p, void *m_p)
◆ xmp_unpack_nomask()
void xmp_unpack_nomask(void *a_p, void *v_p)
◆ xmpf_gather()
void xmpf_gather(void *x_p, void *a_p, _XMP_array_t **idx_array)
4792 xmp_gather_kernel(x_p, a_p, idx_array);
◆ xmpf_matmul()
void xmpf_matmul(void *x_p, void *a_p, void *b_p)
◆ xmpf_pack()
void xmpf_pack(void *v_p, void *a_p, void *m_p)
◆ xmpf_pack_mask()
void xmpf_pack_mask(void *v_p, void *a_p, void *m_p)
◆ xmpf_pack_nomask()
void xmpf_pack_nomask(void *v_p, void *a_p)
◆ xmpf_scatter()
void xmpf_scatter(void *x_p, void *a_p, _XMP_array_t **idx_array)
5029 xmp_scatter_kernel(x_p, a_p, idx_array);
◆ xmpf_transpose()
void xmpf_transpose(void *dst_p, void *src_p, int opt)
◆ xmpf_unpack()
void xmpf_unpack(void *a_p, void *v_p, void *m_p)
◆ xmpf_unpack_mask()
void xmpf_unpack_mask(void *a_p, void *v_p, void *m_p)
◆ xmpf_unpack_nomask()
void xmpf_unpack_nomask(void *a_p, void *v_p)
long long align_subscript
Definition: xmp_data_struct.h:246
void _XMP_gasnet_atomic_ref(int target_rank, _XMP_coarray_t *dst_desc, size_t dst_offset, int *value, size_t elmt_size)
Definition: xmp_intrinsic_gasnet.c:95
long long par_upper
Definition: xmp_data_struct.h:81
int size
Definition: xmp_data_struct.h:32
_XMP_nodes_info_t info[1]
Definition: xmp_data_struct.h:60
Definition: xmp_data_struct.h:31
int comm_size
Definition: xmp_data_struct.h:48
void _XMP_mpi_atomic_ref(int target_rank, _XMP_coarray_t *c, size_t offset, int *value, size_t elmt_size)
Definition: xmp_intrinsic_mpi.c:21
_XMP_template_info_t info[1]
Definition: xmp_data_struct.h:115
unsigned long long ser_size
Definition: xmp_data_struct.h:74
int align_template_index
Definition: xmp_data_struct.h:260
#define _XMP_N_DIST_BLOCK
Definition: xmp_constant.h:29
void * _XMP_alloc(size_t size)
Definition: xmp_util.c:21
void _XMP_gasnet_atomic_define(int target_rank, _XMP_coarray_t *dst_desc, size_t dst_offset, int value, _XMP_coarray_t *src_desc, size_t src_offset, size_t elmt_size)
Definition: xmp_intrinsic_gasnet.c:57
_XMP_template_chunk_t * chunk
Definition: xmp_data_struct.h:112
void xmp_transpose(void *dst_p, void *src_p, int opt)
Definition: xmp_intrinsic.c:1245
void xmp_unpack(void *a_p, void *v_p, void *m_p)
Definition: xmp_intrinsic.c:6031
int ser_upper
Definition: xmp_data_struct.h:200
long long par_lower
Definition: xmp_data_struct.h:80
int * distance_of_image_elmts
Definition: xmp_data_struct.h:357
int ser_size
Definition: xmp_data_struct.h:201
_XMP_nodes_info_t * onto_nodes_info
Definition: xmp_data_struct.h:94
MemoryChunkOrder_t * head
Definition: xmpco_alloc.c:92
_Bool is_fixed
Definition: xmp_data_struct.h:104
void xmp_matmul(void *x_p, void *a_p, void *b_p)
Definition: xmp_intrinsic.c:3785
int dim
Definition: xmp_data_struct.h:108
unsigned long long par_chunk_width
Definition: xmp_data_struct.h:86
int _XMP_world_rank
Definition: xmp_world.c:9
_XMP_nodes_t * onto_nodes
Definition: xmp_data_struct.h:111
int shadow_size_lo
Definition: xmp_data_struct.h:249
MemoryChunkOrder_t * tail
Definition: xmpco_alloc.c:93
_XMP_template_t * align_template
Definition: xmp_data_struct.h:312
int align_manner
Definition: xmp_data_struct.h:197
#define _XMP_N_DIST_CYCLIC
Definition: xmp_constant.h:30
Definition: xmp_data_struct.h:98
int ser_lower
Definition: xmp_data_struct.h:199
Definition: xmp_data_struct.h:78
int onto_nodes_index
Definition: xmp_data_struct.h:92
#define _XMP_N_DIST_BLOCK_CYCLIC
Definition: xmp_constant.h:31
int local_lower
Definition: xmp_data_struct.h:209
#define _XMP_N_DIST_GBLOCK
Definition: xmp_constant.h:32
int shadow_size_hi
Definition: xmp_data_struct.h:250
Definition: xmp_data_struct.h:266
int par_stride
Definition: xmp_data_struct.h:85
int dist_manner
Definition: xmp_data_struct.h:87
size_t type_size
Definition: xmp_data_struct.h:274
int alloc_size
Definition: xmp_data_struct.h:212
#define _XMP_N_ALIGN_CYCLIC
Definition: xmp_constant.h:38
#define _XMP_N_ALIGN_BLOCK
Definition: xmp_constant.h:37
Definition: xmp_data_struct.h:328
#define _XMP_N_ALIGN_DUPLICATION
Definition: xmp_constant.h:36
_XMP_array_info_t info[1]
Definition: xmp_data_struct.h:313
void _XMP_free(void *p)
Definition: xmp_util.c:37
void _XMP_fjrdma_atomic_ref(int target_rank, _XMP_coarray_t *dst_desc, size_t dst_offset, int *value, _XMP_coarray_t *src_desc, size_t src_offset, size_t elmt_size)
Definition: xmp_intrinsic_fjrdma.c:26
#define _XMP_N_ALIGN_NOT_ALIGNED
Definition: xmp_constant.h:35
void _XMP_utofu_atomic_define(int target_rank, _XMP_coarray_t *dst_desc, size_t dst_offset, int value, size_t elmt_size)
Definition: xmp_intrinsic_utofu.c:32
void * array_addr_p
Definition: xmp_data_struct.h:279
#define _XMP_N_ALIGN_BLOCK_CYCLIC
Definition: xmp_constant.h:39
int dim
Definition: xmp_data_struct.h:272
void xmp_pack(void *v_p, void *a_p, void *m_p)
Definition: xmp_intrinsic.c:5728
void _XMP_fatal(char *msg)
Definition: xmp_util.c:42
unsigned long long par_width
Definition: xmp_data_struct.h:82
int check_template(_XMP_template_t *dst_t, _XMP_template_t *src_t)
Definition: xmp_intrinsic.c:1216
_Bool is_distributed
Definition: xmp_data_struct.h:105
int dim
Definition: xmp_data_struct.h:47
void _XMP_fjrdma_atomic_define(int target_rank, _XMP_coarray_t *dst_desc, size_t dst_offset, int value, _XMP_coarray_t *src_desc, size_t src_offset, size_t elmt_size)
Definition: xmp_intrinsic_fjrdma.c:3
void _XMP_mpi_atomic_define(int target_rank, _XMP_coarray_t *c, size_t offset, int value, size_t elmt_size)
Definition: xmp_intrinsic_mpi.c:3
int type
Definition: xmp_data_struct.h:273
#define _XMP_N_ALIGN_GBLOCK
Definition: xmp_constant.h:40
long long * mapping_array
Definition: xmp_data_struct.h:88
_Bool is_allocated
Definition: xmp_data_struct.h:270
void * _XMP_get_execution_nodes(void)
Definition: xmp_nodes_stack.c:46
void _XMP_utofu_atomic_ref(int target_rank, _XMP_coarray_t *dst_desc, size_t dst_offset, int *value, size_t elmt_size)
Definition: xmp_intrinsic_utofu.c:59
int local_upper
Definition: xmp_data_struct.h:210