libxmp/libxmpf in Omni Compiler  1.3.4
xmp_coarray_utils.c File Reference
#include <string.h>
#include "xmp_internal.h"
#include "xmp_math_function.h"
Include dependency graph for xmp_coarray_utils.c:

Functions

void _XMP_check_less_than_SIZE_MAX (const long s)
 
size_t _XMP_get_offset (const _XMP_array_section_t *array_info, const int dims)
 
size_t _XMP_calc_max_copy_chunk (const int dst_dims, const int src_dims, const _XMP_array_section_t *dst_info, const _XMP_array_section_t *src_info)
 
_Bool _XMP_check_overlapping (const char *dst_start, const char *dst_end, const char *src_start, const char *src_end)
 
void _XMP_local_contiguous_copy (char *dst, const char *src, const size_t dst_elmts, const size_t src_elmts, const size_t elmt_size)
 
int _XMP_get_dim_of_allelmts (const int dims, const _XMP_array_section_t *array_info)
 
void _XMP_stride_memcpy_1dim (char *buf1, const char *buf2, const _XMP_array_section_t *array_info, size_t element_size, const int flag)
 
void _XMP_stride_memcpy_2dim (char *buf1, const char *buf2, const _XMP_array_section_t *array_info, size_t element_size, const int flag)
 
void _XMP_stride_memcpy_3dim (char *buf1, const char *buf2, const _XMP_array_section_t *array_info, size_t element_size, const int flag)
 
void _XMP_stride_memcpy_4dim (char *buf1, const char *buf2, const _XMP_array_section_t *array_info, size_t element_size, const int flag)
 
void _XMP_stride_memcpy_5dim (char *buf1, const char *buf2, const _XMP_array_section_t *array_info, size_t element_size, const int flag)
 
void _XMP_stride_memcpy_6dim (char *buf1, const char *buf2, const _XMP_array_section_t *array_info, size_t element_size, const int flag)
 
void _XMP_stride_memcpy_7dim (char *buf1, const char *buf2, const _XMP_array_section_t *array_info, size_t element_size, const int flag)
 
void _XMP_set_coarray_addresses_with_chunk (uint64_t *addrs, const uint64_t base_addr, const _XMP_array_section_t *array_info, const int dims, const size_t chunk_size, const size_t copy_elmts)
 
void _XMP_set_coarray_addresses (const uint64_t addr, const _XMP_array_section_t *array, const int dims, const size_t elmts, uint64_t *addrs)
 
int _is_all_element (const _XMP_array_section_t *array_info, int dim)
 
int _check_round (const _XMP_array_section_t *array, const int dim)
 
int _is_constant_stride_1dim ()
 Returns TRUE if a 1-dimensional array has a constant stride (always TRUE).
 
int _is_constant_stride_2dim (const _XMP_array_section_t *array_info)
 
int _is_constant_stride_3dim (const _XMP_array_section_t *array_info)
 
int _is_constant_stride_4dim (const _XMP_array_section_t *array_info)
 
int _is_constant_stride_5dim (const _XMP_array_section_t *array_info)
 
int _is_constant_stride_6dim (const _XMP_array_section_t *array_info)
 
int _is_constant_stride_7dim (const _XMP_array_section_t *array_info)
 
int _is_the_same_shape_except_for_start (const _XMP_array_section_t *array1_info, const _XMP_array_section_t *array2_info, const int array1_dims, const int array2_dims)
 
int _XMP_is_the_same_constant_stride (const _XMP_array_section_t *array1_info, const _XMP_array_section_t *array2_info, const int array1_dims, const int array2_dims)
 
long _XMP_calc_stride (const _XMP_array_section_t *array_info, const int dims, const size_t chunk_size)
 

Function Documentation

◆ _check_round()

int _check_round ( const _XMP_array_section_t array,
const int  dim 
)
1388 {
1389  return array[dim].length * array[dim].stride - array[dim].elmts == 0;
1390 }
Here is the caller graph for this function:

◆ _is_all_element()

int _is_all_element ( const _XMP_array_section_t array_info,
int  dim 
)
1369  {
1370  if(array_info[dim].elmts == array_info[dim].length)
1371  return _XMP_N_INT_TRUE;
1372  else
1373  return _XMP_N_INT_FALSE;
1374 }
Here is the caller graph for this function:

◆ _is_constant_stride_1dim()

int _is_constant_stride_1dim ( )

If 1dim array has a constant stride, return TRUE (Always TRUE)

1396 {
1397  return _XMP_N_INT_TRUE;
1398 }
Here is the caller graph for this function:

◆ _is_constant_stride_2dim()

int _is_constant_stride_2dim ( const _XMP_array_section_t array_info)
1406 {
1407  if(array_info[0].stride == 1 && _check_round(array_info, 1)){
1408  return _XMP_N_INT_TRUE;
1409  }
1410  else if(array_info[1].stride == 1){
1411  return _XMP_N_INT_TRUE;
1412  }
1413 
1414  return _XMP_N_INT_FALSE;
1415 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _is_constant_stride_3dim()

int _is_constant_stride_3dim ( const _XMP_array_section_t array_info)
1423 {
1424  if(array_info[1].stride == 1 && _is_all_element(array_info, 2)){
1425  return _XMP_N_INT_TRUE;
1426  }
1427  else if(array_info[0].stride == 1){
1428  if(_check_round(array_info, 1) && array_info[2].stride == 1){
1429  return _XMP_N_INT_TRUE;
1430  }
1431  else if(_is_all_element(array_info, 1) && _check_round(array_info, 2)){
1432  return _XMP_N_INT_TRUE;
1433  }
1434  }
1435 
1436  return _XMP_N_INT_FALSE;
1437 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _is_constant_stride_4dim()

int _is_constant_stride_4dim ( const _XMP_array_section_t array_info)
1445 {
1446  if(array_info[1].stride == 1 && _is_all_element(array_info, 2) &&
1447  _is_all_element(array_info, 3)){
1448  return _XMP_N_INT_TRUE;
1449  }
1450  else if(array_info[0].stride == 1){
1451  if(_check_round(array_info, 1) && array_info[2].stride == 1 &&
1452  _is_all_element(array_info, 3)){
1453  return _XMP_N_INT_TRUE;
1454  }
1455  else if(_is_all_element(array_info, 1) && _check_round(array_info, 2) &&
1456  array_info[3].stride == 1){
1457  return _XMP_N_INT_TRUE;
1458  }
1459  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1460  _check_round(array_info, 3)){
1461  return _XMP_N_INT_TRUE;
1462  }
1463  }
1464 
1465  return _XMP_N_INT_FALSE;
1466 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _is_constant_stride_5dim()

int _is_constant_stride_5dim ( const _XMP_array_section_t array_info)
1474 {
1475  if(array_info[1].stride == 1 && _is_all_element(array_info, 2) &&
1476  _is_all_element(array_info, 3) && _is_all_element(array_info, 4)){
1477  return _XMP_N_INT_TRUE;
1478  }
1479  else if(array_info[0].stride == 1){
1480  if(_check_round(array_info, 1) && array_info[2].stride == 1 &&
1481  _is_all_element(array_info, 3) && _is_all_element(array_info, 4)){
1482  return _XMP_N_INT_TRUE;
1483  }
1484  else if(_is_all_element(array_info, 1) && _check_round(array_info, 2) &&
1485  array_info[3].stride == 1 && _is_all_element(array_info, 4)){
1486  return _XMP_N_INT_TRUE;
1487  }
1488  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1489  _check_round(array_info, 3) && array_info[4].stride == 1){
1490  return _XMP_N_INT_TRUE;
1491  }
1492  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1493  _is_all_element(array_info, 3) && _check_round(array_info, 4)){
1494  return _XMP_N_INT_TRUE;
1495  }
1496  }
1497 
1498  return _XMP_N_INT_FALSE;
1499 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _is_constant_stride_6dim()

int _is_constant_stride_6dim ( const _XMP_array_section_t array_info)
1507 {
1508  if(array_info[1].stride == 1 && _is_all_element(array_info, 2) &&
1509  _is_all_element(array_info, 3) && _is_all_element(array_info, 4) &&
1510  _is_all_element(array_info, 5)){
1511  return _XMP_N_INT_TRUE;
1512  }
1513  else if(array_info[0].stride == 1){
1514  if(_check_round(array_info, 1) && array_info[2].stride == 1 &&
1515  _is_all_element(array_info, 3) && _is_all_element(array_info, 4) &&
1516  _is_all_element(array_info, 5)){
1517  return _XMP_N_INT_TRUE;
1518  }
1519  else if(_is_all_element(array_info, 1) && _check_round(array_info, 2) &&
1520  array_info[3].stride == 1 && _is_all_element(array_info, 4) &&
1521  _is_all_element(array_info, 5)){
1522  return _XMP_N_INT_TRUE;
1523  }
1524  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1525  _check_round(array_info, 3) && array_info[4].stride == 1 &&
1526  _is_all_element(array_info, 5)){
1527  return _XMP_N_INT_TRUE;
1528  }
1529  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1530  _is_all_element(array_info, 3) && _check_round(array_info, 4) &&
1531  array_info[5].stride == 1){
1532  return _XMP_N_INT_TRUE;
1533  }
1534  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1535  _is_all_element(array_info, 3) && _is_all_element(array_info, 4) &&
1536  _check_round(array_info, 5)){
1537  return _XMP_N_INT_TRUE;
1538  }
1539  }
1540 
1541  return _XMP_N_INT_FALSE;
1542 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _is_constant_stride_7dim()

int _is_constant_stride_7dim ( const _XMP_array_section_t array_info)
1550 {
1551  if(array_info[1].stride == 1 && _is_all_element(array_info, 2) &&
1552  _is_all_element(array_info, 3) && _is_all_element(array_info, 4) &&
1553  _is_all_element(array_info, 5) && _is_all_element(array_info, 6)){
1554  return _XMP_N_INT_TRUE;
1555  }
1556  else if(array_info[0].stride == 1){
1557  if(_check_round(array_info, 1) && array_info[2].stride == 1 &&
1558  _is_all_element(array_info, 3) && _is_all_element(array_info, 4) &&
1559  _is_all_element(array_info, 5) && _is_all_element(array_info, 6)){
1560  return _XMP_N_INT_TRUE;
1561  }
1562  else if(_is_all_element(array_info, 1) && _check_round(array_info, 2) &&
1563  array_info[3].stride == 1 && _is_all_element(array_info, 4) &&
1564  _is_all_element(array_info, 5) && _is_all_element(array_info, 6)){
1565  return _XMP_N_INT_TRUE;
1566  }
1567  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1568  _check_round(array_info, 3) && array_info[4].stride == 1 &&
1569  _is_all_element(array_info, 5) && _is_all_element(array_info, 6)){
1570  return _XMP_N_INT_TRUE;
1571  }
1572  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1573  _is_all_element(array_info, 3) && _check_round(array_info, 4) &&
1574  array_info[5].stride == 1 && _is_all_element(array_info, 6)){
1575  return _XMP_N_INT_TRUE;
1576  }
1577  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1578  _is_all_element(array_info, 3) && _is_all_element(array_info, 4) &&
1579  _check_round(array_info, 5) && array_info[6].stride == 1){
1580  return _XMP_N_INT_TRUE;
1581  }
1582  else if(_is_all_element(array_info, 1) && _is_all_element(array_info, 2) &&
1583  _is_all_element(array_info, 3) && _is_all_element(array_info, 4) &&
1584  _is_all_element(array_info, 5) && _check_round(array_info, 6)){
1585  return _XMP_N_INT_TRUE;
1586  }
1587  }
1588 
1589  return _XMP_N_INT_FALSE;
1590 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _is_the_same_shape_except_for_start()

int _is_the_same_shape_except_for_start ( const _XMP_array_section_t array1_info,
const _XMP_array_section_t array2_info,
const int  array1_dims,
const int  array2_dims 
)
1603 {
1604  if(array1_dims != array2_dims) return _XMP_N_INT_FALSE;
1605 
1606  for(int i=0;i<array1_dims;i++)
1607  if(array1_info[i].length != array2_info[i].length ||
1608  array1_info[i].elmts != array2_info[i].elmts ||
1609  array1_info[i].stride != array2_info[i].stride)
1610  return _XMP_N_INT_FALSE;
1611 
1612  return _XMP_N_INT_TRUE;
1613 }
Here is the caller graph for this function:

◆ _XMP_calc_max_copy_chunk()

/**
 * Returns the smaller of the copy-chunk values computed for the destination
 * and the source sections.
 *
 * For each side, _XMP_get_dim_of_allelmts() picks a dimension index and
 * _XMP_calc_copy_chunk() turns that index into a chunk value; the minimum of
 * the two results is returned.
 *
 * @param dst_dims  number of dimensions of the destination section
 * @param src_dims  number of dimensions of the source section
 * @param dst_info  destination section descriptors
 * @param src_info  source section descriptors
 * @return min(destination chunk, source chunk)
 */
size_t _XMP_calc_max_copy_chunk(const int dst_dims, const int src_dims,
                                const _XMP_array_section_t *dst_info,
                                const _XMP_array_section_t *src_info)
{
  const int dst_dim = _XMP_get_dim_of_allelmts(dst_dims, dst_info);
  const int src_dim = _XMP_get_dim_of_allelmts(src_dims, src_info);

  const size_t dst_chunk = _XMP_calc_copy_chunk(dst_dim, dst_info);
  const size_t src_chunk = _XMP_calc_copy_chunk(src_dim, src_info);

  return _XMP_M_MIN(dst_chunk, src_chunk);
}
Here is the call graph for this function:

◆ _XMP_calc_stride()

long _XMP_calc_stride ( const _XMP_array_section_t array_info,
const int  dims,
const size_t  chunk_size 
)
1666 {
1667  // uint64_t stride_offset[dims], tmp[dims];
1668  // size_t stride[2];
1669  long stride_offset[dims], tmp[dims];
1670  long stride[2];
1671 
1672  // Temporally variables to reduce calculation for offset
1673  for(int i=0;i<dims;i++)
1674  stride_offset[i] = array_info[i].stride * array_info[i].distance;
1675 
1676  switch (dims){
1677  size_t chunk_len;
1678  case 1:
1679  chunk_len = chunk_size / array_info[0].distance;
1680  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
1681  stride[num++] = stride_offset[0] * chunk_len * i;
1682  if(num == 2) goto end;
1683  }
1684  case 2:
1685  if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
1686  chunk_len = chunk_size / array_info[1].distance;
1687  for(size_t i=0,num=0;i<array_info[0].length;i++){
1688  tmp[0] = stride_offset[0] * i;
1689  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
1690  tmp[1] = stride_offset[1] * j;
1691  stride[num++] = tmp[0] + tmp[1];
1692  if(num == 2) goto end;
1693  }
1694  }
1695  }
1696  else{ // chunk_size >= array_info[0].distance
1697  chunk_len = chunk_size / array_info[0].distance;
1698  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
1699  stride[num++] = stride_offset[0] * i;
1700  if(num == 2) goto end;
1701  }
1702  }
1703  case 3:
1704  if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
1705  chunk_len = chunk_size / array_info[2].distance;
1706  for(size_t i=0,num=0;i<array_info[0].length;i++){
1707  tmp[0] = stride_offset[0] * i;
1708  for(size_t j=0;j<array_info[1].length;j++){
1709  tmp[1] = stride_offset[1] * j;
1710  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
1711  tmp[2] = stride_offset[2] * k;
1712  stride[num++] = tmp[0] + tmp[1] + tmp[2];
1713  if(num == 2) goto end;
1714  }
1715  }
1716  }
1717  }
1718  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
1719  chunk_len = chunk_size / array_info[1].distance;
1720  for(size_t i=0,num=0;i<array_info[0].length;i++){
1721  tmp[0] = stride_offset[0] * i;
1722  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
1723  tmp[1] = stride_offset[1] * j;
1724  stride[num++] = tmp[0] + tmp[1];
1725  if(num == 2) goto end;
1726  }
1727  }
1728  }
1729  else{ // chunk_size >= array_info[0].distance
1730  chunk_len = chunk_size / array_info[0].distance;
1731  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
1732  stride[num++] = stride_offset[0] * i;
1733  if(num == 2) goto end;
1734  }
1735  }
1736  break;
1737  case 4:
1738  if(array_info[2].distance > chunk_size){ // array_info[2].distance > chunk_size >= array_info[3].distance
1739  chunk_len = chunk_size / array_info[3].distance;
1740  for(size_t i=0,num=0;i<array_info[0].length;i++){
1741  tmp[0] = stride_offset[0] * i;
1742  for(size_t j=0;j<array_info[1].length;j++){
1743  tmp[1] = stride_offset[1] * j;
1744  for(size_t k=0;k<array_info[2].length;k++){
1745  tmp[2] = stride_offset[2] * k;
1746  for(size_t l=0;l<array_info[3].length;l+=chunk_len){
1747  tmp[3] = stride_offset[3] * l;
1748  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
1749  if(num == 2) goto end;
1750  }
1751  }
1752  }
1753  }
1754  }
1755  else if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
1756  chunk_len = chunk_size / array_info[2].distance;
1757  for(size_t i=0,num=0;i<array_info[0].length;i++){
1758  tmp[0] = stride_offset[0] * i;
1759  for(size_t j=0;j<array_info[1].length;j++){
1760  tmp[1] = stride_offset[1] * j;
1761  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
1762  tmp[2] = stride_offset[2] * k;
1763  stride[num++] = tmp[0] + tmp[1] + tmp[2];
1764  if(num == 2) goto end;
1765  }
1766  }
1767  }
1768  }
1769  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
1770  chunk_len = chunk_size / array_info[1].distance;
1771  for(size_t i=0,num=0;i<array_info[0].length;i++){
1772  tmp[0] = stride_offset[0] * i;
1773  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
1774  tmp[1] = stride_offset[1] * j;
1775  stride[num++] = tmp[0] + tmp[1];
1776  if(num == 2) goto end;
1777  }
1778  }
1779  }
1780  else{ // chunk_size >= array_info[0].distance
1781  chunk_len = chunk_size / array_info[0].distance;
1782  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
1783  stride[num++] = stride_offset[0] * i;
1784  if(num == 2) goto end;
1785  }
1786  }
1787  break;
1788  case 5:
1789  if(array_info[3].distance > chunk_size){ // array_info[3].distance > chunk_size >= array_info[4].distance
1790  chunk_len = chunk_size / array_info[4].distance;
1791  for(size_t i=0,num=0;i<array_info[0].length;i++){
1792  tmp[0] = stride_offset[0] * i;
1793  for(size_t j=0;j<array_info[1].length;j++){
1794  tmp[1] = stride_offset[1] * j;
1795  for(size_t k=0;k<array_info[2].length;k++){
1796  tmp[2] = stride_offset[2] * k;
1797  for(size_t l=0;l<array_info[3].length;l++){
1798  tmp[3] = stride_offset[3] * l;
1799  for(size_t m=0;m<array_info[4].length;m+=chunk_len){
1800  tmp[4] = stride_offset[4] * m;
1801  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4];
1802  if(num == 2) goto end;
1803  }
1804  }
1805  }
1806  }
1807  }
1808  }
1809  else if(array_info[2].distance > chunk_size){ // array_info[2].distance > chunk_size >= array_info[3].distance
1810  chunk_len = chunk_size / array_info[3].distance;
1811  for(size_t i=0,num=0;i<array_info[0].length;i++){
1812  tmp[0] = stride_offset[0] * i;
1813  for(size_t j=0;j<array_info[1].length;j++){
1814  tmp[1] = stride_offset[1] * j;
1815  for(size_t k=0;k<array_info[2].length;k++){
1816  tmp[2] = stride_offset[2] * k;
1817  for(size_t l=0;l<array_info[3].length;l+=chunk_len){
1818  tmp[3] = stride_offset[3] * l;
1819  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
1820  if(num == 2) goto end;
1821  }
1822  }
1823  }
1824  }
1825  }
1826  else if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
1827  chunk_len = chunk_size / array_info[2].distance;
1828  for(size_t i=0,num=0;i<array_info[0].length;i++){
1829  tmp[0] = stride_offset[0] * i;
1830  for(size_t j=0;j<array_info[1].length;j++){
1831  tmp[1] = stride_offset[1] * j;
1832  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
1833  tmp[2] = stride_offset[2] * k;
1834  stride[num++] = tmp[0] + tmp[1] + tmp[2];
1835  if(num == 2) goto end;
1836  }
1837  }
1838  }
1839  }
1840  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
1841  chunk_len = chunk_size / array_info[1].distance;
1842  for(size_t i=0,num=0;i<array_info[0].length;i++){
1843  tmp[0] = stride_offset[0] * i;
1844  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
1845  tmp[1] = stride_offset[1] * j;
1846  stride[num++] = tmp[0] + tmp[1];
1847  if(num == 2) goto end;
1848  }
1849  }
1850  }
1851  else{ // chunk_size >= array_info[0].distance
1852  chunk_len = chunk_size / array_info[0].distance;
1853  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
1854  stride[num++] = stride_offset[0] * i;
1855  if(num == 2) goto end;
1856  }
1857  }
1858  break;
1859  case 6:
1860  if(array_info[4].distance > chunk_size){ // array_info[4].distance > chunk_size >= array_info[5].distance
1861  chunk_len = chunk_size / array_info[5].distance;
1862  for(size_t i=0,num=0;i<array_info[0].length;i++){
1863  tmp[0] = stride_offset[0] * i;
1864  for(size_t j=0;j<array_info[1].length;j++){
1865  tmp[1] = stride_offset[1] * j;
1866  for(size_t k=0;k<array_info[2].length;k++){
1867  tmp[2] = stride_offset[2] * k;
1868  for(size_t l=0;l<array_info[3].length;l++){
1869  tmp[3] = stride_offset[3] * l;
1870  for(size_t m=0;m<array_info[4].length;m++){
1871  tmp[4] = stride_offset[4] * m;
1872  for(size_t n=0;n<array_info[5].length;n+=chunk_len){
1873  tmp[5] = stride_offset[5] * n;
1874  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5];
1875  if(num == 2) goto end;
1876  }
1877  }
1878  }
1879  }
1880  }
1881  }
1882  }
1883  else if(array_info[3].distance > chunk_size){ // array_info[3].distance > chunk_size >= array_info[4].distance
1884  chunk_len = chunk_size / array_info[4].distance;
1885  for(size_t i=0,num=0;i<array_info[0].length;i++){
1886  tmp[0] = stride_offset[0] * i;
1887  for(size_t j=0;j<array_info[1].length;j++){
1888  tmp[1] = stride_offset[1] * j;
1889  for(size_t k=0;k<array_info[2].length;k++){
1890  tmp[2] = stride_offset[2] * k;
1891  for(size_t l=0;l<array_info[3].length;l++){
1892  tmp[3] = stride_offset[3] * l;
1893  for(size_t m=0;m<array_info[4].length;m+=chunk_len){
1894  tmp[4] = stride_offset[4] * m;
1895  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4];
1896  if(num == 2) goto end;
1897  }
1898  }
1899  }
1900  }
1901  }
1902  }
1903  else if(array_info[2].distance > chunk_size){ // array_info[2].distance > chunk_size >= array_info[3].distance
1904  chunk_len = chunk_size / array_info[3].distance;
1905  for(size_t i=0,num=0;i<array_info[0].length;i++){
1906  tmp[0] = stride_offset[0] * i;
1907  for(size_t j=0;j<array_info[1].length;j++){
1908  tmp[1] = stride_offset[1] * j;
1909  for(size_t k=0;k<array_info[2].length;k++){
1910  tmp[2] = stride_offset[2] * k;
1911  for(size_t l=0;l<array_info[3].length;l+=chunk_len){
1912  tmp[3] = stride_offset[3] * l;
1913  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
1914  if(num == 2) goto end;
1915  }
1916  }
1917  }
1918  }
1919  }
1920  else if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
1921  chunk_len = chunk_size / array_info[2].distance;
1922  for(size_t i=0,num=0;i<array_info[0].length;i++){
1923  tmp[0] = stride_offset[0] * i;
1924  for(size_t j=0;j<array_info[1].length;j++){
1925  tmp[1] = stride_offset[1] * j;
1926  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
1927  tmp[2] = stride_offset[2] * k;
1928  stride[num++] = tmp[0] + tmp[1] + tmp[2];
1929  if(num == 2) goto end;
1930  }
1931  }
1932  }
1933  }
1934  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
1935  chunk_len = chunk_size / array_info[1].distance;
1936  for(size_t i=0,num=0;i<array_info[0].length;i++){
1937  tmp[0] = stride_offset[0] * i;
1938  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
1939  tmp[1] = stride_offset[1] * j;
1940  stride[num++] = tmp[0] + tmp[1];
1941  if(num == 2) goto end;
1942  }
1943  }
1944  }
1945  else{ // chunk_size >= array_info[0].distance
1946  chunk_len = chunk_size / array_info[0].distance;
1947  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
1948  stride[num++] = stride_offset[0] * i;
1949  if(num == 2) goto end;
1950  }
1951  }
1952  break;
1953  case 7:
1954  if(array_info[5].distance > chunk_size){ // array_info[5].distance > chunk_size >= array_info[6].distance
1955  chunk_len = chunk_size / array_info[6].distance;
1956  for(size_t i=0,num=0;i<array_info[0].length;i++){
1957  tmp[0] = stride_offset[0] * i;
1958  for(size_t j=0;j<array_info[1].length;j++){
1959  tmp[1] = stride_offset[1] * j;
1960  for(size_t k=0;k<array_info[2].length;k++){
1961  tmp[2] = stride_offset[2] * k;
1962  for(size_t l=0;l<array_info[3].length;l++){
1963  tmp[3] = stride_offset[3] * l;
1964  for(size_t m=0;m<array_info[4].length;m++){
1965  tmp[4] = stride_offset[4] * m;
1966  for(size_t n=0;n<array_info[5].length;n++){
1967  tmp[5] = stride_offset[5] * n;
1968  for(size_t p=0;p<array_info[6].length;p+=chunk_len){
1969  tmp[6] = stride_offset[6] * p;
1970  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + tmp[6];
1971  if(num == 2) goto end;
1972  }
1973  }
1974  }
1975  }
1976  }
1977  }
1978  }
1979  }
1980  else if(array_info[4].distance > chunk_size){ // array_info[4].distance > chunk_size >= array_info[5].distance
1981  chunk_len = chunk_size / array_info[5].distance;
1982  for(size_t i=0,num=0;i<array_info[0].length;i++){
1983  tmp[0] = stride_offset[0] * i;
1984  for(size_t j=0;j<array_info[1].length;j++){
1985  tmp[1] = stride_offset[1] * j;
1986  for(size_t k=0;k<array_info[2].length;k++){
1987  tmp[2] = stride_offset[2] * k;
1988  for(size_t l=0;l<array_info[3].length;l++){
1989  tmp[3] = stride_offset[3] * l;
1990  for(size_t m=0;m<array_info[4].length;m++){
1991  tmp[4] = stride_offset[4] * m;
1992  for(size_t n=0;n<array_info[5].length;n+=chunk_len){
1993  tmp[5] = stride_offset[5] * n;
1994  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5];
1995  if(num == 2) goto end;
1996  }
1997  }
1998  }
1999  }
2000  }
2001  }
2002  }
2003  else if(array_info[3].distance > chunk_size){ // array_info[3].distance > chunk_size >= array_info[4].distance
2004  chunk_len = chunk_size / array_info[4].distance;
2005  for(size_t i=0,num=0;i<array_info[0].length;i++){
2006  tmp[0] = stride_offset[0] * i;
2007  for(size_t j=0;j<array_info[1].length;j++){
2008  tmp[1] = stride_offset[1] * j;
2009  for(size_t k=0;k<array_info[2].length;k++){
2010  tmp[2] = stride_offset[2] * k;
2011  for(size_t l=0;l<array_info[3].length;l++){
2012  tmp[3] = stride_offset[3] * l;
2013  for(size_t m=0;m<array_info[4].length;m+=chunk_len){
2014  tmp[4] = stride_offset[4] * m;
2015  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4];
2016  if(num == 2) goto end;
2017  }
2018  }
2019  }
2020  }
2021  }
2022  }
2023  else if(array_info[2].distance > chunk_size){ // array_info[2].distance > chunk_size >= array_info[3].distance
2024  chunk_len = chunk_size / array_info[3].distance;
2025  for(size_t i=0,num=0;i<array_info[0].length;i++){
2026  tmp[0] = stride_offset[0] * i;
2027  for(size_t j=0;j<array_info[1].length;j++){
2028  tmp[1] = stride_offset[1] * j;
2029  for(size_t k=0;k<array_info[2].length;k++){
2030  tmp[2] = stride_offset[2] * k;
2031  for(size_t l=0;l<array_info[3].length;l+=chunk_len){
2032  tmp[3] = stride_offset[3] * l;
2033  stride[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
2034  if(num == 2) goto end;
2035  }
2036  }
2037  }
2038  }
2039  }
2040  else if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
2041  chunk_len = chunk_size / array_info[2].distance;
2042  for(size_t i=0,num=0;i<array_info[0].length;i++){
2043  tmp[0] = stride_offset[0] * i;
2044  for(size_t j=0;j<array_info[1].length;j++){
2045  tmp[1] = stride_offset[1] * j;
2046  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
2047  tmp[2] = stride_offset[2] * k;
2048  stride[num++] = tmp[0] + tmp[1] + tmp[2];
2049  if(num == 2) goto end;
2050  }
2051  }
2052  }
2053  }
2054  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
2055  chunk_len = chunk_size / array_info[1].distance;
2056  for(size_t i=0,num=0;i<array_info[0].length;i++){
2057  tmp[0] = stride_offset[0] * i;
2058  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
2059  tmp[1] = stride_offset[1] * j;
2060  stride[num++] = tmp[0] + tmp[1];
2061  if(num == 2) goto end;
2062  }
2063  }
2064  }
2065  else{ // chunk_size >= array_info[0].distance
2066  chunk_len = chunk_size / array_info[0].distance;
2067  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
2068  stride[num++] = stride_offset[0] * i;
2069  if(num == 2) goto end;
2070  }
2071  }
2072  break;
2073  }
2074 
2075  end:
2076  return stride[1] - stride[0];
2077 }

◆ _XMP_check_less_than_SIZE_MAX()

/*
 * Prints a message to stderr when the coarray size `s` compares greater than
 * SIZE_MAX.
 *
 * NOTE(review): this listing jumps from source line 12 to 14, so one line of
 * the if-body is not visible here. The page reports a call graph for this
 * function, so the hidden line is presumably a call (possibly an abort
 * routine) -- confirm against the real source file.
 *
 * NOTE(review): `s` is a signed long compared with the unsigned SIZE_MAX.
 * Where size_t is at least as wide as long, `s` is converted to size_t before
 * the comparison and the condition can never be true -- confirm the intended
 * check.
 */
void _XMP_check_less_than_SIZE_MAX ( const long  s)
10 {
11  if(s > SIZE_MAX){
12  fprintf(stderr, "Coarray size is %ld. Coarray size must be < %zu\n", s, SIZE_MAX);
14  }
15 }
Here is the call graph for this function:

◆ _XMP_check_overlapping()

/**
 * Reports whether the half-open byte ranges [dst_start, dst_end) and
 * [src_start, src_end) overlap.
 *
 * The ranges overlap when the start of one of them lies inside the other.
 *
 * @param dst_start  first byte of the destination range
 * @param dst_end    one past the last byte of the destination range
 * @param src_start  first byte of the source range
 * @param src_end    one past the last byte of the source range
 * @return 1 if the ranges overlap, otherwise 0
 */
_Bool _XMP_check_overlapping(const char *dst_start, const char *dst_end,
                             const char *src_start, const char *src_end)
{
  /* Does the source begin inside the destination? */
  const _Bool src_begins_in_dst = (dst_start <= src_start) && (src_start < dst_end);
  if(src_begins_in_dst)
    return 1;

  /* Otherwise, does the destination begin inside the source? */
  return (src_start <= dst_start) && (dst_start < src_end);
}
Here is the caller graph for this function:

◆ _XMP_get_dim_of_allelmts()

int _XMP_get_dim_of_allelmts ( const int  dims,
const _XMP_array_section_t array_info 
)
121 {
122  int val = dims;
123 
124  for(int i=dims-1;i>=0;i--){
125  if(array_info[i].start == 0 && array_info[i].length == array_info[i].elmts)
126  val--;
127  else
128  return val;
129  }
130 
131  return val;
132 }
Here is the caller graph for this function:

◆ _XMP_get_offset()

size_t _XMP_get_offset ( const _XMP_array_section_t array_info,
const int  dims 
)
23 {
24  size_t offset = 0;
25  for(int i=0;i<dims;i++)
26  offset += array_info[i].start * array_info[i].distance;
27 
28  return offset;
29 }
Here is the caller graph for this function:

◆ _XMP_is_the_same_constant_stride()

int _XMP_is_the_same_constant_stride ( const _XMP_array_section_t array1_info,
const _XMP_array_section_t array2_info,
const int  array1_dims,
const int  array2_dims 
)
1630 {
1631  if(! _is_the_same_shape_except_for_start(array1_info, array2_info,
1632  array1_dims, array2_dims))
1633  return _XMP_N_INT_FALSE;
1634 
1635  switch (array1_dims){
1636  case 1:
1637  return _is_constant_stride_1dim();
1638  case 2:
1639  return _is_constant_stride_2dim(array1_info);
1640  case 3:
1641  return _is_constant_stride_3dim(array1_info);
1642  case 4:
1643  return _is_constant_stride_4dim(array1_info);
1644  case 5:
1645  return _is_constant_stride_5dim(array1_info);
1646  case 6:
1647  return _is_constant_stride_6dim(array1_info);
1648  case 7:
1649  return _is_constant_stride_7dim(array1_info);
1650  default:
1651  _XMP_fatal("Coarray Error ! Dimension is too big.\n");
1652  return _XMP_N_INT_FALSE; // dummy
1653  }
1654 }
Here is the call graph for this function:

◆ _XMP_local_contiguous_copy()

/**
 * Copies contiguous elements from `src` to `dst` within local memory.
 *
 * Two transfer shapes are accepted:
 *   - dst_elmts == src_elmts: one block copy of dst_elmts * elmt_size bytes
 *     (e.g. a[0:100]:[1] = b[1:100]; or a[0:100] = b[1:100]:[1];).
 *     memmove() is used when the two ranges overlap, memcpy() otherwise.
 *   - src_elmts == 1: the single source element is replicated into each of
 *     the dst_elmts destination slots
 *     (e.g. a[0:100]:[1] = b[1]; or a[0:100] = b[1]:[1];).
 *     A slot whose address equals `src` is skipped.
 * Any other combination is reported through _XMP_fatal().
 *
 * @param dst        destination buffer
 * @param src        source buffer
 * @param dst_elmts  number of destination elements
 * @param src_elmts  number of source elements
 * @param elmt_size  size of one element in bytes
 */
void _XMP_local_contiguous_copy(char *dst, const char *src, const size_t dst_elmts,
                                const size_t src_elmts, const size_t elmt_size)
{
  if(dst_elmts != src_elmts && src_elmts != 1){
    _XMP_fatal("Coarray Error ! transfer size is wrong.\n");
    return;
  }

  if(dst_elmts == src_elmts){
    const size_t nbytes = dst_elmts * elmt_size;

    if(_XMP_check_overlapping(dst, dst + nbytes, src, src + nbytes))
      memmove(dst, src, nbytes);
    else
      memcpy(dst, src, nbytes);
    return;
  }

  /* Replicate the single source element into every destination slot. */
  size_t slot_offset = 0;
  size_t done = 0;
  while(done < dst_elmts){
    char *slot = dst + slot_offset;

    if(slot != src)
      memcpy(slot, src, elmt_size);

    slot_offset += elmt_size;
    done++;
  }
}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ _XMP_set_coarray_addresses()

void _XMP_set_coarray_addresses ( const uint64_t  addr,
const _XMP_array_section_t array,
const int  dims,
const size_t  elmts,
uint64_t *  addrs 
)
1247 {
1248  uint64_t stride_offset[dims], tmp[dims];
1249 
1250  // Temporally variables to reduce calculation for offset
1251  for(int i=0;i<dims;i++)
1252  stride_offset[i] = array[i].stride * array[i].distance;
1253 
1254  switch (dims){
1255  case 1:
1256  for(size_t i=0, num=0;i<array[0].length;i++){
1257  tmp[0] = stride_offset[0] * i;
1258  addrs[num++] = addr + tmp[0];
1259  }
1260  break;
1261  case 2:
1262  for(size_t i=0, num=0;i<array[0].length;i++){
1263  tmp[0] = stride_offset[0] * i;
1264  for(size_t j=0;j<array[1].length;j++){
1265  tmp[1] = stride_offset[1] * j;
1266  addrs[num++] = addr + tmp[0] + tmp[1];
1267  }
1268  }
1269  break;
1270  case 3:
1271  for(size_t i=0, num=0;i<array[0].length;i++){
1272  tmp[0] = stride_offset[0] * i;
1273  for(size_t j=0;j<array[1].length;j++){
1274  tmp[1] = stride_offset[1] * j;
1275  for(size_t k=0;k<array[2].length;k++){
1276  tmp[2] = stride_offset[2] * k;
1277  addrs[num++] = addr + tmp[0] + tmp[1] + tmp[2];
1278  }
1279  }
1280  }
1281  break;
1282  case 4:
1283  for(size_t i=0, num=0;i<array[0].length;i++){
1284  tmp[0] = stride_offset[0] * i;
1285  for(size_t j=0;j<array[1].length;j++){
1286  tmp[1] = stride_offset[1] * j;
1287  for(size_t k=0;k<array[2].length;k++){
1288  tmp[2] = stride_offset[2] * k;
1289  for(size_t l=0;l<array[3].length;l++){
1290  tmp[3] = stride_offset[3] * l;
1291  addrs[num++] = addr + tmp[0] + tmp[1] + tmp[2] + tmp[3];
1292  }
1293  }
1294  }
1295  }
1296  break;
1297  case 5:
1298  for(size_t i=0, num=0;i<array[0].length;i++){
1299  tmp[0] = stride_offset[0] * i;
1300  for(size_t j=0;j<array[1].length;j++){
1301  tmp[1] = stride_offset[1] * j;
1302  for(size_t k=0;k<array[2].length;k++){
1303  tmp[2] = stride_offset[2] * k;
1304  for(size_t l=0;l<array[3].length;l++){
1305  tmp[3] = stride_offset[3] * l;
1306  for(size_t m=0;m<array[4].length;m++){
1307  tmp[4] = stride_offset[4] * m;
1308  addrs[num++] = addr + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4];
1309  }
1310  }
1311  }
1312  }
1313  }
1314  break;
1315  case 6:
1316  for(size_t i=0, num=0;i<array[0].length;i++){
1317  tmp[0] = stride_offset[0] * i;
1318  for(size_t j=0;j<array[1].length;j++){
1319  tmp[1] = stride_offset[1] * j;
1320  for(size_t k=0;k<array[2].length;k++){
1321  tmp[2] = stride_offset[2] * k;
1322  for(size_t l=0;l<array[3].length;l++){
1323  tmp[3] = stride_offset[3] * l;
1324  for(size_t m=0;m<array[4].length;m++){
1325  tmp[4] = stride_offset[4] * m;
1326  for(size_t n=0;n<array[5].length;n++){
1327  tmp[5] = stride_offset[5] * n;
1328  addrs[num++] = addr + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5];
1329  }
1330  }
1331  }
1332  }
1333  }
1334  }
1335  break;
1336  case 7:
1337  for(size_t i=0, num=0;i<array[0].length;i++){
1338  tmp[0] = stride_offset[0] * i;
1339  for(size_t j=0;j<array[1].length;j++){
1340  tmp[1] = stride_offset[1] * j;
1341  for(size_t k=0;k<array[2].length;k++){
1342  tmp[2] = stride_offset[2] * k;
1343  for(size_t l=0;l<array[3].length;l++){
1344  tmp[3] = stride_offset[3] * l;
1345  for(size_t m=0;m<array[4].length;m++){
1346  tmp[4] = stride_offset[4] * m;
1347  for(size_t n=0;n<array[5].length;n++){
1348  tmp[5] = stride_offset[5] * n;
1349  for(size_t p=0;p<array[6].length;p++){
1350  tmp[6] = stride_offset[6] * p;
1351  addrs[num++] = addr + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + tmp[6];
1352  }
1353  }
1354  }
1355  }
1356  }
1357  }
1358  }
1359  break;
1360  }
1361 }

◆ _XMP_set_coarray_addresses_with_chunk()

void _XMP_set_coarray_addresses_with_chunk ( uint64_t *  addrs,
const uint64_t  base_addr,
const _XMP_array_section_t array_info,
const int  dims,
const size_t  chunk_size,
const size_t  copy_elmts 
)
862 {
863  uint64_t stride_offset[dims], tmp[dims];
864 
865  // Temporally variables to reduce calculation for offset
866  for(int i=0;i<dims;i++)
867  stride_offset[i] = array_info[i].stride * array_info[i].distance;
868 
869  // array_info[dims-1].distance is an element size
870  // chunk_size >= array_info[dims-1].distance
871  switch (dims){
872  int chunk_len;
873  case 1:
874  chunk_len = chunk_size / array_info[0].distance;
875  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
876  addrs[num++] = stride_offset[0] * i + base_addr;
877  }
878  break;
879  case 2:
880  if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
881  chunk_len = chunk_size / array_info[1].distance;
882  for(size_t i=0,num=0;i<array_info[0].length;i++){
883  tmp[0] = stride_offset[0] * i;
884  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
885  tmp[1] = stride_offset[1] * j;
886  addrs[num++] = tmp[0] + tmp[1] + base_addr;
887  }
888  }
889  }
890  else{ // chunk_size >= array_info[0].distance
891  chunk_len = chunk_size / array_info[0].distance;
892  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
893  addrs[num++] = stride_offset[0] * i + base_addr;
894  }
895  }
896  break;
897  case 3:
898  if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
899  chunk_len = chunk_size / array_info[2].distance;
900  for(size_t i=0,num=0;i<array_info[0].length;i++){
901  tmp[0] = stride_offset[0] * i;
902  for(size_t j=0;j<array_info[1].length;j++){
903  tmp[1] = stride_offset[1] * j;
904  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
905  tmp[2] = stride_offset[2] * k;
906  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + base_addr;
907  }
908  }
909  }
910  }
911  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
912  chunk_len = chunk_size / array_info[1].distance;
913  for(size_t i=0,num=0;i<array_info[0].length;i++){
914  tmp[0] = stride_offset[0] * i;
915  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
916  tmp[1] = stride_offset[1] * j;
917  addrs[num++] = tmp[0] + tmp[1] + base_addr;
918  }
919  }
920  }
921  else{ // chunk_size >= array_info[0].distance
922  chunk_len = chunk_size / array_info[0].distance;
923  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
924  addrs[num++] = stride_offset[0] * i + base_addr;
925  }
926  }
927  break;
928  case 4:
929  if(array_info[2].distance > chunk_size){ // array_info[2].distance > chunk_size >= array_info[3].distance
930  chunk_len = chunk_size / array_info[3].distance;
931  for(size_t i=0,num=0;i<array_info[0].length;i++){
932  tmp[0] = stride_offset[0] * i;
933  for(size_t j=0;j<array_info[1].length;j++){
934  tmp[1] = stride_offset[1] * j;
935  for(size_t k=0;k<array_info[2].length;k++){
936  tmp[2] = stride_offset[2] * k;
937  for(size_t l=0;l<array_info[3].length;l+=chunk_len){
938  tmp[3] = stride_offset[3] * l;
939  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + base_addr;
940  }
941  }
942  }
943  }
944  }
945  else if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
946  chunk_len = chunk_size / array_info[2].distance;
947  for(size_t i=0,num=0;i<array_info[0].length;i++){
948  tmp[0] = stride_offset[0] * i;
949  for(size_t j=0;j<array_info[1].length;j++){
950  tmp[1] = stride_offset[1] * j;
951  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
952  tmp[2] = stride_offset[2] * k;
953  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + base_addr;
954  }
955  }
956  }
957  }
958  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
959  chunk_len = chunk_size / array_info[1].distance;
960  for(size_t i=0,num=0;i<array_info[0].length;i++){
961  tmp[0] = stride_offset[0] * i;
962  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
963  tmp[1] = stride_offset[1] * j;
964  addrs[num++] = tmp[0] + tmp[1] + base_addr;
965  }
966  }
967  }
968  else{ // chunk_size >= array_info[0].distance
969  chunk_len = chunk_size / array_info[0].distance;
970  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
971  addrs[num++] = stride_offset[0] * i + base_addr;
972  }
973  }
974  break;
975  case 5:
976  if(array_info[3].distance > chunk_size){ // array_info[3].distance > chunk_size >= array_info[4].distance
977  chunk_len = chunk_size / array_info[4].distance;
978  for(size_t i=0,num=0;i<array_info[0].length;i++){
979  tmp[0] = stride_offset[0] * i;
980  for(size_t j=0;j<array_info[1].length;j++){
981  tmp[1] = stride_offset[1] * j;
982  for(size_t k=0;k<array_info[2].length;k++){
983  tmp[2] = stride_offset[2] * k;
984  for(size_t l=0;l<array_info[3].length;l++){
985  tmp[3] = stride_offset[3] * l;
986  for(size_t m=0;m<array_info[4].length;m+=chunk_len){
987  tmp[4] = stride_offset[4] * m;
988  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + base_addr;
989  }
990  }
991  }
992  }
993  }
994  }
995  else if(array_info[2].distance > chunk_size){ // array_info[2].distance > chunk_size >= array_info[3].distance
996  chunk_len = chunk_size / array_info[3].distance;
997  for(size_t i=0,num=0;i<array_info[0].length;i++){
998  tmp[0] = stride_offset[0] * i;
999  for(size_t j=0;j<array_info[1].length;j++){
1000  tmp[1] = stride_offset[1] * j;
1001  for(size_t k=0;k<array_info[2].length;k++){
1002  tmp[2] = stride_offset[2] * k;
1003  for(size_t l=0;l<array_info[3].length;l+=chunk_len){
1004  tmp[3] = stride_offset[3] * l;
1005  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + base_addr;
1006  }
1007  }
1008  }
1009  }
1010  }
1011  else if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
1012  chunk_len = chunk_size / array_info[2].distance;
1013  for(size_t i=0,num=0;i<array_info[0].length;i++){
1014  tmp[0] = stride_offset[0] * i;
1015  for(size_t j=0;j<array_info[1].length;j++){
1016  tmp[1] = stride_offset[1] * j;
1017  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
1018  tmp[2] = stride_offset[2] * k;
1019  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + base_addr;
1020  }
1021  }
1022  }
1023  }
1024  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
1025  chunk_len = chunk_size / array_info[1].distance;
1026  for(size_t i=0,num=0;i<array_info[0].length;i++){
1027  tmp[0] = stride_offset[0] * i;
1028  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
1029  tmp[1] = stride_offset[1] * j;
1030  addrs[num++] = tmp[0] + tmp[1] + base_addr;
1031  }
1032  }
1033  }
1034  else{ // chunk_size >= array_info[0].distance
1035  chunk_len = chunk_size / array_info[0].distance;
1036  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
1037  addrs[num++] = stride_offset[0] * i + base_addr;
1038  }
1039  }
1040  break;
1041  case 6:
1042  if(array_info[4].distance > chunk_size){ // array_info[4].distance > chunk_size >= array_info[5].distance
1043  chunk_len = chunk_size / array_info[5].distance;
1044  for(size_t i=0,num=0;i<array_info[0].length;i++){
1045  tmp[0] = stride_offset[0] * i;
1046  for(size_t j=0;j<array_info[1].length;j++){
1047  tmp[1] = stride_offset[1] * j;
1048  for(size_t k=0;k<array_info[2].length;k++){
1049  tmp[2] = stride_offset[2] * k;
1050  for(size_t l=0;l<array_info[3].length;l++){
1051  tmp[3] = stride_offset[3] * l;
1052  for(size_t m=0;m<array_info[4].length;m++){
1053  tmp[4] = stride_offset[4] * m;
1054  for(size_t n=0;n<array_info[5].length;n+=chunk_len){
1055  tmp[5] = stride_offset[5] * n;
1056  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + base_addr;
1057  }
1058  }
1059  }
1060  }
1061  }
1062  }
1063  }
1064  else if(array_info[3].distance > chunk_size){ // array_info[3].distance > chunk_size >= array_info[4].distance
1065  chunk_len = chunk_size / array_info[4].distance;
1066  for(size_t i=0,num=0;i<array_info[0].length;i++){
1067  tmp[0] = stride_offset[0] * i;
1068  for(size_t j=0;j<array_info[1].length;j++){
1069  tmp[1] = stride_offset[1] * j;
1070  for(size_t k=0;k<array_info[2].length;k++){
1071  tmp[2] = stride_offset[2] * k;
1072  for(size_t l=0;l<array_info[3].length;l++){
1073  tmp[3] = stride_offset[3] * l;
1074  for(size_t m=0;m<array_info[4].length;m+=chunk_len){
1075  tmp[4] = stride_offset[4] * m;
1076  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + base_addr;
1077  }
1078  }
1079  }
1080  }
1081  }
1082  }
1083  else if(array_info[2].distance > chunk_size){ // array_info[2].distance > chunk_size >= array_info[3].distance
1084  chunk_len = chunk_size / array_info[3].distance;
1085  for(size_t i=0,num=0;i<array_info[0].length;i++){
1086  tmp[0] = stride_offset[0] * i;
1087  for(size_t j=0;j<array_info[1].length;j++){
1088  tmp[1] = stride_offset[1] * j;
1089  for(size_t k=0;k<array_info[2].length;k++){
1090  tmp[2] = stride_offset[2] * k;
1091  for(size_t l=0;l<array_info[3].length;l+=chunk_len){
1092  tmp[3] = stride_offset[3] * l;
1093  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + base_addr;
1094  }
1095  }
1096  }
1097  }
1098  }
1099  else if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
1100  chunk_len = chunk_size / array_info[2].distance;
1101  for(size_t i=0,num=0;i<array_info[0].length;i++){
1102  tmp[0] = stride_offset[0] * i;
1103  for(size_t j=0;j<array_info[1].length;j++){
1104  tmp[1] = stride_offset[1] * j;
1105  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
1106  tmp[2] = stride_offset[2] * k;
1107  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + base_addr;
1108  }
1109  }
1110  }
1111  }
1112  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
1113  chunk_len = chunk_size / array_info[1].distance;
1114  for(size_t i=0,num=0;i<array_info[0].length;i++){
1115  tmp[0] = stride_offset[0] * i;
1116  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
1117  tmp[1] = stride_offset[1] * j;
1118  addrs[num++] = tmp[0] + tmp[1] + base_addr;
1119  }
1120  }
1121  }
1122  else{ // chunk_size >= array_info[0].distance
1123  chunk_len = chunk_size / array_info[0].distance;
1124  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
1125  addrs[num++] = stride_offset[0] * i + base_addr;
1126  }
1127  }
1128  break;
1129  case 7:
1130  if(array_info[5].distance > chunk_size){ // array_info[5].distance > chunk_size >= array_info[6].distance
1131  chunk_len = chunk_size / array_info[6].distance;
1132  for(size_t i=0,num=0;i<array_info[0].length;i++){
1133  tmp[0] = stride_offset[0] * i;
1134  for(size_t j=0;j<array_info[1].length;j++){
1135  tmp[1] = stride_offset[1] * j;
1136  for(size_t k=0;k<array_info[2].length;k++){
1137  tmp[2] = stride_offset[2] * k;
1138  for(size_t l=0;l<array_info[3].length;l++){
1139  tmp[3] = stride_offset[3] * l;
1140  for(size_t m=0;m<array_info[4].length;m++){
1141  tmp[4] = stride_offset[4] * m;
1142  for(size_t n=0;n<array_info[5].length;n++){
1143  tmp[5] = stride_offset[5] * n;
1144  for(size_t p=0;p<array_info[6].length;p+=chunk_len){
1145  tmp[6] = stride_offset[6] * p;
1146  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + tmp[6] + base_addr;
1147  }
1148  }
1149  }
1150  }
1151  }
1152  }
1153  }
1154  }
1155  else if(array_info[4].distance > chunk_size){ // array_info[4].distance > chunk_size >= array_info[5].distance
1156  chunk_len = chunk_size / array_info[5].distance;
1157  for(size_t i=0,num=0;i<array_info[0].length;i++){
1158  tmp[0] = stride_offset[0] * i;
1159  for(size_t j=0;j<array_info[1].length;j++){
1160  tmp[1] = stride_offset[1] * j;
1161  for(size_t k=0;k<array_info[2].length;k++){
1162  tmp[2] = stride_offset[2] * k;
1163  for(size_t l=0;l<array_info[3].length;l++){
1164  tmp[3] = stride_offset[3] * l;
1165  for(size_t m=0;m<array_info[4].length;m++){
1166  tmp[4] = stride_offset[4] * m;
1167  for(size_t n=0;n<array_info[5].length;n+=chunk_len){
1168  tmp[5] = stride_offset[5] * n;
1169  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + base_addr;
1170  }
1171  }
1172  }
1173  }
1174  }
1175  }
1176  }
1177  else if(array_info[3].distance > chunk_size){ // array_info[3].distance > chunk_size >= array_info[4].distance
1178  chunk_len = chunk_size / array_info[4].distance;
1179  for(size_t i=0,num=0;i<array_info[0].length;i++){
1180  tmp[0] = stride_offset[0] * i;
1181  for(size_t j=0;j<array_info[1].length;j++){
1182  tmp[1] = stride_offset[1] * j;
1183  for(size_t k=0;k<array_info[2].length;k++){
1184  tmp[2] = stride_offset[2] * k;
1185  for(size_t l=0;l<array_info[3].length;l++){
1186  tmp[3] = stride_offset[3] * l;
1187  for(size_t m=0;m<array_info[4].length;m+=chunk_len){
1188  tmp[4] = stride_offset[4] * m;
1189  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + base_addr;
1190  }
1191  }
1192  }
1193  }
1194  }
1195  }
1196  else if(array_info[2].distance > chunk_size){ // array_info[2].distance > chunk_size >= array_info[3].distance
1197  chunk_len = chunk_size / array_info[3].distance;
1198  for(size_t i=0,num=0;i<array_info[0].length;i++){
1199  tmp[0] = stride_offset[0] * i;
1200  for(size_t j=0;j<array_info[1].length;j++){
1201  tmp[1] = stride_offset[1] * j;
1202  for(size_t k=0;k<array_info[2].length;k++){
1203  tmp[2] = stride_offset[2] * k;
1204  for(size_t l=0;l<array_info[3].length;l+=chunk_len){
1205  tmp[3] = stride_offset[3] * l;
1206  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + tmp[3] + base_addr;
1207  }
1208  }
1209  }
1210  }
1211  }
1212  else if(array_info[1].distance > chunk_size){ // array_info[1].distance > chunk_size >= array_info[2].distance
1213  chunk_len = chunk_size / array_info[2].distance;
1214  for(size_t i=0,num=0;i<array_info[0].length;i++){
1215  tmp[0] = stride_offset[0] * i;
1216  for(size_t j=0;j<array_info[1].length;j++){
1217  tmp[1] = stride_offset[1] * j;
1218  for(size_t k=0;k<array_info[2].length;k+=chunk_len){
1219  tmp[2] = stride_offset[2] * k;
1220  addrs[num++] = tmp[0] + tmp[1] + tmp[2] + base_addr;
1221  }
1222  }
1223  }
1224  }
1225  else if(array_info[0].distance > chunk_size){ // array_info[0].distance > chunk_size >= array_info[1].distance
1226  chunk_len = chunk_size / array_info[1].distance;
1227  for(size_t i=0,num=0;i<array_info[0].length;i++){
1228  tmp[0] = stride_offset[0] * i;
1229  for(size_t j=0;j<array_info[1].length;j+=chunk_len){
1230  tmp[1] = stride_offset[1] * j;
1231  addrs[num++] = tmp[0] + tmp[1] + base_addr;
1232  }
1233  }
1234  }
1235  else{ // chunk_size >= array_info[0].distance
1236  chunk_len = chunk_size / array_info[0].distance;
1237  for(size_t i=0,num=0;i<array_info[0].length;i+=chunk_len){
1238  addrs[num++] = stride_offset[0] * i + base_addr;
1239  }
1240  }
1241  break;
1242  }
1243 }

◆ _XMP_stride_memcpy_1dim()

void _XMP_stride_memcpy_1dim ( char *  buf1,
const char *  buf2,
const _XMP_array_section_t array_info,
size_t  element_size,
const int  flag 
)
144 {
145  size_t buf1_offset = 0;
146  size_t tmp, stride_offset = array_info[0].stride * array_info[0].distance;
147 
148  switch (flag){
149  case _XMP_PACK:
150  if(array_info[0].stride == 1){
151  memcpy(buf1, buf2, element_size*array_info[0].length);
152  }
153  else{
154  for(size_t i=0;i<array_info[0].length;i++){
155  tmp = stride_offset * i;
156  memcpy(buf1 + buf1_offset, buf2 + tmp, element_size);
157  buf1_offset += element_size;
158  }
159  }
160  break;
161  case _XMP_UNPACK:
162  if(array_info[0].stride == 1){
163  memcpy(buf1, buf2, element_size*array_info[0].length);
164  }
165  else{
166  for(size_t i=0;i<array_info[0].length;i++){
167  tmp = stride_offset * i;
168  memcpy(buf1 + tmp, buf2 + buf1_offset, element_size);
169  buf1_offset += element_size;
170  }
171  }
172  break;
173  case _XMP_SCALAR_MCOPY:
174  for(size_t i=0;i<array_info[0].length;i++){
175  tmp = stride_offset * i;
176  if(buf1 + tmp != buf2)
177  memcpy(buf1 + tmp, buf2, element_size);
178  }
179  break;
180  }
181 }

◆ _XMP_stride_memcpy_2dim()

void _XMP_stride_memcpy_2dim ( char *  buf1,
const char *  buf2,
const _XMP_array_section_t array_info,
size_t  element_size,
const int  flag 
)
193 {
194  size_t buf1_offset = 0;
195  size_t tmp[2], stride_offset[2];
196 
197  for(int i=0;i<2;i++)
198  stride_offset[i] = array_info[i].stride * array_info[i].distance;
199 
200  switch (flag){
201  case _XMP_PACK:
202  if(array_info[1].stride == 1){
203  element_size *= array_info[1].length;
204  for(size_t i=0;i<array_info[0].length;i++){
205  memcpy(buf1 + buf1_offset, buf2 + stride_offset[0] * i, element_size);
206  buf1_offset += element_size;
207  }
208  }
209  else{
210  for(size_t i=0;i<array_info[0].length;i++){
211  tmp[0] = stride_offset[0] * i;
212  for(size_t j=0;j<array_info[1].length;j++){
213  tmp[1] = stride_offset[1] * j;
214  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1], element_size);
215  buf1_offset += element_size;
216  }
217  }
218  }
219  break;
220  case _XMP_UNPACK:
221  if(array_info[1].stride == 1){
222  element_size *= array_info[1].length;
223  for(size_t i=0;i<array_info[0].length;i++){
224  memcpy(buf1 + stride_offset[0] * i, buf2 + buf1_offset, element_size);
225  buf1_offset += element_size;
226  }
227  }
228  else{
229  for(size_t i=0;i<array_info[0].length;i++){
230  tmp[0] = stride_offset[0] * i;
231  for(size_t j=0;j<array_info[1].length;j++){
232  tmp[1] = stride_offset[1] * j;
233  memcpy(buf1 + tmp[0] + tmp[1], buf2 + buf1_offset, element_size);
234  buf1_offset += element_size;
235  }
236  }
237  }
238  break;
239  case _XMP_SCALAR_MCOPY:
240  for(size_t i=0;i<array_info[0].length;i++){
241  tmp[0] = stride_offset[0] * i;
242  for(size_t j=0;j<array_info[1].length;j++){
243  tmp[1] = stride_offset[1] * j;
244  if(buf1 + tmp[0] + tmp[1] != buf2)
245  memcpy(buf1 + tmp[0] + tmp[1], buf2, element_size);
246  }
247  }
248  break;
249  }
250 }

◆ _XMP_stride_memcpy_3dim()

void _XMP_stride_memcpy_3dim ( char *  buf1,
const char *  buf2,
const _XMP_array_section_t array_info,
size_t  element_size,
const int  flag 
)
262 {
263  size_t buf1_offset = 0;
264  size_t tmp[3], stride_offset[3];
265 
266  for(int i=0;i<3;i++)
267  stride_offset[i] = array_info[i].stride * array_info[i].distance;
268 
269  switch (flag){
270  case _XMP_PACK:
271  if(array_info[2].stride == 1){
272  element_size *= array_info[2].length;
273  for(size_t i=0;i<array_info[0].length;i++){
274  tmp[0] = stride_offset[0] * i;
275  for(size_t j=0;j<array_info[1].length;j++){
276  tmp[1] = stride_offset[1] * j;
277  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1], element_size);
278  buf1_offset += element_size;
279  }
280  }
281  }
282  else{
283  for(size_t i=0;i<array_info[0].length;i++){
284  tmp[0] = stride_offset[0] * i;
285  for(size_t j=0;j<array_info[1].length;j++){
286  tmp[1] = stride_offset[1] * j;
287  for(size_t k=0;k<array_info[2].length;k++){
288  tmp[2] = stride_offset[2] * k;
289  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1] + tmp[2], element_size);
290  buf1_offset += element_size;
291  }
292  }
293  }
294  }
295  break;
296  case _XMP_UNPACK:
297  if(array_info[2].stride == 1){
298  element_size *= array_info[2].length;
299  for(size_t i=0;i<array_info[0].length;i++){
300  tmp[0] = stride_offset[0] * i;
301  for(size_t j=0;j<array_info[1].length;j++){
302  tmp[1] = stride_offset[1] * j;
303  memcpy(buf1 + tmp[0] + tmp[1], buf2 + buf1_offset, element_size);
304  buf1_offset += element_size;
305  }
306  }
307  }
308  else{
309  for(size_t i=0;i<array_info[0].length;i++){
310  tmp[0] = stride_offset[0] * i;
311  for(size_t j=0;j<array_info[1].length;j++){
312  tmp[1] = stride_offset[1] * j;
313  for(size_t k=0;k<array_info[2].length;k++){
314  tmp[2] = stride_offset[2] * k;
315  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2], buf2 + buf1_offset, element_size);
316  buf1_offset += element_size;
317  }
318  }
319  }
320  }
321  break;
322  case _XMP_SCALAR_MCOPY:
323  for(size_t i=0;i<array_info[0].length;i++){
324  tmp[0] = stride_offset[0] * i;
325  for(size_t j=0;j<array_info[1].length;j++){
326  tmp[1] = stride_offset[1] * j;
327  for(size_t k=0;k<array_info[2].length;k++){
328  tmp[2] = stride_offset[2] * k;
329  if(buf1 + tmp[0] + tmp[1] + tmp[2] != buf2)
330  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2], buf2, element_size);
331  }
332  }
333  }
334  break;
335  }
336 }

◆ _XMP_stride_memcpy_4dim()

void _XMP_stride_memcpy_4dim ( char *  buf1,
const char *  buf2,
const _XMP_array_section_t array_info,
size_t  element_size,
const int  flag 
)
348 {
349  size_t buf1_offset = 0;
350  size_t tmp[4], stride_offset[4];
351 
352  for(int i=0;i<4;i++)
353  stride_offset[i] = array_info[i].stride * array_info[i].distance;
354 
355  switch (flag){
356  case _XMP_PACK:
357  if(array_info[3].stride == 1){
358  element_size *= array_info[3].length;
359  for(size_t i=0;i<array_info[0].length;i++){
360  tmp[0] = stride_offset[0] * i;
361  for(size_t j=0;j<array_info[1].length;j++){
362  tmp[1] = stride_offset[1] * j;
363  for(size_t k=0;k<array_info[2].length;k++){
364  tmp[2] = stride_offset[2] * k;
365  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1] + tmp[2], element_size);
366  buf1_offset += element_size;
367  }
368  }
369  }
370  }
371  else{
372  for(size_t i=0;i<array_info[0].length;i++){
373  tmp[0] = stride_offset[0] * i;
374  for(size_t j=0;j<array_info[1].length;j++){
375  tmp[1] = stride_offset[1] * j;
376  for(size_t k=0;k<array_info[2].length;k++){
377  tmp[2] = stride_offset[2] * k;
378  for(size_t m=0;m<array_info[3].length;m++){
379  tmp[3] = stride_offset[3] * m;
380  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1] + tmp[2] + tmp[3], element_size);
381  buf1_offset += element_size;
382  }
383  }
384  }
385  }
386  }
387  break;
388  case _XMP_UNPACK:
389  if(array_info[3].stride == 1){
390  element_size *= array_info[3].length;
391  for(size_t i=0;i<array_info[0].length;i++){
392  tmp[0] = stride_offset[0] * i;
393  for(size_t j=0;j<array_info[1].length;j++){
394  tmp[1] = stride_offset[1] * j;
395  for(size_t k=0;k<array_info[2].length;k++){
396  tmp[2] = stride_offset[2] * k;
397  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2], buf2 + buf1_offset, element_size);
398  buf1_offset += element_size;
399  }
400  }
401  }
402  }
403  else{
404  for(size_t i=0;i<array_info[0].length;i++){
405  tmp[0] = stride_offset[0] * i;
406  for(size_t j=0;j<array_info[1].length;j++){
407  tmp[1] = stride_offset[1] * j;
408  for(size_t k=0;k<array_info[2].length;k++){
409  tmp[2] = stride_offset[2] * k;
410  for(size_t m=0;m<array_info[3].length;m++){
411  tmp[3] = stride_offset[3] * m;
412  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3],
413  buf2 + buf1_offset, element_size);
414  buf1_offset += element_size;
415  }
416  }
417  }
418  }
419  }
420  break;
421  case _XMP_SCALAR_MCOPY:
422  for(size_t i=0;i<array_info[0].length;i++){
423  tmp[0] = stride_offset[0] * i;
424  for(size_t j=0;j<array_info[1].length;j++){
425  tmp[1] = stride_offset[1] * j;
426  for(size_t k=0;k<array_info[2].length;k++){
427  tmp[2] = stride_offset[2] * k;
428  for(size_t m=0;m<array_info[3].length;m++){
429  tmp[3] = stride_offset[3] * m;
430  if(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] != buf2)
431  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3],
432  buf2, element_size);
433  }
434  }
435  }
436  }
437  break;
438  }
439 }

◆ _XMP_stride_memcpy_5dim()

void _XMP_stride_memcpy_5dim ( char *  buf1,
const char *  buf2,
const _XMP_array_section_t array_info,
size_t  element_size,
const int  flag 
)
451 {
452  size_t buf1_offset = 0;
453  size_t tmp[5], stride_offset[5];
454 
455  for(int i=0;i<5;i++)
456  stride_offset[i] = array_info[i].stride * array_info[i].distance;
457 
458  switch (flag){
459  case _XMP_PACK:
460  if(array_info[4].stride == 1){
461  element_size *= array_info[4].length;
462  for(size_t i=0;i<array_info[0].length;i++){
463  tmp[0] = stride_offset[0] * i;
464  for(size_t j=0;j<array_info[1].length;j++){
465  tmp[1] = stride_offset[1] * j;
466  for(size_t k=0;k<array_info[2].length;k++){
467  tmp[2] = stride_offset[2] * k;
468  for(size_t m=0;m<array_info[3].length;m++){
469  tmp[3] = stride_offset[3] * m;
470  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1] + tmp[2] + tmp[3],
471  element_size);
472  buf1_offset += element_size;
473  }
474  }
475  }
476  }
477  }
478  else{
479  for(size_t i=0;i<array_info[0].length;i++){
480  tmp[0] = stride_offset[0] * i;
481  for(size_t j=0;j<array_info[1].length;j++){
482  tmp[1] = stride_offset[1] * j;
483  for(size_t k=0;k<array_info[2].length;k++){
484  tmp[2] = stride_offset[2] * k;
485  for(size_t m=0;m<array_info[3].length;m++){
486  tmp[3] = stride_offset[3] * m;
487  for(size_t n=0;n<array_info[4].length;n++){
488  tmp[4] = stride_offset[4] * n;
489  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4],
490  element_size);
491  buf1_offset += element_size;
492  }
493  }
494  }
495  }
496  }
497  }
498  break;
499  case _XMP_UNPACK:
500  if(array_info[4].stride == 1){
501  element_size *= array_info[4].length;
502  for(size_t i=0;i<array_info[0].length;i++){
503  tmp[0] = stride_offset[0] * i;
504  for(size_t j=0;j<array_info[1].length;j++){
505  tmp[1] = stride_offset[1] * j;
506  for(size_t k=0;k<array_info[2].length;k++){
507  tmp[2] = stride_offset[2] * k;
508  for(size_t m=0;m<array_info[3].length;m++){
509  tmp[3] = stride_offset[3] * m;
510  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3],
511  buf2 + buf1_offset, element_size);
512  buf1_offset += element_size;
513  }
514  }
515  }
516  }
517  }
518  else{
519  for(size_t i=0;i<array_info[0].length;i++){
520  tmp[0] = stride_offset[0] * i;
521  for(size_t j=0;j<array_info[1].length;j++){
522  tmp[1] = stride_offset[1] * j;
523  for(size_t k=0;k<array_info[2].length;k++){
524  tmp[2] = stride_offset[2] * k;
525  for(size_t m=0;m<array_info[3].length;m++){
526  tmp[3] = stride_offset[3] * m;
527  for(size_t n=0;n<array_info[4].length;n++){
528  tmp[4] = stride_offset[4] * n;
529  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4],
530  buf2 + buf1_offset, element_size);
531  buf1_offset += element_size;
532  }
533  }
534  }
535  }
536  }
537  }
538  break;
539  case _XMP_SCALAR_MCOPY:
540  for(size_t i=0;i<array_info[0].length;i++){
541  tmp[0] = stride_offset[0] * i;
542  for(size_t j=0;j<array_info[1].length;j++){
543  tmp[1] = stride_offset[1] * j;
544  for(size_t k=0;k<array_info[2].length;k++){
545  tmp[2] = stride_offset[2] * k;
546  for(size_t m=0;m<array_info[3].length;m++){
547  tmp[3] = stride_offset[3] * m;
548  for(size_t n=0;n<array_info[4].length;n++){
549  tmp[4] = stride_offset[4] * n;
550  if(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] != buf2)
551  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4],
552  buf2, element_size);
553  }
554  }
555  }
556  }
557  }
558  break;
559  }
560 }

◆ _XMP_stride_memcpy_6dim()

void _XMP_stride_memcpy_6dim ( char *  buf1,
const char *  buf2,
const _XMP_array_section_t array_info,
size_t  element_size,
const int  flag 
)
572 {
573  size_t buf1_offset = 0;
574  size_t tmp[6], stride_offset[6];
575 
576  for(int i=0;i<6;i++)
577  stride_offset[i] = array_info[i].stride * array_info[i].distance;
578 
579  switch (flag){
580  case _XMP_PACK:
581  if(array_info[5].stride == 1){
582  element_size *= array_info[5].length;
583  for(size_t i=0;i<array_info[0].length;i++){
584  tmp[0] = stride_offset[0] * i;
585  for(size_t j=0;j<array_info[1].length;j++){
586  tmp[1] = stride_offset[1] * j;
587  for(size_t k=0;k<array_info[2].length;k++){
588  tmp[2] = stride_offset[2] * k;
589  for(size_t m=0;m<array_info[3].length;m++){
590  tmp[3] = stride_offset[3] * m;
591  for(size_t n=0;n<array_info[4].length;n++){
592  tmp[4] = stride_offset[4] * n;
593  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4],
594  element_size);
595  buf1_offset += element_size;
596  }
597  }
598  }
599  }
600  }
601  }
602  else{
603  for(size_t i=0;i<array_info[0].length;i++){
604  tmp[0] = stride_offset[0] * i;
605  for(size_t j=0;j<array_info[1].length;j++){
606  tmp[1] = stride_offset[1] * j;
607  for(size_t k=0;k<array_info[2].length;k++){
608  tmp[2] = stride_offset[2] * k;
609  for(size_t m=0;m<array_info[3].length;m++){
610  tmp[3] = stride_offset[3] * m;
611  for(size_t n=0;n<array_info[4].length;n++){
612  tmp[4] = stride_offset[4] * n;
613  for(size_t p=0;p<array_info[5].length;p++){
614  tmp[5] = stride_offset[5] * p;
615  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5],
616  element_size);
617  buf1_offset += element_size;
618  }
619  }
620  }
621  }
622  }
623  }
624  }
625  break;
626  case _XMP_UNPACK:
627  if(array_info[5].stride == 1){
628  element_size *= array_info[5].length;
629  for(size_t i=0;i<array_info[0].length;i++){
630  tmp[0] = stride_offset[0] * i;
631  for(size_t j=0;j<array_info[1].length;j++){
632  tmp[1] = stride_offset[1] * j;
633  for(size_t k=0;k<array_info[2].length;k++){
634  tmp[2] = stride_offset[2] * k;
635  for(size_t m=0;m<array_info[3].length;m++){
636  tmp[3] = stride_offset[3] * m;
637  for(size_t n=0;n<array_info[4].length;n++){
638  tmp[4] = stride_offset[4] * n;
639  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4],
640  buf2 + buf1_offset, element_size);
641  buf1_offset += element_size;
642  }
643  }
644  }
645  }
646  }
647  }
648  else{
649  for(size_t i=0;i<array_info[0].length;i++){
650  tmp[0] = stride_offset[0] * i;
651  for(size_t j=0;j<array_info[1].length;j++){
652  tmp[1] = stride_offset[1] * j;
653  for(size_t k=0;k<array_info[2].length;k++){
654  tmp[2] = stride_offset[2] * k;
655  for(size_t m=0;m<array_info[3].length;m++){
656  tmp[3] = stride_offset[3] * m;
657  for(size_t n=0;n<array_info[4].length;n++){
658  tmp[4] = stride_offset[4] * n;
659  for(size_t p=0;p<array_info[5].length;p++){
660  tmp[5] = stride_offset[5] * p;
661  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5],
662  buf2 + buf1_offset, element_size);
663  buf1_offset += element_size;
664  }
665  }
666  }
667  }
668  }
669  }
670  }
671  break;
672  case _XMP_SCALAR_MCOPY:
673  for(size_t i=0;i<array_info[0].length;i++){
674  tmp[0] = stride_offset[0] * i;
675  for(size_t j=0;j<array_info[1].length;j++){
676  tmp[1] = stride_offset[1] * j;
677  for(size_t k=0;k<array_info[2].length;k++){
678  tmp[2] = stride_offset[2] * k;
679  for(size_t m=0;m<array_info[3].length;m++){
680  tmp[3] = stride_offset[3] * m;
681  for(size_t n=0;n<array_info[4].length;n++){
682  tmp[4] = stride_offset[4] * n;
683  for(size_t p=0;p<array_info[5].length;p++){
684  tmp[5] = stride_offset[5] * p;
685  if(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] != buf2)
686  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5],
687  buf2, element_size);
688  }
689  }
690  }
691  }
692  }
693  }
694  break;
695  }
696 }

◆ _XMP_stride_memcpy_7dim()

void _XMP_stride_memcpy_7dim ( char *  buf1,
const char *  buf2,
const _XMP_array_section_t array_info,
size_t  element_size,
const int  flag 
)
708 {
709  size_t buf1_offset = 0;
710  size_t tmp[7], stride_offset[7];
711 
712  for(int i=0;i<7;i++)
713  stride_offset[i] = array_info[i].stride * array_info[i].distance;
714 
715  switch (flag){
716  case _XMP_PACK:
717  if(array_info[6].stride == 1){
718  element_size *= array_info[6].length;
719  for(size_t i=0;i<array_info[0].length;i++){
720  tmp[0] = stride_offset[0] * i;
721  for(size_t j=0;j<array_info[1].length;j++){
722  tmp[1] = stride_offset[1] * j;
723  for(size_t k=0;k<array_info[2].length;k++){
724  tmp[2] = stride_offset[2] * k;
725  for(size_t m=0;m<array_info[3].length;m++){
726  tmp[3] = stride_offset[3] * m;
727  for(size_t n=0;n<array_info[4].length;n++){
728  tmp[4] = stride_offset[4] * n;
729  for(size_t p=0;p<array_info[5].length;p++){
730  tmp[5] = stride_offset[5] * p;
731  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5],
732  element_size);
733  buf1_offset += element_size;
734  }
735  }
736  }
737  }
738  }
739  }
740  }
741  else{
742  for(size_t i=0;i<array_info[0].length;i++){
743  tmp[0] = stride_offset[0] * i;
744  for(size_t j=0;j<array_info[1].length;j++){
745  tmp[1] = stride_offset[1] * j;
746  for(size_t k=0;k<array_info[2].length;k++){
747  tmp[2] = stride_offset[2] * k;
748  for(size_t m=0;m<array_info[3].length;m++){
749  tmp[3] = stride_offset[3] * m;
750  for(size_t n=0;n<array_info[4].length;n++){
751  tmp[4] = stride_offset[4] * n;
752  for(size_t p=0;p<array_info[5].length;p++){
753  tmp[5] = stride_offset[5] * p;
754  for(size_t q=0;q<array_info[6].length;q++){
755  tmp[6] = stride_offset[6] * q;
756  memcpy(buf1 + buf1_offset, buf2 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + tmp[6],
757  element_size);
758  buf1_offset += element_size;
759  }
760  }
761  }
762  }
763  }
764  }
765  }
766  }
767  break;
768  case _XMP_UNPACK:
769  if(array_info[6].stride == 1){
770  element_size *= array_info[6].length;
771  for(size_t i=0;i<array_info[0].length;i++){
772  tmp[0] = stride_offset[0] * i;
773  for(size_t j=0;j<array_info[1].length;j++){
774  tmp[1] = stride_offset[1] * j;
775  for(size_t k=0;k<array_info[2].length;k++){
776  tmp[2] = stride_offset[2] * k;
777  for(size_t m=0;m<array_info[3].length;m++){
778  tmp[3] = stride_offset[3] * m;
779  for(size_t n=0;n<array_info[4].length;n++){
780  tmp[4] = stride_offset[4] * n;
781  for(size_t p=0;p<array_info[5].length;p++){
782  tmp[5] = stride_offset[5] * p;
783  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5],
784  buf2 + buf1_offset, element_size);
785  buf1_offset += element_size;
786  }
787  }
788  }
789  }
790  }
791  }
792  }
793  else{
794  for(size_t i=0;i<array_info[0].length;i++){
795  tmp[0] = stride_offset[0] * i;
796  for(size_t j=0;j<array_info[1].length;j++){
797  tmp[1] = stride_offset[1] * j;
798  for(size_t k=0;k<array_info[2].length;k++){
799  tmp[2] = stride_offset[2] * k;
800  for(size_t m=0;m<array_info[3].length;m++){
801  tmp[3] = stride_offset[3] * m;
802  for(size_t n=0;n<array_info[4].length;n++){
803  tmp[4] = stride_offset[4] * n;
804  for(size_t p=0;p<array_info[5].length;p++){
805  tmp[5] = stride_offset[5] * p;
806  for(size_t q=0;q<array_info[6].length;q++){
807  tmp[6] = stride_offset[6] * q;
808  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + tmp[6],
809  buf2 + buf1_offset, element_size);
810  buf1_offset += element_size;
811  }
812  }
813  }
814  }
815  }
816  }
817  }
818  }
819  break;
820  case _XMP_SCALAR_MCOPY:
821  for(size_t i=0;i<array_info[0].length;i++){
822  tmp[0] = stride_offset[0] * i;
823  for(size_t j=0;j<array_info[1].length;j++){
824  tmp[1] = stride_offset[1] * j;
825  for(size_t k=0;k<array_info[2].length;k++){
826  tmp[2] = stride_offset[2] * k;
827  for(size_t m=0;m<array_info[3].length;m++){
828  tmp[3] = stride_offset[3] * m;
829  for(size_t n=0;n<array_info[4].length;n++){
830  tmp[4] = stride_offset[4] * n;
831  for(size_t p=0;p<array_info[5].length;p++){
832  tmp[5] = stride_offset[5] * p;
833  for(size_t q=0;q<array_info[6].length;q++){
834  tmp[6] = stride_offset[6] * q;
835  if(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + tmp[6] != buf2)
836  memcpy(buf1 + tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + tmp[6],
837  buf2, element_size);
838  }
839  }
840  }
841  }
842  }
843  }
844  }
845  break;
846  }
847 }
_is_constant_stride_4dim
int _is_constant_stride_4dim(const _XMP_array_section_t *array_info)
Definition: xmp_coarray_utils.c:1444
_XMP_fatal_nomsg
void _XMP_fatal_nomsg()
Definition: xmp_util.c:48
_XMP_M_MIN
#define _XMP_M_MIN(a_, b_)
Definition: xmp_math_function.h:20
_is_constant_stride_6dim
int _is_constant_stride_6dim(const _XMP_array_section_t *array_info)
Definition: xmp_coarray_utils.c:1506
_XMP_array_section::length
long length
Definition: xmp_data_struct.h:377
_XMP_get_dim_of_allelmts
int _XMP_get_dim_of_allelmts(const int dims, const _XMP_array_section_t *array_info)
Definition: xmp_coarray_utils.c:120
_XMP_calc_copy_chunk
size_t _XMP_calc_copy_chunk(const int, const _XMP_array_section_t *)
Definition: xmp_coarray_local.c:22
_shift_queue_t::num
int num
How many shifts are in this queue.
Definition: xmp_coarray_gasnet.c:10
_is_the_same_shape_except_for_start
int _is_the_same_shape_except_for_start(const _XMP_array_section_t *array1_info, const _XMP_array_section_t *array2_info, const int array1_dims, const int array2_dims)
Definition: xmp_coarray_utils.c:1600
_XMP_N_INT_FALSE
#define _XMP_N_INT_FALSE
Definition: xmp_constant.h:5
_XMP_check_overlapping
_Bool _XMP_check_overlapping(const char *dst_start, const char *dst_end, const char *src_start, const char *src_end)
Definition: xmp_coarray_utils.c:63
_is_constant_stride_2dim
int _is_constant_stride_2dim(const _XMP_array_section_t *array_info)
Definition: xmp_coarray_utils.c:1405
_is_constant_stride_5dim
int _is_constant_stride_5dim(const _XMP_array_section_t *array_info)
Definition: xmp_coarray_utils.c:1473
_is_constant_stride_3dim
int _is_constant_stride_3dim(const _XMP_array_section_t *array_info)
Definition: xmp_coarray_utils.c:1422
_XMP_fatal
void _XMP_fatal(char *msg)
Definition: xmp_util.c:42
_XMP_array_section::stride
long stride
Definition: xmp_data_struct.h:378
_is_all_element
int _is_all_element(const _XMP_array_section_t *array_info, int dim)
Definition: xmp_coarray_utils.c:1369
_XMP_array_section::distance
long distance
Definition: xmp_data_struct.h:380
_is_constant_stride_1dim
int _is_constant_stride_1dim()
Returns TRUE if a 1-dimensional array has a constant stride (always TRUE).
Definition: xmp_coarray_utils.c:1395
_XMP_array_section::elmts
long elmts
Definition: xmp_data_struct.h:379
_check_round
int _check_round(const _XMP_array_section_t *array, const int dim)
Definition: xmp_coarray_utils.c:1387
_XMP_N_INT_TRUE
#define _XMP_N_INT_TRUE
Definition: xmp_constant.h:4
_is_constant_stride_7dim
int _is_constant_stride_7dim(const _XMP_array_section_t *array_info)
Definition: xmp_coarray_utils.c:1549