libxmp/libxmpf in Omni Compiler  1.3.4
xacc_util_cuda.c File Reference
#include "xmp_internal.h"
#include "xacc_internal.h"
Include dependency graph for xacc_util_cuda.c:

Functions

void _XACC_util_init (void)
 
void _XACC_queue_create (_XACC_queue_t *queue)
 
void _XACC_queue_destroy (_XACC_queue_t *queue)
 
void _XACC_queue_wait (_XACC_queue_t queue)
 
void _XACC_memory_alloc (_XACC_memory_t *memory, size_t size)
 
void _XACC_memory_free (_XACC_memory_t *memory)
 
void _XACC_memory_read (void *addr, _XACC_memory_t memory, size_t memory_offset, size_t size, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_write (_XACC_memory_t memory, size_t memory_offset, void *addr, size_t size, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_copy (_XACC_memory_t dst_memory, size_t dst_memory_offset, _XACC_memory_t src_memory, size_t src_memory_offset, size_t size, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_host_malloc (void **ptr, size_t size)
 
void _XACC_host_free (void **ptr)
 
void * _XACC_memory_get_address (_XACC_memory_t memory)
 
void _XACC_memory_pack_vector (_XACC_memory_t dst_mem, size_t dst_offset, _XACC_memory_t src_mem, size_t src_offset, size_t blocklength, size_t stride, size_t count, size_t typesize, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_unpack_vector (_XACC_memory_t dst_mem, size_t dst_offset, _XACC_memory_t src_mem, size_t src_offset, size_t blocklength, size_t stride, size_t count, size_t typesize, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_pack_vector2 (_XACC_memory_t dst0_mem, size_t dst0_offset, _XACC_memory_t src0_mem, size_t src0_offset, size_t blocklength0, size_t stride0, size_t count0, _XACC_memory_t dst1_mem, size_t dst1_offset, _XACC_memory_t src1_mem, size_t src1_offset, size_t blocklength1, size_t stride1, size_t count1, size_t typesize, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_unpack_vector2 (_XACC_memory_t dst0_mem, size_t dst0_offset, _XACC_memory_t src0_mem, size_t src0_offset, size_t blocklength0, size_t stride0, size_t count0, _XACC_memory_t dst1_mem, size_t dst1_offset, _XACC_memory_t src1_mem, size_t src1_offset, size_t blocklength1, size_t stride1, size_t count1, size_t typesize, _XACC_queue_t queue, bool is_blocking)
 

Function Documentation

◆ _XACC_host_free()

/* Release a pinned host buffer obtained via _XACC_host_malloc():
 * unregister the page-locked mapping (skipped for NULL), free the
 * underlying allocation, and clear *ptr to prevent double-free. */
void _XACC_host_free(void **ptr)
{
  void *host_buf = *ptr;

  if(host_buf != NULL){
    CUDA_SAFE_CALL(cudaHostUnregister(host_buf));
  }
  _XMP_free(host_buf); /* presumably NULL-tolerant like free() — TODO confirm in xmp_util.c */

  //CUDA_SAFE_CALL(cudaFreeHost(*ptr));

  *ptr = NULL;
}
Here is the call graph for this function:

◆ _XACC_host_malloc()

void _XACC_host_malloc ( void **  ptr,
size_t  size 
)
66 {
67  *ptr = _XMP_alloc(size);
68  CUDA_SAFE_CALL(cudaHostRegister(*ptr, size, cudaHostRegisterDefault));
69 
70  //CUDA_SAFE_CALL(cudaHostAlloc(ptr, size, cudaHostAllocDefault));
71 }
Here is the call graph for this function:

◆ _XACC_memory_alloc()

// Allocate `size` bytes of device memory and store the resulting handle
// in *memory. Errors are routed through CUDA_SAFE_CALL
// (NOTE(review): abort-on-error semantics assumed — see xacc_internal.h).
void _XACC_memory_alloc ( _XACC_memory_t *  memory,
size_t  size 
)
27 {
28  CUDA_SAFE_CALL(cudaMalloc(memory, size));
29 }

◆ _XACC_memory_copy()

/* Copy `size` bytes from (src_memory + src_memory_offset) to
 * (dst_memory + dst_memory_offset). The actual transfer is delegated to
 * memory_copy(), which honours `queue` and `is_blocking`. */
void _XACC_memory_copy(_XACC_memory_t dst_memory, size_t dst_memory_offset,
                       _XACC_memory_t src_memory, size_t src_memory_offset,
                       size_t size, _XACC_queue_t queue, bool is_blocking)
{
  char *dst = (char*)dst_memory + dst_memory_offset;
  char *src = (char*)src_memory + src_memory_offset;
  memory_copy(dst, src, size, queue, is_blocking);
}

◆ _XACC_memory_free()

// Free the device memory referenced by *memory and reset the handle to
// NULL so a repeated call cannot double-free it.
void _XACC_memory_free ( _XACC_memory_t *  memory)
32 {
33  CUDA_SAFE_CALL(cudaFree(*memory));
34  *memory = NULL;
35 }
Here is the caller graph for this function:

◆ _XACC_memory_get_address()

// Return the raw address behind a memory handle. The handle is the
// address itself in this CUDA back end, so this is a plain cast.
void* _XACC_memory_get_address ( _XACC_memory_t  memory)
84 {
85  return (void*)memory;
86 }

◆ _XACC_memory_pack_vector()

/* Pack `count` blocks of `blocklength` elements (each `typesize` bytes),
 * read at byte stride `stride` from src_mem+src_offset, contiguously into
 * dst_mem+dst_offset, using the GPU packing launcher. */
void _XACC_memory_pack_vector(_XACC_memory_t dst_mem, size_t dst_offset,
                              _XACC_memory_t src_mem, size_t src_offset,
                              size_t blocklength, size_t stride, size_t count,
                              size_t typesize, _XACC_queue_t queue, bool is_blocking)
{
  /* block-local prototype for the XMP GPU runtime launcher */
  void _XMP_gpu_pack_vector_async(char * restrict dst, char * restrict src,
                                  int count, int blocklength, long stride,
                                  size_t typesize, void* async_id);

  char * restrict dst = (char * restrict)dst_mem + dst_offset;
  char * restrict src = (char * restrict)src_mem + src_offset;

  /* NOTE(review): is_blocking is not consulted — the pack is always
   * enqueued on `queue`; confirm callers synchronize via _XACC_queue_wait. */
  _XMP_gpu_pack_vector_async(dst, src, count, blocklength, stride,
                             typesize, &queue);
}
Here is the call graph for this function:

◆ _XACC_memory_pack_vector2()

/* Pack two strided vectors in a single fused GPU operation. Both vectors
 * must have the same block count (count0 == count1); a NULL memory handle
 * for either vector disables that half of the pack. */
void _XACC_memory_pack_vector2(_XACC_memory_t dst0_mem, size_t dst0_offset,
                               _XACC_memory_t src0_mem, size_t src0_offset,
                               size_t blocklength0, size_t stride0, size_t count0,
                               _XACC_memory_t dst1_mem, size_t dst1_offset,
                               _XACC_memory_t src1_mem, size_t src1_offset,
                               size_t blocklength1, size_t stride1, size_t count1,
                               size_t typesize, _XACC_queue_t queue, bool is_blocking)
{
  /* block-local prototype for the fused two-vector packing launcher */
  void _XMP_gpu_pack_vector2_async(char * restrict dst0, char * restrict src0, int blocklength0, long stride0,
                                   char * restrict dst1, char * restrict src1, int blocklength1, long stride1,
                                   int count, size_t typesize, cudaStream_t st);

  if(count0 != count1){
    _XACC_fatal("two counts of vectors must be same");
  }

  char * restrict d0 = (dst0_mem == NULL)? NULL : (char * restrict)dst0_mem + dst0_offset;
  char * restrict s0 = (src0_mem == NULL)? NULL : (char * restrict)src0_mem + src0_offset;
  char * restrict d1 = (dst1_mem == NULL)? NULL : (char * restrict)dst1_mem + dst1_offset;
  char * restrict s1 = (src1_mem == NULL)? NULL : (char * restrict)src1_mem + src1_offset;

  /* NOTE(review): is_blocking is ignored; the pack is enqueued on `queue`. */
  _XMP_gpu_pack_vector2_async(d0, s0, blocklength0, stride0,
                              d1, s1, blocklength1, stride1,
                              count0, typesize, queue);
}

◆ _XACC_memory_read()

/* Read `size` bytes from device memory (memory + memory_offset) into the
 * host buffer `addr`, via memory_copy() which honours `queue` and
 * `is_blocking`. */
void _XACC_memory_read(void *addr, _XACC_memory_t memory, size_t memory_offset,
                       size_t size, _XACC_queue_t queue, bool is_blocking)
{
  char *src = (char*)memory + memory_offset;
  memory_copy(addr, src, size, queue, is_blocking);
}

◆ _XACC_memory_unpack_vector()

/* Unpack a contiguous buffer at src_mem+src_offset into `count` blocks of
 * `blocklength` elements (each `typesize` bytes) written at byte stride
 * `stride` starting at dst_mem+dst_offset. Inverse of
 * _XACC_memory_pack_vector. */
void _XACC_memory_unpack_vector(_XACC_memory_t dst_mem, size_t dst_offset,
                                _XACC_memory_t src_mem, size_t src_offset,
                                size_t blocklength, size_t stride, size_t count,
                                size_t typesize, _XACC_queue_t queue, bool is_blocking)
{
  /* block-local prototype for the XMP GPU runtime launcher */
  void _XMP_gpu_unpack_vector_async(char * restrict dst, char * restrict src,
                                    int count, int blocklength, long stride,
                                    size_t typesize, void* async_id);

  char * restrict dst = (char * restrict)dst_mem + dst_offset;
  char * restrict src = (char * restrict)src_mem + src_offset;

  /* NOTE(review): is_blocking is not consulted — always enqueued on `queue`. */
  _XMP_gpu_unpack_vector_async(dst, src, count, blocklength, stride,
                               typesize, &queue);
}
Here is the call graph for this function:

◆ _XACC_memory_unpack_vector2()

/* Unpack two vectors in a single fused GPU operation; inverse of
 * _XACC_memory_pack_vector2. Both vectors must share one block count
 * (count0 == count1); a NULL memory handle disables that half. */
void _XACC_memory_unpack_vector2(_XACC_memory_t dst0_mem, size_t dst0_offset,
                                 _XACC_memory_t src0_mem, size_t src0_offset,
                                 size_t blocklength0, size_t stride0, size_t count0,
                                 _XACC_memory_t dst1_mem, size_t dst1_offset,
                                 _XACC_memory_t src1_mem, size_t src1_offset,
                                 size_t blocklength1, size_t stride1, size_t count1,
                                 size_t typesize, _XACC_queue_t queue, bool is_blocking)
{
  /* block-local prototype for the fused two-vector unpacking launcher */
  void _XMP_gpu_unpack_vector2_async(char * restrict dst0, char * restrict src0, int blocklength0, long stride0,
                                     char * restrict dst1, char * restrict src1, int blocklength1, long stride1,
                                     int count, size_t typesize, cudaStream_t st);

  if(count0 != count1){
    _XACC_fatal("two counts of vectors must be same");
  }

  char * restrict d0 = (dst0_mem == NULL)? NULL : (char * restrict)dst0_mem + dst0_offset;
  char * restrict s0 = (src0_mem == NULL)? NULL : (char * restrict)src0_mem + src0_offset;
  char * restrict d1 = (dst1_mem == NULL)? NULL : (char * restrict)dst1_mem + dst1_offset;
  char * restrict s1 = (src1_mem == NULL)? NULL : (char * restrict)src1_mem + src1_offset;

  /* NOTE(review): is_blocking is ignored; the unpack is enqueued on `queue`. */
  _XMP_gpu_unpack_vector2_async(d0, s0, blocklength0, stride0,
                                d1, s1, blocklength1, stride1,
                                count0, typesize, queue);
}

◆ _XACC_memory_write()

/* Write `size` bytes from the host buffer `addr` into device memory at
 * (memory + memory_offset), via memory_copy() which honours `queue` and
 * `is_blocking`. */
void _XACC_memory_write(_XACC_memory_t memory, size_t memory_offset, void *addr,
                        size_t size, _XACC_queue_t queue, bool is_blocking)
{
  char *dst = (char*)memory + memory_offset;
  memory_copy(dst, addr, size, queue, is_blocking);
}

◆ _XACC_queue_create()

// Create a new CUDA stream and store it in *queue.
// NOTE(review): _XACC_queue_t appears to alias cudaStream_t here —
// confirm against xacc_internal.h.
void _XACC_queue_create ( _XACC_queue_t *  queue)
7 {
8  CUDA_SAFE_CALL(cudaStreamCreate(queue));
9 }

◆ _XACC_queue_destroy()

/* Destroy the stream held in *queue and reset the handle to
 * _XACC_QUEUE_NULL. Destroying an already-null queue is a no-op, so the
 * function is safe to call twice on the same handle. */
void _XACC_queue_destroy(_XACC_queue_t *queue)
{
  if(*queue == _XACC_QUEUE_NULL) return;

  CUDA_SAFE_CALL(cudaStreamDestroy(*queue));
  *queue = _XACC_QUEUE_NULL;
}

◆ _XACC_queue_wait()

// Block the host until all work previously enqueued on `queue` has
// completed (wraps cudaStreamSynchronize).
void _XACC_queue_wait ( _XACC_queue_t  queue)
20 {
21  CUDA_SAFE_CALL(cudaStreamSynchronize(queue));
22 }

◆ _XACC_util_init()

// One-time initialization hook for this back end. The CUDA runtime needs
// no explicit setup here, so the body is intentionally empty.
void _XACC_util_init ( void  )
4 {}
Here is the caller graph for this function:
_XACC_fatal
#define _XACC_fatal
Definition: xacc_internal.h:99
_XMP_alloc
void * _XMP_alloc(size_t size)
Definition: xmp_util.c:21
_XMP_free
void _XMP_free(void *p)
Definition: xmp_util.c:37
_XMP_gpu_pack_vector_async
void _XMP_gpu_pack_vector_async(char *restrict dst, char *restrict src, int count, int blocklength, long stride, size_t typesize, void *async_id)
_XMP_gpu_unpack_vector_async
void _XMP_gpu_unpack_vector_async(char *restrict dst, char *restrict src, int count, int blocklength, long stride, size_t typesize, void *async_id)