libxmp/libxmpf in Omni Compiler  1.3.4
xacc_internal.h File Reference
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
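Include dependency graph for xacc_internal.h: (graph image not reproduced in this text rendering)
This graph shows which files directly or indirectly include this file: (graph image not reproduced in this text rendering)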

Go to the source code of this file.

Macros

#define _XACC_fatal   _XMP_fatal
 

Functions

void _XACC_init (void)
 
void _XACC_util_init (void)
 
void _XACC_queue_create (_XACC_queue_t *queue)
 
void _XACC_queue_destroy (_XACC_queue_t *queue)
 
void _XACC_queue_wait (_XACC_queue_t queue)
 
void _XACC_memory_alloc (_XACC_memory_t *memory, size_t size)
 
void _XACC_memory_free (_XACC_memory_t *memory)
 
void _XACC_memory_read (void *addr, _XACC_memory_t memory, size_t memory_offset, size_t size, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_write (_XACC_memory_t memory, size_t memory_offset, void *addr, size_t size, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_copy (_XACC_memory_t dst_memory, size_t dst_memory_offset, _XACC_memory_t src_memory, size_t src_memory_offset, size_t size, _XACC_queue_t queue, bool is_blocking)
 
void * _XACC_memory_get_address (_XACC_memory_t memory)
 
void _XACC_host_malloc (void **ptr, size_t size)
 
void _XACC_host_free (void **ptr)
 
void _XACC_memory_pack_vector (_XACC_memory_t dst_mem, size_t dst_offset, _XACC_memory_t src_mem, size_t src_offset, size_t blocklength, size_t stride, size_t count, size_t typesize, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_unpack_vector (_XACC_memory_t dst_mem, size_t dst_offset, _XACC_memory_t src_mem, size_t src_offset, size_t blocklength, size_t stride, size_t count, size_t typesize, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_pack_vector2 (_XACC_memory_t dst0_mem, size_t dst0_offset, _XACC_memory_t src0_mem, size_t src0_offset, size_t blocklength0, size_t stride0, size_t count0, _XACC_memory_t dst1_mem, size_t dst1_offset, _XACC_memory_t src1_mem, size_t src1_offset, size_t blocklength1, size_t stride1, size_t count1, size_t typesize, _XACC_queue_t queue, bool is_blocking)
 
void _XACC_memory_unpack_vector2 (_XACC_memory_t dst0_mem, size_t dst0_offset, _XACC_memory_t src0_mem, size_t src0_offset, size_t blocklength0, size_t stride0, size_t count0, _XACC_memory_t dst1_mem, size_t dst1_offset, _XACC_memory_t src1_mem, size_t src1_offset, size_t blocklength1, size_t stride1, size_t count1, size_t typesize, _XACC_queue_t queue, bool is_blocking)
 

Macro Definition Documentation

◆ _XACC_fatal

#define _XACC_fatal   _XMP_fatal

Function Documentation

◆ _XACC_host_free()

/*
 * Releases the host buffer referenced by *ptr with _XMP_free() and then
 * resets *ptr to NULL.
 *
 * ptr: address of the pointer to release. It is dereferenced
 *      unconditionally, so ptr itself must not be NULL.
 */
void _XACC_host_free ( void **  ptr)
109 {
110  _XMP_free(*ptr);
111  *ptr = NULL;
112 }
Here is the call graph for this function:

◆ _XACC_host_malloc()

/*
 * Allocates host memory by calling _XMP_alloc(size) and stores the
 * returned pointer in *ptr.
 *
 * ptr:  output location for the allocated pointer (dereferenced
 *       unconditionally).
 * size: value forwarded unchanged to _XMP_alloc().
 *
 * NOTE(review): the returned pointer is not checked in this listing;
 * whether _XMP_alloc() can return NULL is not visible here -- confirm.
 */
void _XACC_host_malloc ( void **  ptr,
size_t  size 
)
105 {
106  *ptr = _XMP_alloc(size);
107 }
Here is the call graph for this function:

◆ _XACC_init()

/*
 * Initialization entry point. No statements are visible in the body
 * shown below.
 *
 * NOTE(review): the listing jumps from source line 4 to source line 6,
 * so line 5 is not shown -- presumably removed by preprocessing; confirm
 * against the source file before relying on this function being a no-op.
 */
void _XACC_init ( void  )
4 {
6 }
Here is the call graph for this function:

◆ _XACC_memory_alloc()

/*
 * Creates an OpenCL buffer object and stores its handle in *memory.
 *
 * The buffer is created with clCreateBuffer() in the context returned by
 * acc_get_current_opencl_context(), using CL_MEM_READ_WRITE, the given
 * size, and no host pointer. The error code written by clCreateBuffer()
 * is passed to CL_CHECK before the handle is stored.
 *
 * memory: output location for the new buffer handle.
 * size:   size argument forwarded to clCreateBuffer().
 *
 * NOTE(review): the debug format uses "%zd" for a size_t argument; "%zu"
 * is the matching conversion -- confirm how XACC_DEBUG formats its
 * arguments.
 */
void _XACC_memory_alloc ( _XACC_memory_t *  memory,
size_t  size 
)
53 {
54  cl_int ret;
55  cl_context context = (cl_context)acc_get_current_opencl_context();
56 
57  cl_mem mem = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &ret);
58  CL_CHECK(ret);
59 
60  XACC_DEBUG("alloc %p, %zd", mem, size);
61  *memory = mem;
62 }

◆ _XACC_memory_copy()

/*
 * Enqueues a buffer-to-buffer copy on the given queue with
 * clEnqueueCopyBuffer(): `size` is copied from src_memory starting at
 * src_memory_offset into dst_memory starting at dst_memory_offset.
 * No wait list or completion event is used. The return code is passed
 * to CL_CHECK.
 *
 * is_blocking: when true, _XACC_queue_wait(queue) is called after the
 *              copy has been enqueued; when false the function returns
 *              right after enqueueing.
 */
void _XACC_memory_copy ( _XACC_memory_t  dst_memory,
size_t  dst_memory_offset,
_XACC_memory_t  src_memory,
size_t  src_memory_offset,
size_t  size,
_XACC_queue_t  queue,
bool  is_blocking 
)
91 {
92  CL_CHECK(clEnqueueCopyBuffer(queue, src_memory, dst_memory, src_memory_offset, dst_memory_offset, size, 0 /*num_wait_ev*/, NULL /*wait_ev_list*/, NULL /*ev*/));
93 
94  if(is_blocking){
95  _XACC_queue_wait(queue);
96  }
97 }
Here is the call graph for this function:

◆ _XACC_memory_free()

/*
 * Releases the OpenCL memory object referenced by *memory and resets
 * *memory to NULL.
 *
 * If *memory is already NULL the function returns immediately without
 * calling into OpenCL. Otherwise clReleaseMemObject() is called and its
 * return code is passed to CL_CHECK.
 *
 * memory: address of the handle to release (dereferenced
 *         unconditionally, so it must not be NULL itself).
 */
void _XACC_memory_free ( _XACC_memory_t *  memory)
65 {
66  if(*memory == NULL) return;
67 
68  XACC_DEBUG("free %p\n", *memory);
69  CL_CHECK(clReleaseMemObject(*memory));
70  *memory = NULL;
71 }
Here is the caller graph for this function:

◆ _XACC_memory_get_address()

/*
 * Not supported in this implementation: the function calls _XACC_fatal()
 * with the message "cannot get raw address in OpenCL" and does not use
 * its `memory` argument.
 *
 * Returns NULL; the original comment marks this return as a dummy.
 * NOTE(review): whether _XACC_fatal() ever returns is not visible in
 * this listing -- confirm.
 */
void* _XACC_memory_get_address ( _XACC_memory_t  memory)
99 {
100  _XACC_fatal("cannot get raw address in OpenCL");
101  return NULL; //dummy
102 }

◆ _XACC_memory_pack_vector()

/*
 * Enqueues one of the pack_vector kernels on `queue`.
 *
 * Steps visible in the listing:
 *  1. Logs all arguments through XACC_DEBUG.
 *  2. Runs CHECK_MULTIPLE on blocklength, stride, dst_offset and
 *     src_offset against typesize, then divides each of them by typesize
 *     to obtain the *_e values (presumably element counts rather than
 *     byte counts -- confirm against the kernel source).
 *  3. Chooses the work sizes:
 *     - with _XMP_XACC_PZCL defined: a 1-D launch whose global size is
 *       ADJUST_GLOBAL_WORK_SIZE(blocklength_e * count) and whose local
 *       size is 8; _XMP_fatal() is called when the global size exceeds
 *       8192 or is not a multiple of 128.
 *     - otherwise: a 2-D launch. tx is numThreads (128) when
 *       blocklength_e >= numThreads, else 1 doubled until it is no longer
 *       smaller than blocklength_e. ty = numThreads / tx and
 *       by = (count-1)/ty + 1. The global size is {bx*tx, by*ty} with
 *       bx fixed at 1, and the local size is {tx, ty}.
 *  4. Selects the kernel by typesize: 1, 2, 4 and 8 use the _8, _16, _32
 *     and _64 kernels with the divided (*_e) arguments. Any other
 *     typesize uses the _8 kernel with the undivided offsets,
 *     blocklength and stride.
 *
 * NOTE(review): is_blocking appears only in the debug log; no wait on the
 * queue is issued in this listing -- confirm whether enqueue_kernel or
 * the callers synchronize.
 * NOTE(review): max_num_threads is declared in the PZCL branch but the
 * check below uses the literal 8192 instead.
 * NOTE(review): count is a size_t, so (count-1) wraps when count is 0 --
 * confirm callers never pass 0.
 */
void _XACC_memory_pack_vector ( _XACC_memory_t  dst_mem,
size_t  dst_offset,
_XACC_memory_t  src_mem,
size_t  src_offset,
size_t  blocklength,
size_t  stride,
size_t  count,
size_t  typesize,
_XACC_queue_t  queue,
bool  is_blocking 
)
274 {
275  XACC_DEBUG("pack_vector, dst=%p, dst_off=%zd, src=%p, src_off=%zd, blklen=%zd, stride=%zd, count=%zd, typesize=%zd, queue=%p, is_blocking=%d\n",
276  dst_mem, dst_offset, src_mem, src_offset, blocklength, stride, count, typesize, queue, is_blocking);
277 
278  const int numThreads = 128; //must be 2^n
279 
280  CHECK_MULTIPLE(blocklength, typesize);
281  CHECK_MULTIPLE(stride, typesize);
282  CHECK_MULTIPLE(dst_offset, typesize);
283  CHECK_MULTIPLE(src_offset, typesize);
284  size_t blocklength_e = blocklength / typesize;
285  size_t stride_e = stride / typesize;
286  size_t dst_offset_e = dst_offset / typesize;
287  size_t src_offset_e = src_offset / typesize;
288 
289 #ifdef _XMP_XACC_PZCL
290  const int max_num_threads = 8192;
291  int num_threads = ADJUST_GLOBAL_WORK_SIZE(blocklength_e * count); //MIN(blocklength_e * count, max_num_threads);
292 
293  cl_uint work_dim = 1;
294  size_t global_work_size[] = {num_threads};
295  size_t local_work_size[] = {8};
296 
297  if(global_work_size[0] > 8192 || global_work_size[0] % 128 != 0){
298  _XMP_fatal("invalid global_work_size at pack vector");
299  }
300 #else
301  int bx = 1, by;
302  int tx = 1, ty;
303  if(blocklength_e >= numThreads){
304  tx = numThreads;
305  }else{
306  while(tx < blocklength_e){
307  tx <<= 1;
308  }
309  }
310  ty = numThreads / tx;
311  by = (count-1)/ty + 1;
312 
313  cl_uint work_dim = 2;
314  size_t global_work_size[] = {bx*tx, by*ty};
315  size_t local_work_size[] = {tx, ty};
316 #endif
317 
318  void *args[] = {&dst_mem, &dst_offset_e, &src_mem, &src_offset_e, &blocklength_e, &stride_e, &count};
319  size_t arg_sizes[] = {sizeof(dst_mem), sizeof(dst_offset_e), sizeof(src_mem), sizeof(src_offset_e), sizeof(blocklength_e), sizeof(stride_e), sizeof(count)};
320 
321  switch(typesize){
322  case 1:
323  XACC_DEBUG("pack_vector_8\n");
324  enqueue_kernel(queue, _kernels[_XACC_pack_vector_8], 7, args, arg_sizes, work_dim, global_work_size, local_work_size);
325  break;
326  case 2:
327  XACC_DEBUG("pack_vector_16\n");
328  enqueue_kernel(queue, _kernels[_XACC_pack_vector_16], 7, args, arg_sizes, work_dim, global_work_size, local_work_size);
329  break;
330  case 4:
331  XACC_DEBUG("pack_vector_32\n");
332  enqueue_kernel(queue, _kernels[_XACC_pack_vector_32], 7, args, arg_sizes, work_dim, global_work_size, local_work_size);
333  break;
334  case 8:
335  XACC_DEBUG("pack_vector_64\n");
336  enqueue_kernel(queue, _kernels[_XACC_pack_vector_64], 7, args, arg_sizes, work_dim, global_work_size, local_work_size);
337  break;
338  default:
339  {
340  void *args_default[] = {&dst_mem, &dst_offset, &src_mem, &src_offset, &blocklength, &stride, &count};
341  size_t arg_sizes_default[] = {sizeof(dst_mem), sizeof(dst_offset), sizeof(src_mem), sizeof(src_offset), sizeof(blocklength), sizeof(stride), sizeof(count)};
342 
343  XACC_DEBUG("pack_vector_default\n");
344  enqueue_kernel(queue, _kernels[_XACC_pack_vector_8], 7, args_default, arg_sizes_default, work_dim, global_work_size, local_work_size);
345  }
346  }
347 }
Here is the call graph for this function:

◆ _XACC_memory_pack_vector2()

/*
 * Two-vector variant of the pack operation. The body shown in this
 * listing is empty: no kernel is enqueued and none of the parameters is
 * read.
 *
 * NOTE(review): presumably an unimplemented stub for this backend --
 * confirm that no caller relies on it doing work.
 */
void _XACC_memory_pack_vector2 ( _XACC_memory_t  dst0_mem,
size_t  dst0_offset,
_XACC_memory_t  src0_mem,
size_t  src0_offset,
size_t  blocklength0,
size_t  stride0,
size_t  count0,
_XACC_memory_t  dst1_mem,
size_t  dst1_offset,
_XACC_memory_t  src1_mem,
size_t  src1_offset,
size_t  blocklength1,
size_t  stride1,
size_t  count1,
size_t  typesize,
_XACC_queue_t  queue,
bool  is_blocking 
)
436 {
437 }

◆ _XACC_memory_read()

/*
 * Enqueues a read from an OpenCL buffer into host memory with
 * clEnqueueReadBuffer(): `size` is read from `memory` starting at
 * memory_offset and stored at `addr`. No wait list or completion event
 * is used. The return code is passed to CL_CHECK.
 *
 * is_blocking: converted to CL_TRUE / CL_FALSE and passed as the
 *              blocking flag of clEnqueueReadBuffer().
 */
void _XACC_memory_read ( void *  addr,
_XACC_memory_t  memory,
size_t  memory_offset,
size_t  size,
_XACC_queue_t  queue,
bool  is_blocking 
)
74 {
75  cl_bool is_blocking_cl = is_blocking? CL_TRUE : CL_FALSE;
76 
77  XACC_DEBUG("clEnqueueReadBuffer(cq=%p, mem=%p, blocking=%d, offset=%zd, size=%zd, addr=%p)", queue, memory, is_blocking_cl, memory_offset, size, addr);
78  CL_CHECK(clEnqueueReadBuffer(queue, memory, is_blocking_cl, memory_offset, size, addr,
79  0 /*num_wait_ev*/, NULL /*wait_ev_list*/, NULL /*ev*/));
80 }

◆ _XACC_memory_unpack_vector()

/*
 * Enqueues one of the unpack_vector kernels on `queue`.
 *
 * Steps visible in the listing:
 *  1. Logs all arguments through XACC_DEBUG.
 *  2. Runs CHECK_MULTIPLE on blocklength, stride, dst_offset and
 *     src_offset against typesize, then divides each of them by typesize
 *     to obtain the *_e values (presumably element counts rather than
 *     byte counts -- confirm against the kernel source).
 *  3. Chooses the work sizes:
 *     - with _XMP_XACC_PZCL defined: a 1-D launch whose global size is
 *       ADJUST_GLOBAL_WORK_SIZE(blocklength_e * count) and whose local
 *       size is 8; _XMP_fatal() is called when the global size exceeds
 *       8192 or is not a multiple of 128.
 *     - otherwise: a 2-D launch. tx is numThreads (128) when
 *       blocklength_e >= numThreads, else 1 doubled until it is no longer
 *       smaller than blocklength_e. ty = numThreads / tx and
 *       by = (count-1)/ty + 1. The global size is {bx*tx, by*ty} with
 *       bx fixed at 1, and the local size is {tx, ty}.
 *  4. Selects the kernel by typesize: 1, 2, 4 and 8 use the _8, _16, _32
 *     and _64 kernels with the divided (*_e) arguments. Any other
 *     typesize uses the _8 kernel with the undivided offsets,
 *     blocklength and stride.
 *
 * NOTE(review): is_blocking appears only in the debug log; no wait on the
 * queue is issued in this listing -- confirm whether enqueue_kernel or
 * the callers synchronize.
 * NOTE(review): max_num_threads is declared in the PZCL branch but the
 * check below uses the literal 8192 instead.
 * NOTE(review): count is a size_t, so (count-1) wraps when count is 0 --
 * confirm callers never pass 0.
 */
void _XACC_memory_unpack_vector ( _XACC_memory_t  dst_mem,
size_t  dst_offset,
_XACC_memory_t  src_mem,
size_t  src_offset,
size_t  blocklength,
size_t  stride,
size_t  count,
size_t  typesize,
_XACC_queue_t  queue,
bool  is_blocking 
)
353 {
354  XACC_DEBUG("unpack_vector, dst=%p, dst_off=%zd, src=%p, src_off=%zd, blklen=%zd, stride=%zd, count=%zd, typesize=%zd, queue=%p, is_blocking=%d\n",
355  dst_mem, dst_offset, src_mem, src_offset, blocklength, stride, count, typesize, queue, is_blocking);
356 
357  const int numThreads = 128; //must be 2^n
358 
359  CHECK_MULTIPLE(blocklength, typesize);
360  CHECK_MULTIPLE(stride, typesize);
361  CHECK_MULTIPLE(dst_offset, typesize);
362  CHECK_MULTIPLE(src_offset, typesize);
363  size_t blocklength_e = blocklength / typesize;
364  size_t stride_e = stride / typesize;
365  size_t dst_offset_e = dst_offset / typesize;
366  size_t src_offset_e = src_offset / typesize;
367 
368 #ifdef _XMP_XACC_PZCL
369  const int max_num_threads = 8192;
370  int num_threads = ADJUST_GLOBAL_WORK_SIZE(blocklength_e * count); //MIN(blocklength_e * count, max_num_threads);
371 
372  cl_uint work_dim = 1;
373  size_t global_work_size[] = {num_threads};
374  size_t local_work_size[] = {8};
375 
376  if(global_work_size[0] > 8192 || global_work_size[0] % 128 != 0){
377  _XMP_fatal("invalid global_work_size at unpack vector");
378  }
379 #else
380  int bx = 1, by;
381  int tx = 1, ty;
382 
383  if(blocklength_e >= numThreads){
384  tx = numThreads;
385  }else{
386  while(tx < blocklength_e){
387  tx <<= 1;
388  }
389  }
390  ty = numThreads / tx;
391  by = (count-1)/ty + 1;
392 
393  cl_uint work_dim = 2;
394  size_t global_work_size[] = {bx*tx, by*ty};
395  size_t local_work_size[] = {tx, ty};
396 #endif
397 
398  void *args[] = {&dst_mem, &dst_offset_e, &src_mem, &src_offset_e, &blocklength_e, &stride_e, &count};
399  size_t arg_sizes[] = {sizeof(dst_mem), sizeof(dst_offset_e), sizeof(src_mem), sizeof(src_offset_e), sizeof(blocklength_e), sizeof(stride_e), sizeof(count)};
400 
401  switch(typesize){
402  case 1:
403  XACC_DEBUG("unpack_vector_8\n");
404  enqueue_kernel(queue, _kernels[_XACC_unpack_vector_8], 7, args, arg_sizes, work_dim, global_work_size, local_work_size);
405  break;
406  case 2:
407  XACC_DEBUG("unpack_vector_16\n");
408  enqueue_kernel(queue, _kernels[_XACC_unpack_vector_16], 7, args, arg_sizes, work_dim, global_work_size, local_work_size);
409  break;
410  case 4:
411  XACC_DEBUG("unpack_vector_32\n");
412  enqueue_kernel(queue, _kernels[_XACC_unpack_vector_32], 7, args, arg_sizes, work_dim, global_work_size, local_work_size);
413  break;
414  case 8:
415  XACC_DEBUG("unpack_vector_64\n");
416  enqueue_kernel(queue, _kernels[_XACC_unpack_vector_64], 7, args, arg_sizes, work_dim, global_work_size, local_work_size);
417  break;
418  default:
419  {
420  void *args_default[] = {&dst_mem, &dst_offset, &src_mem, &src_offset, &blocklength, &stride, &count};
421  size_t arg_sizes_default[] = {sizeof(dst_mem), sizeof(dst_offset), sizeof(src_mem), sizeof(src_offset), sizeof(blocklength), sizeof(stride), sizeof(count)};
422 
423  XACC_DEBUG("unpack_vector_default\n");
424  enqueue_kernel(queue, _kernels[_XACC_unpack_vector_8], 7, args_default, arg_sizes_default, work_dim, global_work_size, local_work_size);
425  }
426  }
427 }
Here is the call graph for this function:

◆ _XACC_memory_unpack_vector2()

/*
 * Two-vector variant of the unpack operation. The body shown in this
 * listing is empty: no kernel is enqueued and none of the parameters is
 * read.
 *
 * NOTE(review): presumably an unimplemented stub for this backend --
 * confirm that no caller relies on it doing work.
 */
void _XACC_memory_unpack_vector2 ( _XACC_memory_t  dst0_mem,
size_t  dst0_offset,
_XACC_memory_t  src0_mem,
size_t  src0_offset,
size_t  blocklength0,
size_t  stride0,
size_t  count0,
_XACC_memory_t  dst1_mem,
size_t  dst1_offset,
_XACC_memory_t  src1_mem,
size_t  src1_offset,
size_t  blocklength1,
size_t  stride1,
size_t  count1,
size_t  typesize,
_XACC_queue_t  queue,
bool  is_blocking 
)
446 {
447 }

◆ _XACC_memory_write()

/*
 * Enqueues a write from host memory into an OpenCL buffer with
 * clEnqueueWriteBuffer(): `size` is taken from `addr` and written into
 * `memory` starting at memory_offset. No wait list or completion event
 * is used. The return code is passed to CL_CHECK.
 *
 * is_blocking: converted to CL_TRUE / CL_FALSE and passed as the
 *              blocking flag of clEnqueueWriteBuffer().
 */
void _XACC_memory_write ( _XACC_memory_t  memory,
size_t  memory_offset,
void *  addr,
size_t  size,
_XACC_queue_t  queue,
bool  is_blocking 
)
82 {
83  cl_bool is_blocking_cl = is_blocking? CL_TRUE : CL_FALSE;
84 
85  XACC_DEBUG("clEnqueueWriteBuffer(cq=%p, mem=%p, blocking=%d, offset=%zd, size=%zd, addr=%p)", queue, memory, is_blocking_cl, memory_offset, size, addr);
86  CL_CHECK(clEnqueueWriteBuffer(queue, memory, is_blocking_cl, memory_offset, size, addr,
87  0 /*num_wait_ev*/, NULL /*wait_ev_list*/, NULL /*ev*/));
88 }

◆ _XACC_queue_create()

/*
 * Obtains an OpenCL command queue and stores it in *queue.
 *
 * With _XMP_XACC_PZCL defined, the queue returned by
 * acc_get_opencl_queue(ACC_ASYNC_SYNC) is stored; no new queue is
 * created. Otherwise a new queue is created with clCreateCommandQueue()
 * for the context and device returned by acc_get_current_opencl_context()
 * and acc_get_current_opencl_device(), with properties 0, and the error
 * code it writes is passed to CL_CHECK.
 *
 * NOTE(review): `ret` is declared outside the #ifdef but is only used in
 * the non-PZCL branch.
 */
void _XACC_queue_create ( _XACC_queue_t *  queue)
19 {
20  cl_int ret;
21 #ifdef _XMP_XACC_PZCL
22  *queue = (cl_command_queue)acc_get_opencl_queue(ACC_ASYNC_SYNC);
23 #else
24  cl_context context = (cl_context)acc_get_current_opencl_context();
25  cl_device_id device_id = (cl_device_id)acc_get_current_opencl_device();
26 
27  cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0 /*prop*/, &ret);
28  CL_CHECK(ret);
29 
30  XACC_DEBUG("queue create %p", command_queue);
31  *queue = command_queue;
32 #endif
33 }

◆ _XACC_queue_destroy()

/*
 * Tears down the queue referenced by *queue and resets *queue to NULL.
 *
 * Unless _XMP_XACC_PZCL is defined, clFinish() is called on the queue
 * first and then clReleaseCommandQueue(); both return codes are passed
 * to CL_CHECK. With _XMP_XACC_PZCL defined, only the reset to NULL
 * happens.
 *
 * NOTE(review): *queue is not tested for NULL before clFinish() --
 * confirm callers never destroy a queue twice.
 */
void _XACC_queue_destroy ( _XACC_queue_t *  queue)
36 {
37 #ifndef _XMP_XACC_PZCL
38  XACC_DEBUG("queue destroy %p", *queue);
39  CL_CHECK(clFinish(*queue));
40  CL_CHECK(clReleaseCommandQueue(*queue));
41 #endif
42  *queue = NULL;
43 }

◆ _XACC_queue_wait()

/*
 * Calls clFinish() on the given command queue and passes its return code
 * to CL_CHECK.
 */
void _XACC_queue_wait ( _XACC_queue_t  queue)
46 {
47  CL_CHECK(clFinish(queue));
48 }
Here is the caller graph for this function:

◆ _XACC_util_init()

/*
 * Builds the utility program and creates its kernels.
 *
 * _program is assigned the result of create_and_build_program() applied
 * to the string _kernel_src, with strlen() supplying its length.
 * create_kernels() is then called with _kernels, _program,
 * _XACC_num_kernels and _kernel_names.
 *
 * The commented-out line is an alternative that would build the program
 * from the file "util.cl" instead.
 */
void _XACC_util_init ( void  )
243 {
244  // _program = create_and_build_program_from_file("util.cl");
245  _program = create_and_build_program((char*)_kernel_src, strlen((char*)_kernel_src));
246 
247  create_kernels(_kernels, _program, _XACC_num_kernels, _kernel_names);
248 }
Here is the caller graph for this function:
_XACC_queue_wait
void _XACC_queue_wait(_XACC_queue_t queue)
Definition: xacc_util_cl.c:45
_XACC_fatal
#define _XACC_fatal
Definition: xacc_internal.h:99
_XMP_alloc
void * _XMP_alloc(size_t size)
Definition: xmp_util.c:21
_XACC_util_init
void _XACC_util_init(void)
Definition: xacc_util_cl.c:242
CHECK_MULTIPLE
#define CHECK_MULTIPLE(size, base)
Definition: xacc_util_cl.c:267
create_kernels
void create_kernels(cl_kernel kernels[], cl_program program, int num_kernels, const char *kernel_names[])
Definition: xacc_util_cl.c:207
_XMP_free
void _XMP_free(void *p)
Definition: xmp_util.c:37
XACC_DEBUG
#define XACC_DEBUG(...)
Definition: xmp_internal.h:768
_XMP_fatal
void _XMP_fatal(char *msg)
Definition: xmp_util.c:42