首页 > 其他分享 >opencl实现图像旋转(二维数组)。

opencl实现图像旋转(二维数组)。

时间:2022-12-16 15:33:31浏览次数:49  
标签:float const cl int 0.0 opencl uchar 二维 数组

改写自:https://zhuanlan.zhihu.com/p/451101452,改用零拷贝(zero copy)方案。

1、host函数

#include <CL/cl.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#include <fstream>
//#include <string>
//#include <iostream>

#ifndef _WIN32
#include <sys/time.h>
#include <unistd.h>
#else
#include <time.h>
#include <windows.h>
#endif
 17 
 18 #define HKA_EPS_F32     ( 1.192092890e-07f )            // 单精度浮点标准中最小差值
 19 //typedef unsigned __int64  UINT64;
 20 
 21 typedef unsigned long long  UINT64;
 22 typedef struct _HKA_POINT_I  // 2-D point with integer coordinates
 23 {
 24     int x;          // x coordinate
 25     int y;          // y coordinate
 26 }HKA_POINT_I;
 27 //cl_int ConvertToString(const char* pFileName, std::string& str);
 28 int AVMF_inv_mat(float* a);
 29 int AVMF_STITCH_get_rotate_matrix(float    rotatematrix[3][3], float    angle, HKA_POINT_I center);
 30 
 31 int sys_time_get_utc_msec(UINT64* puUtcMSec);
 32 #if 0
 33 template <typename T>
 34 void check(T result, char const *const func, const char *const file, int const line)
 35 {
 36     if (result)
 37     {
 38         fprintf(stderr, "CL error at %s:%d code=%d \"%s\" \n", file, line,
 39                 static_cast<unsigned int>(result), func);
 40         exit(result);
 41     }
 42 }
 43 
 44 #define CHECK_OPENCL_ERROR(val) check((val), #val, __FILE__, __LINE__)
 45 #endif
 46 void DataInit(cl_uchar *p_data, int width, int height);
 47 void DataCompare(cl_uchar *src1, cl_uchar *src2, int width, int height);
 48 
 49 cl_context CreateContext(cl_device_id *p_device);
 50 cl_command_queue CreateCommandQueue(cl_context context, cl_device_id device);
 51 cl_program CreateProgram(cl_context context, cl_device_id device, const char *source);
 52 cl_kernel CreateKernel(cl_program program, const char *kernel_name, cl_device_id device);
 53 void PrintProfilingInfo(cl_event event);
 54 
 55 bool CreateMemObject(cl_context context, cl_mem memobject[2], cl_uchar *img_ptr,
 56                      cl_uint image_size);
 57 void CleanUp(cl_context context, cl_command_queue commandqueue, cl_program program,
 58              cl_kernel kernel);
 59 
 60 void CpuTranspose(cl_uchar *src, cl_uchar *dst, int src_width, int src_height, int antiRotateMatrixInt[9]);
 61 void PrintMatrix(cl_uchar *matrix, int width, int height);
 62 
 63 char *ClUtilReadFileToString(const char *filename);
 64 void ClUtilWriteStringToFile(const cl_uchar *text, size_t text_length, char *filename);
 65 //void PrintDuration(timeval *start, const char *str, int loop_count);
 66 void CheckClStatus(cl_int ret, const char *failure_msg);
 67 
 68 int main()
 69 {
 70     cl_device_id device;
 71     cl_context context;
 72     cl_command_queue command_queue;
 73     cl_program program;
 74     cl_kernel kernel;
 75     cl_mem buffer_src;
 76     cl_mem buffer_dst;
 77     cl_int err_num = CL_SUCCESS;
 78     cl_uint bufferSizeInBytes;
 79     cl_int    iStatus = 0;                        // 函数返回状态
 80     //string    strSource = "";                        // 用于存储cl文件中的代码
 81     cl_mem buffer_matrix = NULL;
 82     //timeval start;
 83     UINT64 t1, t2;
 84     FILE *fInput = NULL;
 85     FILE *fOutput = NULL;
 86     const int c_loop_count = 10;
 87 
 88     /* 图像参数 */
 89     int width                   = 1280;
 90     int height                  = 960;
 91     bufferSizeInBytes           = width * height * sizeof(cl_uchar);
 92     cl_uchar *hostSrcBuffer     = (cl_uchar *)malloc(bufferSizeInBytes);
 93     cl_uchar *cpuDstBuffer      = (cl_uchar *)malloc(bufferSizeInBytes);
 94     //cl_uchar *gpuDstBuffer      = (cl_uchar *)malloc(bufferSizeInBytes);
 95     //memset(gpuDstBuffer, 0, bufferSizeInBytes);
 96 
 97     //旋转参数
 98     float    rotatematrix[3][3] = { {0.0f,0.0f,0.0f} };/* 旋转矩阵 */
 99     HKA_POINT_I center = { width / 2,height / 2 };  /* 旋转中心 */
100     float antiRotateMatrix[9];      /* 旋转矩阵逆矩阵 */
101     int antiRotateMatrixInt[9];     /* 旋转矩阵逆矩阵整数化 */
102     AVMF_STITCH_get_rotate_matrix(rotatematrix, -3.1415926 / 4, center);
103     memcpy((void*)antiRotateMatrix, (void*)rotatematrix, sizeof(rotatematrix));
104     /* 求逆矩阵 */
105     if (AVMF_inv_mat(antiRotateMatrix) < 0)
106     {
107         printf("AVMF_inv_mat err \n");
108     }
109     /* 整数化 */
110     int index = 0;
111     for (index = 0; index < 9; index++)
112     {
113         antiRotateMatrixInt[index] = (int)(antiRotateMatrix[index] * (1 << 8));
114     }
115     antiRotateMatrixInt[2] += (1 << (8 - 1));
116     antiRotateMatrixInt[5] += (1 << (8 - 1));
117 
118 
119 
120 
121     // Step 1-3 查询平台设备并创建context
122     context = CreateContext(&device);
123     if (NULL == context)
124     {
125         printf("MainError:Create Context Failed!\n");
126         return -1;
127     }
128 
129     // Step 4 创建command queue
130     command_queue = CreateCommandQueue(context, device);
131     if (NULL == command_queue)
132     {
133         printf("MainError:Create CommandQueue Failed!\n");
134         return -1;
135     }
136 
137     // 读取OpenCL C源代码
138     char *device_source_str = ClUtilReadFileToString("kerneltest.cl");
139     if (NULL == device_source_str)
140     {
141         printf("MainError:ClUtilReadFileToString Failed!\n");
142         return -1;
143     }
144     program                 = CreateProgram(context, device, device_source_str);
145     if (NULL == program)
146     {
147         printf("MainError:Create Program Failed!\n");
148         return -1;
149     }
150 
151     // Step 6 创建编译kernel
152     kernel = CreateKernel(program, "image_rotate_matrix", device);
153     if (NULL == kernel)
154     {
155         printf("MainError:Create Kernel Failed!\n");
156         return -1;
157     }
158 
159     // Step 7 创建内存对象
160 #if 0
161     /* 传统方案 */
162     sys_time_get_utc_msec(&t1);
163     buffer_src = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
164                                 bufferSizeInBytes, hostSrcBuffer, &err_num);
165     CheckClStatus(err_num, "Create src buffer");
166     buffer_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSizeInBytes, NULL, &err_num);
167     CheckClStatus(err_num, "Create dst buffer");
168     buffer_matrix = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
169         sizeof(antiRotateMatrixInt), antiRotateMatrixInt, &err_num);
170     CheckClStatus(err_num, "Create matrix buffer");
171     
172     sys_time_get_utc_msec(&t2);
173     printf(" clCreateBuffer cost %llu \n", (t2 - t1));
174 #endif    
175 
176     /* ZERO COPY方案 */
177     sys_time_get_utc_msec(&t1);
178     buffer_src = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufferSizeInBytes, NULL, &err_num);
179     CheckClStatus(err_num, "Create src buffer");
180     buffer_dst = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufferSizeInBytes, NULL, &err_num);
181     CheckClStatus(err_num, "Create dst buffer");
182     /* 旋转矩阵暂不使用零拷贝方案 */
183     buffer_matrix = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(antiRotateMatrixInt), antiRotateMatrixInt, &err_num);
184     CheckClStatus(err_num, "Create matrix buffer");
185     
186     sys_time_get_utc_msec(&t2);
187     printf(" clCreateBuffer cost %llu \n", (t2 - t1));
188 
189     /* 建立输入数据显存到内存的映射 */
190     sys_time_get_utc_msec(&t1);
191     cl_uchar *hostPtrSrc = (cl_uchar *)clEnqueueMapBuffer(
192                                                             command_queue,
193                                                             buffer_src,
194                                                             CL_TRUE,
195                                                             CL_MAP_WRITE,
196                                                             0,
197                                                             bufferSizeInBytes,
198                                                             0, NULL, NULL, &iStatus);
199     sys_time_get_utc_msec(&t2);
200    
201     printf(" clEnqueueWriteBuffer cost %llu %d \n", (t2 - t1), iStatus);
202 
203     sys_time_get_utc_msec(&t1);
204     /* 建立输出数据显存到内存的映射 */
205     cl_uchar *hostPtrDst = (cl_uchar *)clEnqueueMapBuffer(
206                                                             command_queue,
207                                                             buffer_dst,
208                                                             CL_TRUE,
209                                                             CL_MAP_WRITE,
210                                                             0,
211                                                             bufferSizeInBytes,
212                                                             0, NULL, NULL, &iStatus);
213     sys_time_get_utc_msec(&t2);
214 
215     // Step 8 设置kernelArg
216     err_num = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer_src);
217     err_num |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &buffer_dst);
218     err_num |= clSetKernelArg(kernel, 2, sizeof(int), &width);
219     err_num |= clSetKernelArg(kernel, 3, sizeof(int), &height);
220     err_num |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &buffer_matrix);
221     CheckClStatus(err_num, "Set Kernel Arg");
222 
223     size_t global_work_size[3];
224     size_t local_work_size[3];
225 
226     // 设置NDRange尺寸
227 #if defined(QCOM_DEVICE)
228     local_work_size[0] = 32;
229     local_work_size[1] = 32;
230 #elif defined(MTK_DEVICE)
231     local_work_size[0] = 16;
232     local_work_size[1] = 16;
233 #else
234     local_work_size[0] = 16;
235     local_work_size[1] = 16;
236 #endif
237     local_work_size[2] = 0;
238 
239     global_work_size[0] =
240         (width + local_work_size[0] - 1) / local_work_size[0] * local_work_size[0];
241     global_work_size[1] =
242         (height + local_work_size[1] - 1) / local_work_size[1] * local_work_size[1];
243     global_work_size[2] = 0;
244 
245     printf("global_work_size=(%zu,%zu)\n", global_work_size[0], global_work_size[1]);
246     printf("local_work_size=(%zu,%zu)\n", local_work_size[0], local_work_size[1]);
247 
248 
249     fInput = fopen("./input/test_in.nv12", "r");
250     if (NULL == fInput || NULL == hostSrcBuffer)
251     {
252         printf("open file failed \n");
253         return -1;
254     }
255     //fread(hostSrcBuffer, sizeof(char), bufferSizeInBytes, fInput);
256     fread((void *)hostPtrSrc, sizeof(char), bufferSizeInBytes, fInput);
257     fclose(fInput);
258     printf("Matrix Width =%d Height=%d\n", width, height);
259     sys_time_get_utc_msec(&t1);
260     for (int i = 0; i < c_loop_count; i++)
261     {
262         CpuTranspose(hostPtrSrc, cpuDstBuffer, width, height, antiRotateMatrixInt);
263     }
264     sys_time_get_utc_msec(&t2);
265     printf(" CpuTranspose cost %llu \n", (t2 - t1) / c_loop_count);
266     /* 存储CPU数据 */
267     fOutput = fopen("./output/cpu_test_out.nv12", "wb");
268     if (NULL == fOutput)
269     {
270         printf("open file failed \n");
271         return -1;
272     }
273     fwrite(cpuDstBuffer, sizeof(char), bufferSizeInBytes, fOutput);
274     fclose(fOutput);
275 
276     //fseek(fInput, 0, SEEK_SET);
277     //fread((void *)hostPtrSrc, sizeof(char), bufferSizeInBytes, fInput);
278     //fclose(fInput);
279 
280     //gettimeofday(&start, NULL);
281     for (int i = 0; i < c_loop_count; i++)
282     {
283         // Step 9 入队kernel执行
284         cl_event kernel_event = NULL;
285         err_num = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global_work_size,
286                                          local_work_size, 0, NULL, &kernel_event);
287         CheckClStatus(err_num, "ClEnqueueNDRangeKernel");
288         // Step 10 同步
289         err_num = clWaitForEvents(1, &kernel_event);
290         CheckClStatus(err_num, "ClWaitForEvents");
291         PrintProfilingInfo(kernel_event);
292         clReleaseEvent(kernel_event);
293         sleep(1);
294     }
295 
296     /* 比较CPU数据与GPU数据是否有差异 */
297     DataCompare(cpuDstBuffer, hostPtrDst, width, height);
298 
299     /* 存储GPU数据 */
300     fOutput = fopen("./output/gpu_test_out.nv12", "wb");
301     if (NULL == fOutput)
302     {
303         printf("open file failed \n");
304         return -1;
305     }
306     fwrite((void*)hostPtrDst, sizeof(char), bufferSizeInBytes, fOutput);
307     fclose(fOutput);
308 
309     iStatus = clEnqueueUnmapMemObject(
310                                         command_queue,
311                                         buffer_src,
312                                         (void *) hostPtrSrc,
313                                         0, NULL, NULL);
314     iStatus = clEnqueueUnmapMemObject(
315                                         command_queue,
316                                         buffer_dst,
317                                         (void *) hostPtrDst,
318                                         0, NULL, NULL);
319     free(device_source_str);
320     free(hostSrcBuffer);
321     free(cpuDstBuffer);
322     //free(gpuDstBuffer);
323     clReleaseMemObject(buffer_src);
324     clReleaseMemObject(buffer_dst);
325 
326     CleanUp(context, command_queue, program, kernel);
327     return 0;
328 
329 }
330 
331 void DataInit(cl_uchar *p_data, int width, int height)
332 {
333     cl_uchar cnt = 0;
334     for (int i = 0; i < width * height; i++)
335     {
336         *p_data = cnt;
337         cnt++;
338         p_data++;
339     }
340 }
341 
342 void PrintMatrix(cl_uchar *matrix, int width, int height)
343 {
344     for (int i = 0; i < height; i++)
345     {
346         for (int j = 0; j < width; j++)
347         {
348             printf("%d ", matrix[i * width + j]);
349         }
350         printf("\n");
351     }
352 }
353 
354 void CpuTranspose(cl_uchar *src, cl_uchar *dst, int src_width, int src_height, int matrix[9])
355 {
356     for (int src_row = 0; src_row < src_height; src_row++)
357     {
358         for (int src_col = 0; src_col < src_width; src_col++)
359         {
360             int xpos = matrix[0] * src_col + matrix[1] * src_row + matrix[2];
361             int ypos = matrix[3] * src_col + matrix[4] * src_row + matrix[5];
362 
363             xpos = (xpos >> 8);
364             ypos = (ypos >> 8);
365             if ((xpos >= 0) && (xpos < src_width) && (ypos >= 0) && (ypos < src_height))  //Bound Checking 
366             {
367                 dst[src_row * src_width + src_col] = src[ypos * src_width + xpos];
368             }
369         }
370     }
371 }
372 
373 cl_context CreateContext(cl_device_id *p_device)
374 {
375     cl_int err_num;
376     cl_uint num_platform;
377     cl_platform_id platform_id;
378     cl_context context = NULL;
379     err_num            = clGetPlatformIDs(1, &platform_id, &num_platform);
380     if (CL_SUCCESS != err_num || num_platform <= 0)
381     {
382         printf("failed to find any opencl platform. \n");
383         return NULL;
384     }
385     err_num = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, p_device, NULL);
386     if (CL_SUCCESS != err_num)
387     {
388         printf("there is no gpu.\n");
389         return NULL;
390     }
391     context = clCreateContext(NULL, 1, p_device, NULL, NULL, &err_num);
392     if (CL_SUCCESS != err_num)
393     {
394         printf("create context error.\n");
395         return NULL;
396     }
397     return context;
398 }
399 
400 cl_command_queue CreateCommandQueue(cl_context context, cl_device_id device)
401 {
402     cl_command_queue_properties queue_prop[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
403     cl_command_queue command_queue           = NULL;
404     command_queue = clCreateCommandQueueWithProperties(context, device, queue_prop, NULL);
405     if (NULL == command_queue)
406     {
407         printf("create command queue failed.\n");
408     }
409     return command_queue;
410 }
411 
412 cl_program CreateProgram(cl_context context, cl_device_id device, const char *source)
413 {
414     cl_int err_num;
415     cl_program program;
416 
417     program = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, NULL);
418     if (NULL == program)
419     {
420         printf("create program failed.\n ");
421         return NULL;
422     }
423     err_num = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
424     if (CL_SUCCESS != err_num)
425     {
426         clReleaseProgram(program);
427         return NULL;
428     }
429     return program;
430 }
431 
432 cl_kernel CreateKernel(cl_program program, const char *kernel_name, cl_device_id device)
433 {
434     int err_num;
435     cl_kernel kernel;
436     kernel = clCreateKernel(program, kernel_name, &err_num);
437     if (err_num != CL_SUCCESS)
438     {
439         printf("create kernel failed.\n ");
440         return NULL;
441     }
442     size_t max_work_group_size;
443     size_t perferred_work_group_size_multiple;
444     err_num = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
445                                        &max_work_group_size, NULL);
446     err_num |=
447         clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
448                                  sizeof(size_t), &perferred_work_group_size_multiple, NULL);
449     if (err_num != CL_SUCCESS)
450     {
451         printf("Get kernel info failed.\n ");
452         return NULL;
453     }
454     printf("Kernel %s max workgroup size=%zu\n", kernel_name, max_work_group_size);
455     printf("Kernel %s perferred workgroup size multiple=%zu\n", kernel_name,
456            perferred_work_group_size_multiple);
457     return kernel;
458 }
459 
460 void PrintProfilingInfo(cl_event event)
461 {
462     cl_ulong t_queued;
463     cl_ulong t_submitted;
464     cl_ulong t_started;
465     cl_ulong t_ended;
466     cl_ulong t_completed;
467 
468     clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &t_queued, NULL);
469     clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &t_submitted,
470                             NULL);
471     clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &t_started, NULL);
472     clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &t_ended, NULL);
473     clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &t_completed,
474                             NULL);
475 
476     printf("queue -> submit : %fus\n", (t_submitted - t_queued) * 1e-3);
477     printf("submit -> start : %fus\n", (t_started - t_submitted) * 1e-3);
478     printf("start -> end : %fus\n", (t_ended - t_started) * 1e-3);
479     printf("end -> finish : %f  %f  %fus\n", t_completed * 1e-3, t_ended * 1e-3, (t_completed - t_ended) * 1e-3);
480 }
481 
482 void CleanUp(cl_context context, cl_command_queue commandqueue, cl_program program,
483              cl_kernel kernel)
484 {
485 
486     if (NULL != kernel)
487     {
488         clReleaseKernel(kernel);
489     }
490     if (NULL != program)
491     {
492         clReleaseProgram(program);
493     }
494     if (NULL != commandqueue)
495     {
496         clReleaseCommandQueue(commandqueue);
497     }
498     if (NULL != context)
499     {
500         clReleaseContext(context);
501     }
502 }
503 
504 void CheckClStatus(cl_int ret, const char *failure_msg)
505 {
506     if (ret != CL_SUCCESS)
507     {
508         fprintf(stderr, "Error %d with %s\n", ret, failure_msg);
509         exit(ret);
510     }
511     return;
512 }
513 
514 //void PrintDuration(timeval *begin, const char *function_name, int loop_count)
515 //{
516 //    timeval current;
517 //    gettimeofday(&current, NULL);
518 //    uint64_t time_in_microseconds =
519 //        (current.tv_sec - begin->tv_sec) * 1e6 + (current.tv_usec - begin->tv_usec);
520 //    printf("%s consume average time: %ld us\n", function_name, time_in_microseconds / loop_count);
521 //    return;
522 //}
523 // Convert the contents of a .cl file into a string
524 
525 #if 0
526 cl_int ConvertToString(const char* pFileName, std::string& Str)  // disabled C++ variant: reads pFileName into Str; returns 0, or -1 if the file cannot be opened
527 {
528     size_t        uiSize = 0;
529     size_t        uiFileSize = 0;
530     char* pStr = NULL;
531 
532     /*FILE* pFile = fopen(pFileName, "r");
533     if (!pFile) return -1;
534     fseek(pFile, 0L, SEEK_END);
535     int size = ftell(pFile);
536     fclose(pFile);*/
537 
538 
539     std::fstream fFile(pFileName, (std::fstream::in | std::fstream::binary));
540 
541     if (fFile.is_open())
542     {
543         fFile.seekg(0, std::fstream::end);
544         uiSize = uiFileSize = (size_t)fFile.tellg();  // get the file size
545         fFile.seekg(0, std::fstream::beg);
546         pStr = new char[uiSize + 1];
547 
548         if (NULL == pStr)
549         {
550             fFile.close();
551             return 0;
552         }
553 
554         fFile.read(pStr, uiFileSize);                // read uiFileSize bytes
555         fFile.close();
556         pStr[uiSize] = '\0';
557         Str = pStr;
558 
559         delete[] pStr;
560 
561         return 0;
562     }
563 
564     printf("Error: Failed to open cl file %s \n", pFileName);
565 
566     return -1;
567 }
568 #endif
569 
/*
 * Reads the whole file `filename` into a newly allocated, NUL-terminated
 * string.  The caller owns the result and must free() it.
 *
 * A zero-length file yields an empty string.  This function never returns
 * NULL: any open, seek, read, allocation or close failure prints a message
 * and terminates the process with exit(-1).
 */
char *ClUtilReadFileToString(const char *filename)
{
    FILE *fp;
    char *fileData;
    long fileSize;

    /* Open the file (binary mode so the size matches the bytes read) */
    fp = fopen(filename, "rb");
    if (!fp)
    {
        printf("Could not open file: %s\n", filename);
        exit(-1);
    }

    /* Determine the file size */
    if (fseek(fp, 0, SEEK_END))
    {
        printf("Error reading the file\n");
        fclose(fp);
        exit(-1);
    }
    fileSize = ftell(fp);
    if (fileSize < 0)
    {
        printf("Error reading the file\n");
        fclose(fp);
        exit(-1);
    }
    if (fseek(fp, 0, SEEK_SET))
    {
        printf("Error reading the file\n");
        fclose(fp);
        exit(-1);
    }

    /* Read the contents */
    fileData = (char *)malloc((size_t)fileSize + 1);
    if (!fileData)
    {
        printf("Out of memory reading the file\n");
        fclose(fp);
        exit(-1);
    }
    /* Count bytes rather than one fileSize-sized item, so that an empty
     * file (0 bytes requested, 0 returned) is not mistaken for an error. */
    if (fread(fileData, 1, (size_t)fileSize, fp) != (size_t)fileSize)
    {
        printf("Error reading the file\n");
        free(fileData);
        fclose(fp);
        exit(-1);
    }

    /* Terminate the string */
    fileData[fileSize] = '\0';

    /* Close the file */
    if (fclose(fp))
    {
        printf("Error closing the file\n");
        free(fileData);
        exit(-1);
    }

    return fileData;
}
627 
628 void ClUtilWriteStringToFile(const cl_uchar *text, size_t text_length, char *filename)
629 {
630     FILE *fp = fopen(filename, "wt+");
631     if (NULL == fp)
632         return;
633     fwrite(text, 1, text_length, fp);
634     fclose(fp);
635 }
636 
637 void DataCompare(cl_uchar *src1, cl_uchar *src2, int width, int height)
638 {
639     for (int i = 0; i < height; i++)
640     {
641         for (int j = 0; j < width; j++)
642         {
643             int idx = i * width + j;
644             if (src1[idx] != src2[idx])
645             {
646                 printf("Mismatch at (%d,%d), A= %d,B= %d\n", i, j, src1[idx], src2[idx]);
647                 return;
648             }
649         }
650     }
651     printf("A and B match!\n");
652     return;
653 }
654 
655 /***************************************************************************************************
656 * 功 能:求3x3矩阵逆矩阵,使用伴随矩阵的方式直接计算
657 * 参 数:
658 *         a     - I/O   源矩阵
659 * 返回值:无
660 * 备  注:
661 ***************************************************************************************************/
662 int AVMF_inv_mat(float* a)
663 {
664     float det = 0.0f;
665     float recip_det = 0.0f;
666     float a00 = 0.0f;
667     float a01 = 0.0f;
668     float a02 = 0.0f;
669     float a10 = 0.0f;
670     float a11 = 0.0f;
671     float a12 = 0.0f;
672     float a20 = 0.0f;
673     float a21 = 0.0f;
674     float a22 = 0.0f;
675 
676     a00 = a[0];
677     a01 = a[1];
678     a02 = a[2];
679     a10 = a[3];
680     a11 = a[4];
681     a12 = a[5];
682     a20 = a[6];
683     a21 = a[7];
684     a22 = a[8];
685 
686     // 计算3x3矩阵行列式
687     det = a00 * a11 * a22 + a01 * a12 * a20 + a02 * a21 * a10;
688     det -= a02 * a11 * a20 + a00 * a21 * a12 + a01 * a10 * a22;
689 
690     // 除零保护
691     if (HKA_EPS_F32 > fabs(det))
692     {
693         printf("error \n");
694         return -1;
695     }
696 
697     recip_det = 1.0f / det;
698 
699     // 根据伴随矩阵除行列式算出逆矩阵,伴随矩阵已转置 det不为0,不做除零保护
700     a[0] = (a11 * a22 - a21 * a12) * recip_det;
701     a[1] = -(a01 * a22 - a21 * a02) * recip_det;
702     a[2] = (a01 * a12 - a11 * a02) * recip_det;
703     a[3] = -(a10 * a22 - a20 * a12) * recip_det;
704     a[4] = (a00 * a22 - a20 * a02) * recip_det;
705     a[5] = -(a00 * a12 - a10 * a02) * recip_det;
706     a[6] = (a10 * a21 - a20 * a11) * recip_det;
707     a[7] = -(a00 * a21 - a20 * a01) * recip_det;
708     a[8] = (a00 * a11 - a10 * a01) * recip_det;
709 
710     return 0;
711 }
712 
713 /***************************************************************************************************
714 * 功 能:生成2D全景图
715 * 参 数:*
716 *        modu_enable                - I  2D类型,融合使能开关-----heyunyun
717 *        sub_img                    - I  各镜头拼接子图
718 *        weight                     - I  各子图权重表
719 *        car_img                    - I  车模图像
720 *        dst                        - O  2D全景图
721 * 返回值:状态码
722 * 备  注:
723 ***************************************************************************************************/
724 
725 int AVMF_STITCH_get_rotate_matrix(
726     float    rotatematrix[3][3],
727     float    angle,
728     HKA_POINT_I center)
729 {
730     int i = 0, j = 0, k = 0;
731     float temp = 0.0f;
732 
733     float    translate_l[3][3] = { { 1.0f, 0.0f, center.x }, { 0.0f, 1.0f, center.y }, { 0.0f, 0.0f, 1.0f } };
734     float    rotate[3][3] = { { cos(angle), -sin(angle), 0.0f }, { sin(angle), cos(angle), 0.0f }, { 0.0f, 0.0f, 1.0f } };
735     float    translate_r[3][3] = { { 1.0f, 0.0f, -center.x }, { 0.0f, 1.0f, -center.y }, { 0.0f, 0.0f, 1.0f } };
736 
737     float    ans[3][3] = { {0.0f, 0.0f, 0.0f} };
738 
739     for (i = 0; i < 3; ++i)
740     {
741         for (j = 0; j < 3; ++j)
742         {
743             temp = 0.0f;
744             for (k = 0; k < 3; ++k)
745             {
746                 temp += translate_l[i][k] * rotate[k][j];
747             }
748             ans[i][j] = temp;
749         }
750     }
751 
752     for (i = 0; i < 3; ++i)
753     {
754         for (j = 0; j < 3; ++j)
755         {
756             temp = 0.0f;
757             for (k = 0; k < 3; ++k)
758             {
759                 temp += ans[i][k] * translate_r[k][j];
760             }
761             rotatematrix[i][j] = temp;
762         }
763     }
764     return 0;
765 }
766 
767 
768 /***************************************************************************************************
769 * 函数名:sys_time_get_utc_msec
770 * 功  能:读取系统时间 毫秒
771 * 参  数:
772 * 返回值:寄存器读数
773 * 备  注:
774 ***************************************************************************************************/
775 int sys_time_get_utc_msec(UINT64* puUtcMSec)
776 {
777 #ifndef _WIN32
778     int iRet = 0;
779     struct timeval stTimeVal = { 0 };
780     if (NULL == puUtcMSec)
781     {
782         return iRet;
783     }
784     iRet = gettimeofday(&stTimeVal, NULL);
785     if (iRet < 0)
786     {
787         return iRet;
788     }
789     //*puUtcMSec = (1000000 * stTimeVal.tv_sec + stTimeVal.tv_usec) / 1000;
790     *puUtcMSec = (1000000 * stTimeVal.tv_sec + stTimeVal.tv_usec);
791     return iRet;
792 
793 #else
794 #define EPOCHFILETIME   (116444736000000000UL)
795     FILETIME ft;
796     LARGE_INTEGER li;
797     UINT64 tt = 0;
798     GetSystemTimeAsFileTime(&ft);
799     li.LowPart = ft.dwLowDateTime;
800     li.HighPart = ft.dwHighDateTime;
801     // 从1970年1月1日0:0:0:000到现在的微秒数(UTC时间)
802     tt = (li.QuadPart - EPOCHFILETIME) / 10;
803 
804     //*puUtcMSec = (tt / 1000);
805     *puUtcMSec = (tt);
806     return 0;
807 #endif
808 }
opencl_test.c

2、内核函数

 1 __kernel void TransposeKernel(__global uchar *src, __global uchar *dst, int width, int height)
 2 {
 3     uint g_idx = get_global_id(0);
 4     uint g_idy = get_global_id(1);
 5     if ((g_idx >= width) || (g_idy >= height))
 6     {
 7         return;
 8     }
 9     //dst[g_idx * height + g_idy] = src[g_idy * width + g_idx];
10     dst[g_idy * width + g_idx] = src[g_idy * width + g_idx];
11 }
12 
/*
 * Forward-mapping rotation about the image centre: each work-item takes the
 * source pixel (ix, iy), rotates its position by the angle given through
 * sinTheta/cosTheta and stores the pixel at the rotated position if that lies
 * inside the image.  Because the mapping is source -> destination, some
 * destination pixels may receive no value.
 *
 * src_data, dest_data - row-major W x H byte images in global memory
 * W, H                - image dimensions
 * sinTheta, cosTheta  - rotation parameters
 */
__kernel  void image_rotate(
    __global uchar * src_data,
    __global uchar * dest_data,
    //Data in global memory
    int W,
    int H,
    //Image Dimensions
    float sinTheta,
    float cosTheta )
    //Rotation Parameters
{
    //Thread gets its index within index space
    const int ix = get_global_id(0);
    const int iy = get_global_id(1);

    //The global size may be padded up to a multiple of the work-group size;
    //padded work-items must not read src_data outside the image.
    if ((ix >= W) || (iy >= H))
    {
        return;
    }

    int xc = W/2;
    int yc = H/2;

    int xpos =  ( ix-xc)*cosTheta - (iy-yc)*sinTheta+xc;
    int ypos =  ( ix-xc)*sinTheta + (iy-yc)*cosTheta+yc;

    if ((xpos>=0) && (xpos< W)  && (ypos>=0) && (ypos< H))  //Bound Checking
    {
        dest_data[ypos*W+xpos]= src_data[iy*W+ix];
    }
}
39 
/*
 * Inverse-mapping affine warp: each work-item owns the destination pixel
 * (ix, iy), computes the matching source position from the first two rows of
 * `matrix` (entries carry 8 fractional bits) and copies the source pixel if
 * that position lies inside the image.  Destination pixels whose source falls
 * outside the image are left untouched.
 *
 * src_data, dest_data - row-major W x H byte images in global memory
 * W, H                - image dimensions
 * matrix              - 3x3 row-major fixed-point matrix; entries 0..5 are read
 */
__kernel  void image_rotate_matrix(
    __global uchar * src_data,
    __global uchar * dest_data,
    //Data in global memory
    int W,
    int H,
    //Image Dimensions
    __global int * matrix)
    //Rotation Parameters
{
    //Thread gets its index within index space
    const int ix = get_global_id(0);
    const int iy = get_global_id(1);

    //The global size may be padded up to a multiple of the work-group size;
    //padded work-items must not write dest_data outside the image.
    if ((ix >= W) || (iy >= H))
    {
        return;
    }

    int xpos =  matrix[0] * ix + matrix[1] * iy + matrix[2];
    int ypos =  matrix[3] * ix + matrix[4] * iy + matrix[5];

    //Drop the 8 fractional bits
    xpos = (xpos >> 8);
    ypos = (ypos >> 8);
    if ((xpos>=0) && (xpos< W)  && (ypos>=0) && (ypos< H))  //Bound Checking
    {
        dest_data[iy*W+ix] = src_data[ypos*W+xpos];
    }
    //Debug trace from a single, hard-coded work-item
    if(ix == 640 && iy == 480)
    {
        printf("\n %d %d %d %d %d %d %d %d \n", matrix[0], matrix[1], matrix[2], matrix[3], matrix[4], matrix[5], xpos, ypos);
    }
}
kerneltest.cl

 

标签:float,const,cl,int,0.0,opencl,uchar,二维,数组
From: https://www.cnblogs.com/peifx/p/16987516.html

相关文章

  • 代码随想录-数组
    代码随想录数组数组--二分查找题目:力扣题目链接给定一个n个元素有序的(升序)整型数组nums和一个目标值target,写一个函数搜索nums中的target,如果目标值存在返回......
  • opencl优化-Zero Copy
    转自:https://www.cnblogs.com/willhua/tag/OpenCL/有两种方式实现从主机到CL设备的数据传递,第一种:1cl_meminput=clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(f......
  • 集合转数组 toArray
    使用集合转数组的方法,必须使用集合的toArray(T[]array),传入的是类型完全一致、长度为0的空数组。反例:直接使用toArray无参方法存在问题,此方法返回值只能是Object[]......
  • 删除/去掉数组中一个元素或者多个元素
    <?php$source="322,377,3322";//按逗号分离字符串$arr=explode(',',$source);$tmp="322";for($i=0;$i<count($arr);$i++){if($tmp==$arr[$i])unset($arr[$i......
  • LeetCode 53_最大子数组和
    LeetCode53:最大子数组和题目给你一个整数数组nums,请你找出一个具有最大和的连续子数组(子数组最少包含一个元素),返回其最大和。子数组是数组中的一个连续部分。示例......
  • 数组的概念使用
    数组的语法:1、 Java语言中的数组是一种引用数据类型。不属于基本数据类型。数组的父类是object。2、 数组是一个容器,可以容纳多个元素。(数组是一个数据的集合。)3、 数组......
  • 【二维码图像矫正增强】基于MATLAB的二维码图像矫正增强处理仿真
    1.软件版本matlab2013b2.算法流程概述通过形态学处理获得二维码部分的图像区域及边界;采用凸包算法来计算边界上的点集;然后根据点集来寻找二维码的四个顶点,然后透视变......
  • C语言中将二维数组作为函数参数来传递
    c语言中经常需要通过函数传递二维数组,有三种方法可以实现,如下:方法一, 形参给出第二维的长度。例如:#include<stdio.h>voidfunc(intn,char str[][5]){ inti; f......
  • [LeetCode]004-寻找两个正序数组的中位数
    >>>传送门题目给定两个大小分别为m和n的正序(从小到大)数组 nums1和 nums2。请你找出并返回这两个正序数组的中位数。算法的时间复杂度应该为O(log(m+n))。......
  • Java-数组4-笔记
    1.数组的作用就是一个容器,用于在程序中存储一批同种类型的数据2.数组的定义静态初始化数组格式1数据类型[]数组名称=new数据类型[]{元素1,元素2,元素3,....}格式2数据类型[......