運行hello.cpp & 運行vadd.cpp
矩陣乘法
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
// Reads the entire contents of a file into a string.
//
// The file is opened in binary mode, so bytes are stored in `s` exactly as
// they appear on disk (no newline translation).
//
// Parameters:
//   filename - path of the file to read.
//   s        - receives the file contents on success; left untouched on failure.
//
// Returns 0 on success, 1 if the file could not be opened or fully read
// (a diagnostic is printed in that case).
int convertToString(const char *filename, std::string &s) {
    std::ifstream f(filename, std::ios::in | std::ios::binary);
    if (!f.is_open()) {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Determine the file size by seeking to the end. tellg() reports -1 on
    // failure, which must not be turned into a (huge) unsigned buffer size.
    f.seekg(0, std::ios::end);
    const std::streamoff length = f.tellg();
    f.seekg(0, std::ios::beg);
    if (length < 0 || !f) {
        printf("Error: Failed to read file %s\n", filename);
        return 1;
    }

    // Read straight into a string so no manually managed buffer is needed.
    std::string contents(static_cast<size_t>(length), '\0');
    if (length > 0) {
        f.read(&contents[0], static_cast<std::streamsize>(length));
        if (f.gcount() != static_cast<std::streamsize>(length)) {
            printf("Error: Failed to read file %s\n", filename);
            return 1;
        }
    }

    s.swap(contents);
    return 0;
}
int main(int argc, char *argv[]) {
double cputime, gputime;
clock_t timestamp;
const int W = 100;
const int mat_size = W * W;
// 在 host 內(nèi)存中創(chuàng)建三個(gè)緩沖區(qū)
float *const buf1 = (float *) malloc(mat_size * sizeof(float));
float *const buf2 = (float *) malloc(mat_size * sizeof(float));
float *const buf = (float *) malloc(mat_size * sizeof(float));
float *const op_data = (float *) malloc(mat_size * sizeof(float));
// 初始化矩陣
srand((unsigned int) time(NULL));
for (int i = 0; i < mat_size; i++)
buf1[i] = float(rand() % 1000) * M_PI;
srand((unsigned int) time(NULL) + 1000);
for (int i = 0; i < mat_size; i++)
buf2[i] = float(rand() % 1000) * M_PI;
// 時(shí)間戳
timestamp = clock();
for (int i = 0; i < mat_size; i++) {
float tmp = 0.0;
for (int k = 0; k < W; k++)
tmp += buf1[i * W + k] * buf2[k * W + i];
buf[i * W + i] = tmp;
}
cputime = (double) (clock() - timestamp) / CLOCKS_PER_SEC * 1000;
printf("串行執(zhí)行時(shí)間:%8.3f ms\n", cputime);
cl_platform_id platform;
cl_event prof_event;
// 創(chuàng)建平臺(tái)對(duì)象
clGetPlatformIDs(1, &platform, NULL);
cl_device_id device;
// 創(chuàng)建 GPU 設(shè)備
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
// 創(chuàng)建 context
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
// 創(chuàng)建命令隊(duì)列
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL);
// 創(chuàng)建三個(gè) OpenCL 內(nèi)存對(duì)象
cl_mem objects[3];
objects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * mat_size, buf1,
NULL);
objects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * mat_size, buf2,
NULL);
objects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * mat_size, buf,
NULL);
const char *filename = "mul.cl";
std::string sourceStr;
convertToString(filename, sourceStr);
const char *source = sourceStr.c_str();
size_t sourceSize[] = {strlen(source)};
cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
// 編譯程序?qū)ο? clBuildProgram(program, 1, &device, NULL, NULL, NULL);
// 創(chuàng)建 Kernel 對(duì)象
cl_kernel kernel = clCreateKernel(program, "matrix_mult", NULL);
// 設(shè)置 Kernel 參數(shù)
clSetKernelArg(kernel, 0, sizeof(int), &W);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &objects[0]);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &objects[1]);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &objects[2]);
//執(zhí)行 kernel
cl_ulong ev_start_time = (cl_ulong) 0;
cl_ulong ev_end_time = (cl_ulong) 0;
size_t global[1];
global[0] = (size_t) W;
clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL, 0, NULL, &prof_event);
clFinish(queue);
//讀取時(shí)間
clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &ev_start_time, NULL);
clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ev_end_time, NULL);
gputime = (double) (ev_end_time - ev_start_time) * 1e-6;
printf("OpenCL 執(zhí)行時(shí)間:%8.3f ms\n", gputime);
//數(shù)據(jù)拷回 host 內(nèi)存
clEnqueueReadBuffer(queue, objects[2], CL_TRUE, 0, sizeof(float) * mat_size, op_data, 0, NULL, NULL);
// 驗(yàn)證 GPU 計(jì)算結(jié)果
for (int i = 0; i < mat_size; i++) {
if (fabs(buf[i] - op_data[i]) > 0.0001) {
printf("check failed\n");
break;
}
}
if (buf1)
free(buf1);
if (buf2)
free(buf2);
if (buf)
free(buf);
if (op_data)
free(op_data);
// 刪除 OpenCL 資源對(duì)象
clReleaseMemObject(objects[2]);
clReleaseMemObject(objects[1]);
clReleaseMemObject(objects[0]);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}