利用VS2019+MKL加速矩陣運(yùn)算。
軟件版本
Intel OneAP 2022.0.2
Visual Studio 2019 Community
工程配置
-
新建空工程
image.png -
打開OneAPI選項(xiàng)
image.png
- 加入代碼(官方示例)
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include "mkl.h"
void print_arr(int N, char* name, double* array);
void init_arr(int N, double* a);
void Dgemm_multiply(double* a, double* b, double* c, int N);
int main(int argc, char* argv[])
{
clock_t start, stop;
int i, j;
int N;
double* a;
double* b;
double* c;
if (argc < 2)
{
printf("Enter matrix size N=");
//please enter small number first to ensure that the
//multiplication is correct! and then you may enter
//a "reasonably" large number say like 500 or even 1000
scanf("%d", &N);
}
else
{
N = atoi(argv[1]);
}
a = (double*)malloc(sizeof(double) * N * N);
b = (double*)malloc(sizeof(double) * N * N);
c = (double*)malloc(sizeof(double) * N * N);
init_arr(N, a);
init_arr(N, b);
//DGEMM Multiply
//reallocate to force cash to be flushed
a = (double*)malloc(sizeof(double) * N * N);
b = (double*)malloc(sizeof(double) * N * N);
c = (double*)malloc(sizeof(double) * N * N);
init_arr(N, a);
init_arr(N, b);
start = clock();
//for(i=0;i<1000;i++)
Dgemm_multiply(a, b, c, N);
stop = clock();
printf("Dgemm_multiply(). Elapsed time = %g seconds\n",
((double)(stop - start)) / CLOCKS_PER_SEC);
//print simple test case of data to be sure multiplication is correct
if (N < 7) {
print_arr(N, "a", a);
print_arr(N, "b", b);
print_arr(N, "c", c);
}
free(a);
free(b);
free(c);
return 0;
}
//DGEMM way. The PREFERED way, especially for large matrices
void Dgemm_multiply(double* a, double* b, double* c, int N)
{
double alpha = 1.0, beta = 0.;
int incx = 1;
int incy = N;
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, N, N, N, alpha, b, N, a, N, beta, c, N);
}
//initialize array with random data
void init_arr(int N, double* a)
{
int i, j;
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
a[i * N + j] = (i + j + 1) % 10; //keep all entries less than 10. pleasing to the eye!
}
}
}
//print array to std out
void print_arr(int N, char* name, double* array)
{
int i, j;
printf("\n%s\n", name);
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf("%g\t", array[N * i + j]);
}
printf("\n");
}
}
運(yùn)行和調(diào)試
-
偶然出現(xiàn)的奇怪問題合搅,在設(shè)置OneAPI方式為Sequential的時(shí)候燃逻,程序一直可以運(yùn)行旁涤,但是OneAPI設(shè)置為Parallel后,輸入的N只能是小于15(多次實(shí)測),不然就會(huì)報(bào)錯(cuò)!
Parallel N=14
Parallel N=15
錯(cuò)誤 解決方法窘俺,在debug出來的exe文件夾下放入libiomp5md.dll
點(diǎn)亮“在看”
歡迎關(guān)注、收藏复凳、轉(zhuǎn)載哈!
最有意思的GNSS NEWS…不定期更新…