今天我們談談一個“土豪”算法——Strasen矩陣算法
之說以說它“土豪”就是因為其帶來了巨大的空間開銷刊殉。
先來考察一個問題:請用三次實數(shù)乘法計算復數(shù)a+bi和c+di相乘选泻。
由于:
a×(c+d)=ac+ad=s1 焰情;
b×(c-d)=bc-bd=s2 ;
d×(a+b)=ad+bd=s3 错维;
故有實部:s1 -s3 =ac-bd,
虛部:s2+ s3 =ad+bc敷待。
這樣,四次的乘法就變成三次乘法和悦。
Strassen矩陣乘法也是如此退疫,把A,B,C矩陣分解為n/2×n/2子矩陣,進行7次遞歸計算n/2×n/2矩陣的乘法鸽素,其運行時間的遞歸式:
T(n)= Θ(1)???????????? if n=1;
??????7T(n/2)+Θ(n^2 )??? if n>1;
令:
S1=B12-B22;
S2=A11+A12;
S3=A21+A22;
S4=B21-B22;
S5=A11+A22;
S6=B11+B22;
S7=A12-A22;
S8=B21+B22;
S9=A11-A21;
S10=B11+B12;
那么:?P1= A11·S1 = A11·(B12-B22)
P2= B22·S2 = B22·(A11+A12)
P3= B11·S3 = B11·(A21+A22)
P4= A22·S4 = A22·(B21-B22)
P5= S5·S6 = (A11+A22)·(B11+B22)
P6= S7·S8 = (A12-A22)·(B21+B22)
P7= S9·S10 = (A11-A21)·(B11+B12)
C11= P5 + P4 - P2 + P6=A11×B11+A12×B21
C12= P1 + P2=A11×B12+A12×B22
C21= P3 + P4=A21×B11+A22×B21
C22= P5 + P1 – P3 – P7=A21×B21+A22×B22
Strassen算法的具體實現(xiàn)(C語言):
int Strassen(int **A, int **B, int **Result, int Size){
?if (Size == 1){
??//直接計算C11
??Result[0][0] = A[0][0] * B[0][0];
??return 0;
?}
?int NewSize = Size / 2;
?/*分塊矩陣*/
?int **A11, **A12, **A21, **A22;
?int **B11, **B12, **B21, **B22;
?int **C11, **C12, **C21, **C22;
?int **P1, **P2, **P3, **P4, **P5, **P6, **P7;
?/*存放數(shù)組A褒繁、B(i、j)的臨時變量*/
?int **AResult, **BResult;
?A11 = new int*[NewSize];
?A12 = new int*[NewSize];
?A21 = new int*[NewSize];
?A22 = new int*[NewSize];
?B11 = new int*[NewSize];
?B12 = new int*[NewSize];
?B21 = new int*[NewSize];
?B22 = new int*[NewSize];
?C11 = new int*[NewSize];
?C12 = new int*[NewSize];
?C21 = new int*[NewSize];
?C22 = new int*[NewSize];
?P1 = new int*[NewSize];
?P2 = new int*[NewSize];
?P3 = new int*[NewSize];
?P4 = new int*[NewSize];
?P5 = new int*[NewSize];
?P6 = new int*[NewSize];
?P7 = new int*[NewSize];
?AResult = new int*[NewSize];
?BResult = new int*[NewSize];
?for (int i = 0; i < NewSize; i++)
?{
??A11[i] = new int[NewSize];
??A12[i] = new int[NewSize];
??A21[i] = new int[NewSize];
??A22[i] = new int[NewSize];
??B11[i] = new int[NewSize];
??B12[i] = new int[NewSize];
??B21[i] = new int[NewSize];
??B22[i] = new int[NewSize];
??C11[i] = new int[NewSize];
??C12[i] = new int[NewSize];
??C21[i] = new int[NewSize];
??C22[i] = new int[NewSize];
??P1[i] = new int[NewSize];
??P2[i] = new int[NewSize];
??P3[i] = new int[NewSize];
??P4[i] = new int[NewSize];
??P5[i] = new int[NewSize];
??P6[i] = new int[NewSize];
??P7[i] = new int[NewSize];
??AResult[i] = new int[NewSize];
??BResult[i] = new int[NewSize];
?}
?//對分塊矩陣賦值
?for (int i = 0; i < NewSize; i++)
?{
??for (int j = 0; j < NewSize; j++)
??{
???A11[i][j] = A[i][j];
???A12[i][j] = A[i][j + NewSize];
???A21[i][j] = A[i + NewSize][j];
???A22[i][j] = A[i + NewSize][j + NewSize];
???B11[i][j] = B[i][j];
???B12[i][j] = B[i][j + NewSize];
???B21[i][j] = B[i + NewSize][j];
???B22[i][j] = B[i + NewSize][j + NewSize];
??}
?}
?//計算P1 = A11*(B12-B22)
?Sub(B12, B22, BResult, NewSize);
?Strassen(A11, BResult, P1, NewSize);
?//計算P2 = (A11+A12)*B22
?Add(A11, A12, AResult, NewSize);
?Strassen(AResult, B22, P2, NewSize);
?//計算P3 = (A21+A22)*B11
?Add(A21, A22, AResult, NewSize);
?Strassen(AResult, B11, P3, NewSize);
?//計算P4 = A22*(B21-B11)
?Sub(B21, B11, BResult, NewSize);
?Strassen(A22, BResult, P4, NewSize);
?//計算P5 = (A11+A22)*(B11+B22)
?Add(A11, A22, AResult, NewSize);
?Add(B11, B22, BResult, NewSize);
?Strassen(AResult, BResult, P5, NewSize);
?//計算P6 = (A12-A22)*(B21+B22)
?Sub(A12, A22, AResult, NewSize);
?Add(B21, B22, BResult, NewSize);
?Strassen(AResult, BResult, P6, NewSize);
?//計算P7 = (A11-A21)*(B11+B12)
?Sub(A11, A21, AResult, NewSize);
?Add(B11, B12, BResult, NewSize);
?Strassen(AResult, BResult, P7, NewSize);
?//計算C11馍忽,C12棒坏,C21,C22
?//C11 = P5 + P4 - P2 + P6;
?Add(P5, P4, AResult, NewSize);
?Sub(AResult, P2, BResult, NewSize);
?Add(BResult, P6, C11, NewSize);
?//C12=P1+P2
?Add(P1, P2, C12, NewSize);
?//C21=P3+P4
?Add(P3, P4, C21, NewSize);
?//C22=P5+P1-P3-P7
?Add(P5, P1, C22, NewSize);
?Sub(C22, P3, C22, NewSize);
?Sub(C22, P7, C22, NewSize);
?//合并C11遭笋,C12俊抵,C21,C22
?for (int i = 0; i < NewSize; i++)
?{
??for (int j = 0; j < NewSize; j++)
??{
???Result[i][j] = C11[i][j];
???Result[i][j + NewSize] = C12[i][j];
???Result[i + NewSize][j] = C21[i][j];
???Result[i + NewSize][j + NewSize] = C22[i][j];
??}
?}
?//刪除數(shù)組坐梯,回收資源
?for (int i = 0; i < NewSize; i++){
??delete[] A11[i]; delete[] A12[i]; delete[] A21[i]; delete[] A22[i];
??delete[] B11[i]; delete[] B12[i]; delete[] B21[i]; delete[] B22[i];
??delete[] C11[i]; delete[] C12[i]; delete[] C21[i]; delete[] C22[i];
??delete[] P1[i]; delete[] P2[i]; delete[] P3[i]; delete[] P4[i]; delete[] P5[i]; delete[] P6[i]; delete[] P7[i];
??delete[] AResult[i]; delete[] BResult[i];
?}
?delete[] A11; delete[] A12; delete[] A21; delete[] A22;
?delete[] B11; delete[] B12; delete[] B21; delete[] B22;
?delete[] C11; delete[] C12; delete[] C21; delete[] C22;
?delete[] P1; delete[] P2; delete[] P3; delete[] P4; delete[] P5; delete[] P6; delete[] P7;
?delete[] AResult; delete[] BResult;
?return 0;
}
//矩陣相加
void Add(int **A, int **B, int **Q, int Size){
?for (int i = 0; i < Size; i++){
??for (int j = 0; j < Size; j++){
???Q[i][j] = A[i][j] + B[i][j];
??}
?}
}
//矩陣相減
void Sub(int**A, int**B, int **Q, int Size){
?for (int i = 0; i < Size; i++){
??for (int j = 0; j < Size; j++){
???Q[i][j] = A[i][j] - B[i][j];
??}
?}
}
演示結(jié)果:
?
與暴力求解相比:
?? for(i=0;i<m;i++)
???? for(j=0;j<m;j++){
???????? C[i][j]=0;
???? ?for(k=0;k<n;k++)
??????????? C[i][j]+=A[i][k]*B[k][j];????????????
}???
其運行時間(n^lg7,2.80<lg7<2.81)比暴力求解(n3)稍快徽诲,但其精度較低(在處理小數(shù)計算時),且消耗了大量存儲空間吵血。
最后附上源代碼:https://github.com/LRC-cheng/Algorithms_Practise.git