diff --git a/.gitignore b/.gitignore index bbf313b..5b10f1f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ +# DBJ +*.ilk +*.pdb +.vs +.vscode + # Object files *.o *.ko diff --git a/Matrix.c b/Matrix.c deleted file mode 100644 index 5cb41a5..0000000 --- a/Matrix.c +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include "Matrix.h" - -static int **Matrix_Allocate(uint32_t row, uint32_t column) -{ - int **matrix = (int **) calloc(row + row * column, sizeof(**matrix)); - int *matrixTemp = (int *) (matrix + row); - - for (uint32_t i = 0; i < row; i++) { - matrix[i] = matrixTemp; - matrixTemp += column; - } - - return matrix; -} - -__attribute__((always_inline)) -inline void Matrix_Free(void *ptr) -{ - Matrix *matrix = (Matrix *) ptr; - return free(matrix->values); -} - -static Matrix Matrix_Addition(Matrix res, const Matrix a, const Matrix b) -{ - for (uint32_t i = 0; i < res.row; i++) - for (uint32_t j = 0; j < res.column; j++) - res.values[i][j] = a.values[i][j] + b.values[i][j]; - - return res; -} - -static Matrix Matrix_Subtract(Matrix res, const Matrix a, const Matrix b) -{ - for (uint32_t i = 0; i < res.row; i++) - for (uint32_t j = 0; j < res.column; j++) - res.values[i][j] = a.values[i][j] - b.values[i][j]; - - return res; -} - -static Matrix Matrix_Multiply(Matrix res, const Matrix a, const Matrix b) -{ - int m1 = (a.values[0][0] + a.values[1][1]) * (b.values[0][0] + b.values[1][1]); - int m2 = (a.values[1][0] + a.values[1][1]) * b.values[0][0]; - int m3 = (b.values[0][1] - b.values[1][1]) * a.values[0][0]; - int m4 = (b.values[1][0] - b.values[0][0]) * a.values[1][1]; - int m5 = (a.values[0][0] + a.values[0][1]) * b.values[1][1]; - int m6 = (a.values[1][0] - a.values[0][0]) * (b.values[0][0] + b.values[0][1]); - int m7 = (a.values[0][1] - a.values[1][1]) * (b.values[1][0] + b.values[1][1]); - - res.values[0][0] = m1 + m4 - m5 + m7; - res.values[0][1] = m3 + m5; - res.values[1][0] = m2 + m4; - res.values[1][1] = m1 - m2 + m3 + m6; - - return res; -} - -void Matrix_Initializer(Matrix *matrix, uint32_t row, uint32_t column) -{ - matrix->row = row; - matrix->column = column; - matrix->values = NULL; - matrix->New = Matrix_Allocate; - matrix->Delete = free; - - matrix->values = matrix->New(row, column); -} - -Matrix_Arith Naive_Matrix_Arith __attribute__((section("Matrix_Arith"))) = { - .Addition = Matrix_Addition, - .Subtract = Matrix_Subtract, - .Multiply = Matrix_Multiply, -}; diff --git a/Matrix.h b/Matrix.h deleted file mode 100644 index 6521458..0000000 --- a/Matrix.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef MATRIX_H_ -#define MATRIX_H_ - -#include - -#define MATRIX_ARITH_BEGIN __start_Matrix_Arith -#define MATRIX_ARITH_END __stop_Matrix_Arith - -#define MATRIX_INITIALIZER(X, ROW, COLUMN) \ - Matrix_Initializer(&(X) ,(ROW), (COLUMN)) - -#define autofree \ - __attribute__((cleanup(Matrix_Free))) - -enum { NAIVE_ARITHMETIC, }; - -typedef struct _Matrix { - uint32_t row; - uint32_t column; - int **values; - - int **(*New)(uint32_t row, uint32_t column); - void (*Delete)(void *); -} Matrix; - -typedef struct _Matrix_Arith { - Matrix (*Addition)(Matrix, const Matrix, const Matrix); - Matrix (*Subtract)(Matrix, const Matrix, const Matrix); - Matrix (*Multiply)(Matrix, const Matrix, const Matrix); -} Matrix_Arith; - -void Matrix_Initializer(Matrix *, uint32_t, uint32_t); -void Matrix_Free(void *); - -extern Matrix_Arith Naive_Matrix_Arith; - -extern Matrix_Arith __start_Matrix_Arith[]; -extern Matrix_Arith __stop_Matrix_Arith[]; - -#endif /* MATRIX_H_ */ diff --git a/README.md b/README.md index 1ad5120..4b29b98 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,18 @@ -# Strassen Algorithm +## 100% REFACTORED, SIMPLE, CORRECT AND WORKING +``` +Refactoring (c) 2021 by dbj@dbj.org -- https://dbj.org/license_dbj +``` -Implementation of the Strassen Algorithm in C +Clang or GCC are required. Built on WIN10 using `TDM-GCC-64` or `clang 11.0.0`. + +  +  + +Original README + +### Strassen Algorithm + +Implementation of the Strassen Algorithm in C, by Ken Dai aka [`MetalheadKen`](https://github.com/MetalheadKen) The Strassen algorithm, is an algorithm for matrix multiplication. It is faster than the standard matrix multiplication algorithm, but would be slower than the diff --git a/Strassen_Algorithm.c b/Strassen_Algorithm.c index f9b7b7b..bcdd918 100644 --- a/Strassen_Algorithm.c +++ b/Strassen_Algorithm.c @@ -1,164 +1,193 @@ -#ifdef _cplusplus -extern "C" { - #include - #include - #include - #include "Matrix.h" -} -#else - #include - #include - #include - #include "Matrix.h" -#endif -#define LOG2(LENGTH) \ - (8 * sizeof(unsigned int) - Count_Leading_Zero((LENGTH)) - 1) +#include +#include +#include +#include "dbj_matrix.h" -int Count_Leading_Zero(unsigned int number); +#define MATRIX_ROWS 2 +#define MATRIX_COLS 4 -Matrix Strassen(Matrix, const Matrix, const Matrix, int); +/* Check if value is the power of two or not */ +#define ISPOW2(V_) (ceil(log2(V_)) == floor(log2(V_))) -int main(int argc, char *argv[]) +static inline Matrix *strassen(Matrix *dest, const Matrix *srcA, const Matrix *srcB, int length) { - int dimensions, matrix_length; - printf("Please enter the dimensions of the matrix: "); - scanf("%d", &dimensions); + if (length == 2) + return matrix_ijk_matmul(dest, srcA, srcB); - if (dimensions <= 0) { printf("The number you entered is invalid."); exit(1); } + int len = length / 2; - /* Check if dimensions of matrix is the power of two or not */ - if ((dimensions & (dimensions - 1)) || (dimensions == 1)) - matrix_length = 2 << LOG2((unsigned int) dimensions); - else - matrix_length = dimensions; - - Matrix matrixA, matrixB, matrixC; - - MATRIX_INITIALIZER(matrixA, matrix_length, matrix_length); - MATRIX_INITIALIZER(matrixB, matrix_length, matrix_length); - MATRIX_INITIALIZER(matrixC, matrix_length, matrix_length); - - srand((unsigned int) time(NULL)); - for (int i = 0; i < dimensions; i++) { - for (int j = 0; j < dimensions; j++) { - matrixA.values[i][j] = rand() % 1000; - matrixB.values[i][j] = rand() % 1000; + matrix_autofree Matrix *a11 = matrix_new(len, len), *a12 = matrix_new(len, len), *a21 = matrix_new(len, len), *a22 = matrix_new(len, len), + *b11 = matrix_new(len, len), *b12 = matrix_new(len, len), *b21 = matrix_new(len, len), *b22 = matrix_new(len, len), *c11 = matrix_new(len, len), *c12 = matrix_new(len, len), *c21 = matrix_new(len, len), *c22 = matrix_new(len, len), *m1 = matrix_new(len, len), *m2 = matrix_new(len, len), *m3 = matrix_new(len, len), *m4 = matrix_new(len, len), *m5 = matrix_new(len, len), *m6 = matrix_new(len, len), *m7 = matrix_new(len, len), *temp1 = matrix_new(len, len), *temp2 = matrix_new(len, len); + + /* Divide matrix into four parts */ + FOR(i, len) + { + FOR(j, len) + { + MXY(a11, i, j) = MXY(srcA, i, j); + MXY(a12, i, j) = MXY(srcA, i, j + len); + MXY(a21, i, j) = MXY(srcA, i + len, j); + MXY(a22, i, j) = MXY(srcA, i + len, j + len); + + MXY(b11, i, j) = MXY(srcB, i, j); + MXY(b12, i, j) = MXY(srcB, i, j + len); + MXY(b21, i, j) = MXY(srcB, i + len, j); + MXY(b22, i, j) = MXY(srcB, i + len, j + len); } } - /* Matrix multiplication */ - Strassen(matrixC, matrixA, matrixB, matrixC.row); + /* Calculate seven formulas of strassen Algorithm */ + strassen(m1, matrix_addition(temp1, a11, a22), matrix_addition(temp2, b11, b22), len); + strassen(m2, matrix_addition(temp1, a21, a22), b11, len); + strassen(m3, a11, matrix_subtraction(temp1, b12, b22), len); + strassen(m4, a22, matrix_subtraction(temp1, b21, b11), len); + strassen(m5, matrix_addition(temp1, a11, a12), b22, len); + strassen(m6, matrix_subtraction(temp1, a21, a11), matrix_addition(temp2, b11, b12), len); + strassen(m7, matrix_subtraction(temp1, a12, a22), matrix_addition(temp2, b21, b22), len); - /* Print the answer of matrix multiplication */ - printf("Matrix A:\n"); - for(int i = 0; i < dimensions; i++) { - for (int j = 0; j < dimensions; j++) { - printf("%5d ", matrixA.values[i][j]); - } - printf("\n"); - } - - printf("\nMatrix B:\n"); - for(int i = 0; i < dimensions; i++) { - for (int j = 0; j < dimensions; j++) { - printf("%5d ", matrixB.values[i][j]); - } - printf("\n"); - } + /* Merge the answer of matrix dest */ + /* c11 = m1 + m4 - m5 + m7 = m1 + m4 - (m5 - m7) */ + matrix_subtraction(c11, matrix_addition(temp1, m1, m4), matrix_subtraction(temp2, m5, m7)); + matrix_addition(c12, m3, m5); + matrix_addition(c21, m2, m4); + matrix_addition(c22, matrix_subtraction(temp1, m1, m2), matrix_addition(temp2, m3, m6)); - printf("\nMatrix C:\n"); - for(int i = 0; i < dimensions; i++) { - for (int j = 0; j < dimensions; j++) { - printf("%10d ", matrixC.values[i][j]); + /* Store the answer of matrix multiplication */ + FOR(i, len) + { + FOR(j, len) + { + MXY(dest, i, j) = MXY(c11, i, j); + MXY(dest, i, j + len) = MXY(c12, i, j); + MXY(dest, i + len, j) = MXY(c21, i, j); + MXY(dest, i + len, j + len) = MXY(c22, i, j); } - printf("\n"); } - matrixA.Delete(matrixA.values); - matrixB.Delete(matrixB.values); - matrixC.Delete(matrixC.values); + return dest; +} - return 0; +/* +-------------------------------------------------------------------------------------------------------------------- +callbacks for matrix_for_each +*/ +static inline Matrix *ordinal_as_val(Matrix *mx, unsigned i, unsigned j) +{ + MXY(mx, i, j) = (i * mx->cols + j); + return mx; } -int Count_Leading_Zero(unsigned int number) +static inline Matrix *zoro(Matrix *mx, unsigned i, unsigned j) { - if (number == 0) return 32; - - int count = 0; - if (number <= 0x0000FFFF) { count += 16; number <<= 16; } - if (number <= 0x00FFFFFF) { count += 8; number <<= 8; } - if (number <= 0x0FFFFFFF) { count += 4; number <<= 4; } - if (number <= 0x3FFFFFFF) { count += 2; number <<= 2; } - if (number <= 0x7FFFFFFF) { count += 1; number <<= 1; } - - return count; + assert(mx && mx->values); + MXY(mx, i, j) = 0; + return mx; } -Matrix Strassen(Matrix dest, const Matrix srcA, const Matrix srcB, int length) +/* +-------------------------------------------------------------------------------------------------------------------- +nano app framework so that it is maleable to using it with UBENCH.H +*/ +typedef struct APP_DATA APP_DATA; +void app_start(const int, const char *const[]); +void app_end(); + +/* +-------------------------------------------------------------------------------------------------------------------- +*/ + +static struct APP_DATA { - Matrix_Arith *arith = &__start_Matrix_Arith[NAIVE_ARITHMETIC]; - - if (length == 2) return arith->Multiply(dest, srcA, srcB); + Matrix *matrixA; + Matrix *matrixB; + Matrix *matrixC; + Matrix *matrixR; +} *app_data = 0; - int len = length / 2; +void app_start(const int argc, const char *const argv[]) +{ + /* Check if dimensions of matrix are the power of two */ + if (!ISPOW2(MATRIX_ROWS)) + { + printf("\n%s\tERROR: Each matrix side must be a power of 2. " + "And current rows num:%d is not.", + argv[0], MATRIX_ROWS); + exit(EXIT_FAILURE); + } - autofree Matrix a11, a12, a21, a22, /* Matrix srcA divide four part */ - b11, b12, b21, b22, /* Matrix srcB divide four part */ - c11, c12, c21, c22, /* Matrix dest divide four part */ - m1, m2, m3, m4, m5, m6, m7, - temp1, temp2; - - /* Initializer the matrix */ - MATRIX_INITIALIZER(a11, len, len); MATRIX_INITIALIZER(a12, len, len); MATRIX_INITIALIZER(a21, len, len); MATRIX_INITIALIZER(a22, len, len); - MATRIX_INITIALIZER(b11, len, len); MATRIX_INITIALIZER(b12, len, len); MATRIX_INITIALIZER(b21, len, len); MATRIX_INITIALIZER(b22, len, len); - MATRIX_INITIALIZER(c11, len, len); MATRIX_INITIALIZER(c12, len, len); MATRIX_INITIALIZER(c21, len, len); MATRIX_INITIALIZER(c22, len, len); - MATRIX_INITIALIZER(m1, len, len); MATRIX_INITIALIZER(m2, len, len); MATRIX_INITIALIZER(m3, len, len); MATRIX_INITIALIZER(m4, len, len); - MATRIX_INITIALIZER(m5, len, len); MATRIX_INITIALIZER(m6, len, len); MATRIX_INITIALIZER(m7, len, len); - MATRIX_INITIALIZER(temp1, len, len); MATRIX_INITIALIZER(temp2, len, len); - - /* Divide matrix to four part */ - for (int i = 0; i < len; i++) { - for (int j = 0; j < len; j++) { - a11.values[i][j] = srcA.values[i][j]; - a12.values[i][j] = srcA.values[i][j + len]; - a21.values[i][j] = srcA.values[i + len][j]; - a22.values[i][j] = srcA.values[i + len][j + len]; - - b11.values[i][j] = srcB.values[i][j]; - b12.values[i][j] = srcB.values[i][j + len]; - b21.values[i][j] = srcB.values[i + len][j]; - b22.values[i][j] = srcB.values[i + len][j + len]; - } + if (!ISPOW2(MATRIX_COLS)) + { + printf("\n%s\tERROR: Each matrix side must be a power of 2. " + "And current cols num:%d is not.", + argv[0], MATRIX_COLS); + exit(EXIT_FAILURE); } - /* Calculate seven formulas of Strassen Algorithm */ - Strassen(m1, arith->Addition(temp1, a11, a22), arith->Addition(temp2, b11, b22), len); - Strassen(m2, arith->Addition(temp1, a21, a22), b11, len); - Strassen(m3, a11, arith->Subtract(temp1, b12, b22), len); - Strassen(m4, a22, arith->Subtract(temp1, b21, b11), len); - Strassen(m5, arith->Addition(temp1, a11, a12), b22, len); - Strassen(m6, arith->Subtract(temp1, a21, a11), arith->Addition(temp2, b11, b12), len); - Strassen(m7, arith->Subtract(temp1, a12, a22), arith->Addition(temp2, b21, b22), len); + app_data = calloc(1, sizeof(APP_DATA)); + assert(app_data); + app_data->matrixA = matrix_new(MATRIX_ROWS, MATRIX_COLS); + app_data->matrixB = matrix_new(MATRIX_ROWS, MATRIX_COLS); + app_data->matrixC = matrix_new(MATRIX_ROWS, MATRIX_COLS); + app_data->matrixR = matrix_new(MATRIX_ROWS, MATRIX_COLS); + + matrix_foreach(app_data->matrixA, ordinal_as_val); + matrix_foreach(app_data->matrixB, ordinal_as_val); + matrix_foreach(app_data->matrixC, zoro); + matrix_foreach(app_data->matrixR, zoro); +} +void app_end() +{ - /* Merge the answer of matrix dest */ - /* c11 = m1 + m4 - m5 + m7 = m1 + m4 - (m5 - m7) */ - arith->Subtract(c11, arith->Addition(temp1, m1, m4), arith->Subtract(temp2, m5, m7)); - arith->Addition(c12, m3, m5); - arith->Addition(c21, m2, m4); - arith->Addition(c22, arith->Subtract(temp1, m1, m2), arith->Addition(temp2, m3, m6)); - - /* Store the answer of matrix multiplication */ - for (int i = 0; i < len; i++) { - for (int j = 0; j < len; j++) { - dest.values[i][j] = c11.values[i][j]; - dest.values[i][j + len] = c12.values[i][j]; - dest.values[i + len][j] = c21.values[i][j]; - dest.values[i + len][j + len] = c22.values[i][j]; - } + printf("\n\nMatrix multiplication\n\nC = strassen(A, B)\nR = ijk_matmul(A,B)\n\n"); + if (MATRIX_ROWS < 9) + { + printf("\n\nAfter multiplications:\n"); + matrix_print("Matrix A:", app_data->matrixA); + matrix_print("Matrix B:", app_data->matrixB); + matrix_print("Matrix C:", app_data->matrixC); + matrix_print("Matrix R:", app_data->matrixR); } - return dest; + printf("\n\nC is Strassen result and R is ijk_matmul result. They should be equal. "); + + if (!matrix_equal(app_data->matrixC, app_data->matrixR)) + printf("Unfortunately they are not.\n\n"); + else + printf("And indeed they are.\n\n"); + + matrix_free(&app_data->matrixA); + matrix_free(&app_data->matrixB); + matrix_free(&app_data->matrixC); + matrix_free(&app_data->matrixR); + + free(app_data); + app_data = 0; +} +/* +-------------------------------------------------------------------------------------------------------------------- +*/ +static void do_strassen() +{ + /* Matrix multiplication */ + app_data->matrixC = strassen(app_data->matrixC, app_data->matrixA, app_data->matrixB, MATRIX_ROWS); +} + +static void do_ijk() +{ + app_data->matrixR = matrix_ijk_matmul(app_data->matrixR, app_data->matrixA, app_data->matrixB); +} + +/* +-------------------------------------------------------------------------------------------------------------------- +*/ + +int main(int argc, const char *const argv[]) +{ + app_start(argc, argv); + do_strassen(); + do_ijk(); + app_end(); + return 0; } diff --git a/dbj_matrix.h b/dbj_matrix.h new file mode 100644 index 0000000..16f0cfc --- /dev/null +++ b/dbj_matrix.h @@ -0,0 +1,129 @@ +#ifndef MATRIX_H_ +#define MATRIX_H_ + +#include +#include +#include + +typedef struct _Matrix +{ + unsigned rows; + unsigned cols; + int *values; +} Matrix; + +#define MXY_NOT_POINTER_(M_, R_, C_) ((int(*)[M_.rows])(M_.values))[R_][C_] + +// mx as a pointer +#define MXY(M_, R_, C_) MXY_NOT_POINTER_(((Matrix)*M_), R_, C_) + +#define FOR(C_, N_) for (unsigned C_ = 0; C_ < N_; ++C_) + +/* + Matrix mx = { 3,2, calloc(3 * 2, sizeof(int)) } ; + + MXY(mx,1,1) = 42; + + assert( MXY(mx,1,1) == 42) ; +*/ + +static inline void matrix_free(Matrix **ptr) +{ + assert(*ptr && (*ptr)->values); + free((*ptr)->values); + free(*ptr); + *ptr = 0; +} + +#define matrix_autofree __attribute__((cleanup(matrix_free))) + +static inline Matrix *matrix_addition(Matrix *res, const Matrix *a, const Matrix *b) +{ + assert(res && a && b); + FOR(i, res->rows) + FOR(j, res->cols) + // res->values[i][j] = a->values[i][j] + b->values[i][j]; + MXY(res, i, j) = MXY(a, i, j) + MXY(b, i, j); + + return res; +} + +static inline Matrix *matrix_subtraction(Matrix *res, const Matrix *a, const Matrix *b) +{ + assert(res && a && b); + FOR(i, res->rows) + FOR(j, res->cols) + MXY(res, i, j) = MXY(a, i, j) - MXY(b, i, j); + + return res; +} + +static inline int matrix_equal(const Matrix *a, const Matrix *b) +{ + assert(a && b); + + if (a->rows != b->rows) + return 0; + if (a->cols != b->cols) + return 0; + + FOR(i, a->rows) + FOR(j, a->cols) + if (MXY(a, i, j) != MXY(b, i, j)) + return 0; + + return 1; +} + +static Matrix *matrix_ijk_matmul(Matrix *C, const Matrix *A, const Matrix *B) +{ + assert(A && B && C); + FOR(i, A->rows) + { + FOR(j, B->cols) + { + MXY(C, i, j) = 0; + FOR(k, A->cols) + { + MXY(C, i, j) += MXY(A, i, k) * MXY(B, k, j); + } + } + } + return C; +} + +static inline Matrix *matrix_new(uint32_t rows, uint32_t cols) +{ + Matrix *matrix = calloc(1, sizeof(Matrix)); + matrix->rows = rows; + matrix->cols = cols; + matrix->values = calloc(rows * cols, sizeof(int)); + return matrix; +} + +static void matrix_print(const char *prompt, Matrix *mx) +{ + assert(prompt && mx); + printf("%s\n", prompt); + FOR(i, mx->rows) + { + FOR(j, mx->cols) + { + printf(" %5d ", MXY(mx, i, j)); + } + printf("\n"); + } +} + +static void inline matrix_foreach(Matrix *mx, Matrix *(*callback)(Matrix *, unsigned, unsigned)) +{ + FOR(i, mx->rows) + { + FOR(j, mx->cols) + { + callback(mx, i, j); + } + } +} + +#endif /* MATRIX_H_ */