# Title: Matrix Multiplication from the Slowest to the Fastest CPU-Based Method
# Author: Ajay Khanna | Git: Ajaykhanna | Twitter: @samdig | LinkedIn: ajay-khanna
# Date: Feb.20.2020
# Place: UC Merced
# Lab: Dr. Isborn
# Architecture: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz/32.0GB
# OS: Windows 11 21H2
# Python: 3.9.7
# Importing Time & PyTorch
import time
import torch
#------------------------------------------------------
# Method #1: Pure Python loops
# Matrix multiplication with explicit Python for-loops over PyTorch tensors
def matmul(a, b):
    tic = time.perf_counter()
    a_rows, a_cols = a.shape
    b_rows, b_cols = b.shape
    assert a_cols == b_rows
    c = torch.zeros(a_rows, b_cols)
    # Triple nested loop: one scalar multiply-accumulate per (i, j, k)
    for i in range(a_rows):
        for j in range(b_cols):
            for k in range(a_cols):
                c[i, j] += a[i, k] * b[k, j]
    toc = time.perf_counter()
    print(f"Method #1: Brute Force took {toc - tic:0.4e} seconds")
    return c
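# Note (added for clarity, not part of the original script): this triple loop
# performs O(n^3) scalar operations through the Python interpreter, each with
# per-element tensor indexing overhead, which is why it is by far the slowest
# method benchmarked here.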
#------------------------------------------------------
# Method #2: PyTorch element-wise operation
# A vectorized element-wise multiply followed by .sum() eliminates the
# innermost loop, analogous to a reduction in OpenMP
def matmul_reduction(a, b):
    tic = time.perf_counter()
    a_rows, a_cols = a.shape
    b_rows, b_cols = b.shape
    assert a_cols == b_rows
    c = torch.zeros(a_rows, b_cols)
    for i in range(a_rows):
        for j in range(b_cols):
            # Row-by-column dot product via element-wise multiply and sum
            c[i, j] = (a[i, :] * b[:, j]).sum()
    toc = time.perf_counter()
    print(f"Method #2: Reduction method took {toc - tic:0.4e} seconds")
    return c
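# Illustration (added): the vectorized expression above computes the same
# inner product as the eliminated k-loop, i.e.
# (a[i, :] * b[:, j]).sum() == sum over k of a[i, k] * b[k, j],
# but the loop over k now runs inside PyTorch's compiled kernels.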
#------------------------------------------------------
# Method #3: PyTorch broadcasting
# Broadcasting eliminates another inner loop: each row of a is multiplied
# against all of b at once
def matmul_broadcasting(a, b):
    tic = time.perf_counter()
    a_rows, a_cols = a.shape
    b_rows, b_cols = b.shape
    assert a_cols == b_rows
    c = torch.zeros(a_rows, b_cols)
    for i in range(a_rows):
        # Broadcast row i of a against b, then reduce over the shared dimension
        c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0)
    toc = time.perf_counter()
    print(f"Method #3: Broadcasting method took {toc - tic:0.4e} seconds")
    return c
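# Shape walk-through (added): a[i] has shape (a_cols,), so a[i].unsqueeze(-1)
# has shape (a_cols, 1); broadcasting against b of shape (a_cols, b_cols)
# yields (a_cols, b_cols), and .sum(dim=0) collapses the shared k dimension,
# leaving row i of the product with shape (b_cols,).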
#------------------------------------------------------
# Method #4: Einstein summation (torch.einsum)
def matmul_einstein(a, b):
    tic = time.perf_counter()
    c = torch.einsum('ik,kj->ij', a, b)
    toc = time.perf_counter()
    print(f"Method #4: Einstein method took {toc - tic:0.4e} seconds")
    return c
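# Notation note (added): in the subscript string 'ik,kj->ij', the index k
# appears in both inputs but not in the output, so it is summed over:
# c[i, j] = sum over k of a[i, k] * b[k, j], which is exactly matrix
# multiplication expressed in Einstein summation convention.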
#------------------------------------------------------
# Method #5: PyTorch's built-in matmul, backed by optimized low-level BLAS kernels
def matmul_pytorch(a, b):
    tic = time.perf_counter()
    c = a.matmul(b)
    toc = time.perf_counter()
    print(f"Method #5: PyTorch method took {toc - tic:0.4e} seconds")
    return c
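# Equivalent spellings (added note): a.matmul(b), torch.matmul(a, b), and
# a @ b all invoke the same underlying operation, which on CPU is typically
# routed to an optimized BLAS backend (e.g. MKL), hence the large speedup.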
#------------------------------------------------------
# Generating random input matrices
a_matrix = torch.rand(100, 100)
b_matrix = torch.rand(100, 100)
#
# Run each method and print its result
print('Method #1: Matrix multiplication in pure Python: \n',
      matmul(a_matrix, b_matrix), '\n')
print('Method #2: PyTorch element-wise (reduction) method: \n',
      matmul_reduction(a_matrix, b_matrix), '\n')
print('Method #3: PyTorch broadcasting method: \n',
      matmul_broadcasting(a_matrix, b_matrix), '\n')
print('Method #4: Einstein summation method: \n',
      matmul_einstein(a_matrix, b_matrix), '\n')
print('Method #5: PyTorch built-in matmul (optimized BLAS): \n',
      matmul_pytorch(a_matrix, b_matrix), '\n')
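#------------------------------------------------------
# Correctness check (a minimal added sketch, not part of the original script):
# all five methods should agree with the built-in result up to floating-point
# round-off. Small 10x10 inputs keep the brute-force re-run cheap; the names
# small_a and small_b are illustrative assumptions.
small_a, small_b = torch.rand(10, 10), torch.rand(10, 10)
small_ref = small_a.matmul(small_b)
for method in (matmul, matmul_reduction, matmul_broadcasting,
               matmul_einstein, matmul_pytorch):
    assert torch.allclose(method(small_a, small_b), small_ref, atol=1e-5)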
#------------------------------------------------------
# Results
# Method #1: Brute Force took 2.2534e+01 seconds
# Method #2: Reduction method took 2.7637e-01 seconds
# Method #3: Broadcasting method took 3.6313e-03 seconds
# Method #4: Einstein method took 1.5798e-03 seconds
# Method #5: PyTorch method took 1.7720e-04 seconds
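# Note (added): the numbers above come from a single timed call on the machine
# listed in the header; for more stable measurements one could average many
# repeats, e.g. with timeit:
# import timeit
# print(timeit.timeit(lambda: a_matrix @ b_matrix, number=1000) / 1000)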
# Inspired by this article: https://towardsdatascience.com/matrix-multiplication-the-pytorch-way-c0ad724402ed