@@ -81,8 +81,108 @@ void testSanity(BLAS blas) {
81
81
blas .dgemm ("T" , "N" , M , N , K , 0.0 , dgeAT , K , dgeB , K , 1.0 , dgeCcopy = dgeC .clone (), M );
82
82
assertArrayEquals (expected , dgeCcopy , depsilon );
83
83
84
- f2j .dgemm ("T" , "T" , M , N , K , 0.0 , dgeAT , K , dgeBT , N , 1.0 , expected = dgeC .clone (), M );
85
- blas .dgemm ("T" , "T" , M , N , K , 0.0 , dgeAT , K , dgeBT , N , 1.0 , dgeCcopy = dgeC .clone (), M );
84
+ f2j .dgemm ("T" , "T" , M /2 , N , K , 0.0 , dgeAT , K , dgeBT , N , 1.0 , expected = dgeC .clone (), M /2 );
85
+ blas .dgemm ("T" , "T" , M /2 , N , K , 0.0 , dgeAT , K , dgeBT , N , 1.0 , dgeCcopy = dgeC .clone (), M /2 );
86
+ assertArrayEquals (expected , dgeCcopy , depsilon );
87
+
88
+ f2j .dgemm ("N" , "N" , M /2 , N , K , 1.0 , dgeA , M /2 , dgeB , K , 2.0 , expected = dgeC .clone (), M /2 );
89
+ blas .dgemm ("N" , "N" , M /2 , N , K , 1.0 , dgeA , M /2 , dgeB , K , 2.0 , dgeCcopy = dgeC .clone (), M /2 );
90
+ assertArrayEquals (expected , dgeCcopy , depsilon );
91
+
92
+ f2j .dgemm ("N" , "T" , M /2 , N , K , 1.0 , dgeA , M /2 , dgeBT , N , 2.0 , expected = dgeC .clone (), M /2 );
93
+ blas .dgemm ("N" , "T" , M /2 , N , K , 1.0 , dgeA , M /2 , dgeBT , N , 2.0 , dgeCcopy = dgeC .clone (), M /2 );
94
+ assertArrayEquals (expected , dgeCcopy , depsilon );
95
+
96
+ f2j .dgemm ("T" , "N" , M /2 , N , K , 1.0 , dgeAT , K , dgeB , K , 2.0 , expected = dgeC .clone (), M /2 );
97
+ blas .dgemm ("T" , "N" , M /2 , N , K , 1.0 , dgeAT , K , dgeB , K , 2.0 , dgeCcopy = dgeC .clone (), M /2 );
98
+ assertArrayEquals (expected , dgeCcopy , depsilon );
99
+
100
+ f2j .dgemm ("T" , "T" , M /2 , N , K , 1.0 , dgeAT , K , dgeBT , N , 2.0 , expected = dgeC .clone (), M /2 );
101
+ blas .dgemm ("T" , "T" , M /2 , N , K , 1.0 , dgeAT , K , dgeBT , N , 2.0 , dgeCcopy = dgeC .clone (), M /2 );
102
+ assertArrayEquals (expected , dgeCcopy , depsilon );
103
+
104
+ f2j .dgemm ("N" , "N" , M /2 , N , K , 1.0 , dgeA , M /2 , dgeB , K , 0.0 , expected = dgeC .clone (), M /2 );
105
+ blas .dgemm ("N" , "N" , M /2 , N , K , 1.0 , dgeA , M /2 , dgeB , K , 0.0 , dgeCcopy = dgeC .clone (), M /2 );
106
+ assertArrayEquals (expected , dgeCcopy , depsilon );
107
+
108
+ f2j .dgemm ("N" , "T" , M /2 , N , K , 1.0 , dgeA , M /2 , dgeBT , N , 0.0 , expected = dgeC .clone (), M /2 );
109
+ blas .dgemm ("N" , "T" , M /2 , N , K , 1.0 , dgeA , M /2 , dgeBT , N , 0.0 , dgeCcopy = dgeC .clone (), M /2 );
110
+ assertArrayEquals (expected , dgeCcopy , depsilon );
111
+
112
+ f2j .dgemm ("T" , "N" , M /2 , N , K , 1.0 , dgeAT , K , dgeB , K , 0.0 , expected = dgeC .clone (), M /2 );
113
+ blas .dgemm ("T" , "N" , M /2 , N , K , 1.0 , dgeAT , K , dgeB , K , 0.0 , dgeCcopy = dgeC .clone (), M /2 );
114
+ assertArrayEquals (expected , dgeCcopy , depsilon );
115
+
116
+ f2j .dgemm ("T" , "T" , M /2 , N , K , 1.0 , dgeAT , K , dgeBT , N , 0.0 , expected = dgeC .clone (), M /2 );
117
+ blas .dgemm ("T" , "T" , M /2 , N , K , 1.0 , dgeAT , K , dgeBT , N , 0.0 , dgeCcopy = dgeC .clone (), M /2 );
118
+ assertArrayEquals (expected , dgeCcopy , depsilon );
119
+
120
+ f2j .dgemm ("N" , "N" , M /2 , N , K , 0.0 , dgeA , M /2 , dgeB , K , 1.0 , expected = dgeC .clone (), M /2 );
121
+ blas .dgemm ("N" , "N" , M /2 , N , K , 0.0 , dgeA , M /2 , dgeB , K , 1.0 , dgeCcopy = dgeC .clone (), M /2 );
122
+ assertArrayEquals (expected , dgeCcopy , depsilon );
123
+
124
+ f2j .dgemm ("N" , "T" , M /2 , N , K , 0.0 , dgeA , M /2 , dgeBT , N , 1.0 , expected = dgeC .clone (), M /2 );
125
+ blas .dgemm ("N" , "T" , M /2 , N , K , 0.0 , dgeA , M /2 , dgeBT , N , 1.0 , dgeCcopy = dgeC .clone (), M /2 );
126
+ assertArrayEquals (expected , dgeCcopy , depsilon );
127
+
128
+ f2j .dgemm ("T" , "N" , M /2 , N , K , 0.0 , dgeAT , K , dgeB , K , 1.0 , expected = dgeC .clone (), M /2 );
129
+ blas .dgemm ("T" , "N" , M /2 , N , K , 0.0 , dgeAT , K , dgeB , K , 1.0 , dgeCcopy = dgeC .clone (), M /2 );
130
+ assertArrayEquals (expected , dgeCcopy , depsilon );
131
+
132
+ f2j .dgemm ("T" , "T" , M /2 , N , K , 0.0 , dgeAT , K , dgeBT , N , 1.0 , expected = dgeC .clone (), M /2 );
133
+ blas .dgemm ("T" , "T" , M /2 , N , K , 0.0 , dgeAT , K , dgeBT , N , 1.0 , dgeCcopy = dgeC .clone (), M /2 );
134
+ assertArrayEquals (expected , dgeCcopy , depsilon );
135
+
136
+ f2j .dgemm ("T" , "T" , M , N /2 , K , 0.0 , dgeAT , K , dgeBT , N /2 , 1.0 , expected = dgeC .clone (), M );
137
+ blas .dgemm ("T" , "T" , M , N /2 , K , 0.0 , dgeAT , K , dgeBT , N /2 , 1.0 , dgeCcopy = dgeC .clone (), M );
138
+ assertArrayEquals (expected , dgeCcopy , depsilon );
139
+
140
+ f2j .dgemm ("N" , "N" , M , N /2 , K , 1.0 , dgeA , M , dgeB , K , 2.0 , expected = dgeC .clone (), M );
141
+ blas .dgemm ("N" , "N" , M , N /2 , K , 1.0 , dgeA , M , dgeB , K , 2.0 , dgeCcopy = dgeC .clone (), M );
142
+ assertArrayEquals (expected , dgeCcopy , depsilon );
143
+
144
+ f2j .dgemm ("N" , "T" , M , N /2 , K , 1.0 , dgeA , M , dgeBT , N /2 , 2.0 , expected = dgeC .clone (), M );
145
+ blas .dgemm ("N" , "T" , M , N /2 , K , 1.0 , dgeA , M , dgeBT , N /2 , 2.0 , dgeCcopy = dgeC .clone (), M );
146
+ assertArrayEquals (expected , dgeCcopy , depsilon );
147
+
148
+ f2j .dgemm ("T" , "N" , M , N /2 , K , 1.0 , dgeAT , K , dgeB , K , 2.0 , expected = dgeC .clone (), M );
149
+ blas .dgemm ("T" , "N" , M , N /2 , K , 1.0 , dgeAT , K , dgeB , K , 2.0 , dgeCcopy = dgeC .clone (), M );
150
+ assertArrayEquals (expected , dgeCcopy , depsilon );
151
+
152
+ f2j .dgemm ("T" , "T" , M , N /2 , K , 1.0 , dgeAT , K , dgeBT , N /2 , 2.0 , expected = dgeC .clone (), M );
153
+ blas .dgemm ("T" , "T" , M , N /2 , K , 1.0 , dgeAT , K , dgeBT , N /2 , 2.0 , dgeCcopy = dgeC .clone (), M );
154
+ assertArrayEquals (expected , dgeCcopy , depsilon );
155
+
156
+ f2j .dgemm ("N" , "N" , M , N /2 , K , 1.0 , dgeA , M , dgeB , K , 0.0 , expected = dgeC .clone (), M );
157
+ blas .dgemm ("N" , "N" , M , N /2 , K , 1.0 , dgeA , M , dgeB , K , 0.0 , dgeCcopy = dgeC .clone (), M );
158
+ assertArrayEquals (expected , dgeCcopy , depsilon );
159
+
160
+ f2j .dgemm ("N" , "T" , M , N /2 , K , 1.0 , dgeA , M , dgeBT , N /2 , 0.0 , expected = dgeC .clone (), M );
161
+ blas .dgemm ("N" , "T" , M , N /2 , K , 1.0 , dgeA , M , dgeBT , N /2 , 0.0 , dgeCcopy = dgeC .clone (), M );
162
+ assertArrayEquals (expected , dgeCcopy , depsilon );
163
+
164
+ f2j .dgemm ("T" , "N" , M , N /2 , K , 1.0 , dgeAT , K , dgeB , K , 0.0 , expected = dgeC .clone (), M );
165
+ blas .dgemm ("T" , "N" , M , N /2 , K , 1.0 , dgeAT , K , dgeB , K , 0.0 , dgeCcopy = dgeC .clone (), M );
166
+ assertArrayEquals (expected , dgeCcopy , depsilon );
167
+
168
+ f2j .dgemm ("T" , "T" , M , N /2 , K , 1.0 , dgeAT , K , dgeBT , N /2 , 0.0 , expected = dgeC .clone (), M );
169
+ blas .dgemm ("T" , "T" , M , N /2 , K , 1.0 , dgeAT , K , dgeBT , N /2 , 0.0 , dgeCcopy = dgeC .clone (), M );
170
+ assertArrayEquals (expected , dgeCcopy , depsilon );
171
+
172
+ f2j .dgemm ("N" , "N" , M , N /2 , K , 0.0 , dgeA , M , dgeB , K , 1.0 , expected = dgeC .clone (), M );
173
+ blas .dgemm ("N" , "N" , M , N /2 , K , 0.0 , dgeA , M , dgeB , K , 1.0 , dgeCcopy = dgeC .clone (), M );
174
+ assertArrayEquals (expected , dgeCcopy , depsilon );
175
+
176
+ f2j .dgemm ("N" , "T" , M , N /2 , K , 0.0 , dgeA , M , dgeBT , N /2 , 1.0 , expected = dgeC .clone (), M );
177
+ blas .dgemm ("N" , "T" , M , N /2 , K , 0.0 , dgeA , M , dgeBT , N /2 , 1.0 , dgeCcopy = dgeC .clone (), M );
178
+ assertArrayEquals (expected , dgeCcopy , depsilon );
179
+
180
+ f2j .dgemm ("T" , "N" , M , N /2 , K , 0.0 , dgeAT , K , dgeB , K , 1.0 , expected = dgeC .clone (), M );
181
+ blas .dgemm ("T" , "N" , M , N /2 , K , 0.0 , dgeAT , K , dgeB , K , 1.0 , dgeCcopy = dgeC .clone (), M );
182
+ assertArrayEquals (expected , dgeCcopy , depsilon );
183
+
184
+ f2j .dgemm ("T" , "T" , M , N /2 , K , 0.0 , dgeAT , K , dgeBT , N /2 , 1.0 , expected = dgeC .clone (), M );
185
+ blas .dgemm ("T" , "T" , M , N /2 , K , 0.0 , dgeAT , K , dgeBT , N /2 , 1.0 , dgeCcopy = dgeC .clone (), M );
86
186
assertArrayEquals (expected , dgeCcopy , depsilon );
87
187
}
88
188
}
0 commit comments