44from benchmark .hail .utils import XFail
55
66
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=15, burn_in_iterations=8)
def benchmark_matrix_table_decode_and_count(profile25_mt):
    """Read the matrix table at ``profile25_mt`` and force a count of its rows."""
    hl.read_matrix_table(str(profile25_mt))._force_count_rows()
1111
1212
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=20, burn_in_iterations=5)
def benchmark_matrix_table_decode_and_count_just_gt(profile25_mt):
    """Read the matrix table, select only the ``GT`` entry field, and force a row count."""
    full_table = hl.read_matrix_table(str(profile25_mt))
    gt_only = full_table.select_entries('GT')
    gt_only._force_count_rows()
1717
1818
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=10, burn_in_iterations=20)
def benchmark_matrix_table_array_arithmetic(profile25_mt):
    """Apply ``hl.pl_dosage`` to ``PL`` on rows with exactly two alleles and force a row count."""
    source = hl.read_matrix_table(str(profile25_mt))
    # Keep only the rows whose allele array has length two.
    two_allele = source.filter_rows(source.alleles.length() == 2)
    with_dosage = two_allele.select_entries(dosage=hl.pl_dosage(two_allele.PL))
    with_dosage.select_rows()._force_count_rows()
2424
2525
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=5, burn_in_iterations=10)
def benchmark_matrix_table_entries_table(profile25_mt):
    """Read the matrix table, convert it with ``entries()``, and force a count."""
    entries = hl.read_matrix_table(str(profile25_mt)).entries()
    entries._force_count()
3030
3131
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=10)
def benchmark_matrix_table_entries_table_no_key(profile25_mt):
    """Force a count of ``entries()`` after re-keying rows and columns with no key fields."""
    keyed = hl.read_matrix_table(str(profile25_mt))
    # Both key_*_by calls are made without arguments.
    unkeyed = keyed.key_rows_by().key_cols_by()
    unkeyed.entries()._force_count()
3636
3737
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=30)
def benchmark_matrix_table_rows_force_count(profile25_mt):
    """Take the rows table of the matrix table, call ``key_by()`` with no fields, and force a count."""
    rows = hl.read_matrix_table(str(profile25_mt)).rows()
    rows.key_by()._force_count()
4242
4343
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=10, burn_in_iterations=15)
def benchmark_matrix_table_show(profile25_mt):
    """Read the matrix table and call ``show(100)`` on it."""
    hl.read_matrix_table(str(profile25_mt)).show(100)
4848
4949
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=15)
def benchmark_matrix_table_rows_show(profile25_mt):
    """Read the matrix table and call ``show(100)`` on its rows table."""
    rows = hl.read_matrix_table(str(profile25_mt)).rows()
    rows.show(100)
5454
5555
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=15, burn_in_iterations=16)
def benchmark_matrix_table_cols_show(profile25_mt):
    """Read the matrix table and call ``show(100)`` on its columns table."""
    cols = hl.read_matrix_table(str(profile25_mt)).cols()
    cols.show(100)
6060
6161
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=10)
def benchmark_matrix_table_take_entry(profile25_mt):
    """Read the matrix table and take 100 values of the ``GT`` field."""
    genotypes = hl.read_matrix_table(str(profile25_mt)).GT
    genotypes.take(100)
6666
6767
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=15)
def benchmark_matrix_table_entries_show(profile25_mt):
    """Read the matrix table and call ``show()`` on its entries table."""
    entries = hl.read_matrix_table(str(profile25_mt)).entries()
    entries.show()
7272
7373
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=20, burn_in_iterations=10)
def benchmark_matrix_table_take_row(profile25_mt):
    """Read the matrix table and take 100 values of ``info.AF``."""
    allele_freq = hl.read_matrix_table(str(profile25_mt)).info.AF
    allele_freq.take(100)
7878
7979
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=10)
def benchmark_matrix_table_take_col(profile25_mt):
    """Read the matrix table and take 100 values of the ``s`` field."""
    samples = hl.read_matrix_table(str(profile25_mt)).s
    samples.take(100)
8484
8585
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=8)
def benchmark_write_range_matrix_table_p100(tmp_path):
    """Write a 1,000,000 x 10 range matrix table (100 partitions) with one entry field.

    The entry field ``x`` is ``col_idx + row_idx``; the output goes to
    ``tmp.mt`` under ``tmp_path``.
    """
    grid = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
    annotated = grid.annotate_entries(x=grid.col_idx + grid.row_idx)
    destination = str(tmp_path / 'tmp.mt')
    annotated.write(destination)
9191
9292
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=15)
def benchmark_write_profile_mt(profile25_mt, tmp_path):
    """Read the matrix table at ``profile25_mt`` and write it to ``tmp.mt`` under ``tmp_path``."""
    source = hl.read_matrix_table(str(profile25_mt))
    destination = str(tmp_path / 'tmp.mt')
    source.write(destination)
9696
9797
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=20, burn_in_iterations=9)
def benchmark_matrix_table_rows_is_transition(profile25_mt):
    """Select ``is_snp`` over the first two alleles of every row and force a count.

    NOTE(review): the benchmark is named "is_transition" but the body calls
    ``hl.is_snp`` -- confirm which function is meant to be measured.
    """
    rows = hl.read_matrix_table(str(profile25_mt)).rows().key_by()
    ref_allele = rows.alleles[0]
    alt_allele = rows.alleles[1]
    rows.select(is_snp=hl.is_snp(ref_allele, alt_allele))._force_count()
102102
103103
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=6)
def benchmark_matrix_table_filter_entries(profile25_mt):
    """Filter entries to ``GQ > 8`` and ``DP > 2``, then force a row count."""
    source = hl.read_matrix_table(str(profile25_mt))
    keep = (source.GQ > 8) & (source.DP > 2)
    source.filter_entries(keep)._force_count_rows()
108108
109109
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=3)
def benchmark_matrix_table_filter_entries_unfilter(profile25_mt):
    """Filter entries to ``GQ > 8`` and ``DP > 2``, call ``unfilter_entries()``, then force a row count."""
    source = hl.read_matrix_table(str(profile25_mt))
    keep = (source.GQ > 8) & (source.DP > 2)
    filtered = source.filter_entries(keep)
    filtered.unfilter_entries()._force_count_rows()
@@ -164,27 +164,27 @@ def many_aggs(mt):
164164 return {f'x{ i } ' : expr for i , expr in enumerate (aggs )}
165165
166166
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=5, burn_in_iterations=4)
def benchmark_matrix_table_many_aggs_row_wise(profile25_mt):
    """Annotate rows with every expression from ``many_aggs`` and force a count of the rows table."""
    source = hl.read_matrix_table(str(profile25_mt))
    row_annotations = many_aggs(source)
    annotated = source.annotate_rows(**row_annotations)
    annotated.rows()._force_count()
172172
173173
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=5, burn_in_iterations=10)
def benchmark_matrix_table_many_aggs_col_wise(profile25_mt):
    """Annotate columns with every expression from ``many_aggs`` and force a count of the columns table."""
    source = hl.read_matrix_table(str(profile25_mt))
    col_annotations = many_aggs(source)
    annotated = source.annotate_cols(**col_annotations)
    annotated.cols()._force_count()
179179
180180
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=15, burn_in_iterations=8)
def benchmark_matrix_table_aggregate_entries(profile25_mt):
    """Aggregate ``hl.agg.stats`` of ``GQ`` over the entries of the matrix table."""
    source = hl.read_matrix_table(str(profile25_mt))
    gq_stats = hl.agg.stats(source.GQ)
    source.aggregate_entries(gq_stats)
185185
186186
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=8)
def benchmark_matrix_table_call_stats_star_star(profile25_mt):
    """Splat ``hl.agg.call_stats(GT, alleles)`` into row annotations and force a row count."""
    source = hl.read_matrix_table(str(profile25_mt))
    call_stats = hl.agg.call_stats(source.GT, source.alleles)
    # The aggregation result is unpacked with ** so its fields become row fields.
    source.annotate_rows(**call_stats)._force_count_rows()
@@ -242,60 +242,60 @@ def benchmark_gnomad_coverage_stats_optimized(gnomad_dp_sim):
242242 mt .rows ()._force_count ()
243243
244244
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=10)
def benchmark_per_row_stats_star_star(gnomad_dp_sim):
    """Splat ``hl.agg.stats(x)`` into row annotations and force a row count."""
    source = hl.read_matrix_table(str(gnomad_dp_sim))
    x_stats = hl.agg.stats(source.x)
    # The aggregation result is unpacked with ** so its fields become row fields.
    source.annotate_rows(**x_stats)._force_count_rows()
249249
250250
@pytest.mark.benchmark(mds=1.1, instances=20, iterations=10, burn_in_iterations=10)
def benchmark_read_decode_gnomad_coverage(gnomad_dp_sim):
    """Read the matrix table at ``gnomad_dp_sim`` and force a count of its rows."""
    coverage = hl.read_matrix_table(str(gnomad_dp_sim))
    coverage._force_count_rows()
254254
255255
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=10)
def benchmark_import_bgen_force_count_just_gp(sim_ukb_bgen, sim_ukb_sample):
    """Import the BGEN file with only the ``GP`` entry field (8 partitions) and force a row count."""
    imported = hl.import_bgen(
        str(sim_ukb_bgen),
        sample_file=str(sim_ukb_sample),
        entry_fields=['GP'],
        n_partitions=8,
    )
    imported._force_count_rows()
260260
261261
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=20)
def benchmark_import_bgen_force_count_all(sim_ukb_bgen, sim_ukb_sample):
    """Import the BGEN file with ``GT``, ``GP`` and ``dosage`` entry fields (8 partitions) and force a row count."""
    requested_fields = ['GT', 'GP', 'dosage']
    imported = hl.import_bgen(
        str(sim_ukb_bgen),
        sample_file=str(sim_ukb_sample),
        entry_fields=requested_fields,
        n_partitions=8,
    )
    imported._force_count_rows()
268268
269269
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=12)
@pytest.mark.xfail(raises=TimeoutError, reason=XFail.Timeout)
def benchmark_import_bgen_info_score(sim_ukb_bgen, sim_ukb_sample):
    """Compute ``hl.agg.info_score`` of ``GP`` per row of the imported BGEN and force a count.

    Marked as an expected failure when a ``TimeoutError`` is raised.
    """
    imported = hl.import_bgen(
        str(sim_ukb_bgen),
        sample_file=str(sim_ukb_sample),
        entry_fields=['GP'],
        n_partitions=8,
    )
    scored = imported.annotate_rows(info_score=hl.agg.info_score(imported.GP))
    # Only the info_score row field is kept before counting.
    scored.rows().select('info_score')._force_count()
276276
277277
@pytest.mark.benchmark(mds=1.1, instances=10, iterations=5, burn_in_iterations=18)
def benchmark_import_bgen_filter_count(sim_ukb_bgen, sim_ukb_sample):
    """Import the BGEN with ``GT`` and ``GP``, keep rows whose alleles equal ``['A', 'T']``, and force a row count."""
    imported = hl.import_bgen(
        str(sim_ukb_bgen),
        sample_file=str(sim_ukb_sample),
        entry_fields=['GT', 'GP'],
        n_partitions=8,
    )
    a_to_t = imported.filter_rows(imported.alleles == ['A', 'T'])
    a_to_t._force_count_rows()
283283
284284
@pytest.mark.benchmark(mds=1.1, instances=15, iterations=20, burn_in_iterations=3)
def benchmark_export_range_matrix_table_entry_field_p100(tmp_path):
    """Export the entry field ``x`` of a 1,000,000 x 10 range matrix table (100 partitions).

    ``x`` is ``col_idx + row_idx``; the output goes to ``result.txt`` under
    ``tmp_path``.
    """
    grid = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
    annotated = grid.annotate_entries(x=grid.col_idx + grid.row_idx)
    destination = str(tmp_path / 'result.txt')
    annotated.x.export(destination)
290290
291291
@pytest.mark.benchmark(mds=1.2, instances=10, iterations=10, burn_in_iterations=8)
def benchmark_export_range_matrix_table_row_p100(tmp_path):
    """Export ``row`` of a 1,000,000 x 10 range matrix table (100 partitions) to ``result.txt``."""
    grid = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
    destination = str(tmp_path / 'result.txt')
    grid.row.export(destination)
296296
297297
@pytest.mark.benchmark(mds=1.2, instances=15, iterations=25, burn_in_iterations=15)
def benchmark_export_range_matrix_table_col_p100(tmp_path):
    """Export ``col`` of a 1,000,000 x 10 range matrix table (100 partitions) to ``result.txt``."""
    grid = hl.utils.range_matrix_table(n_rows=1_000_000, n_cols=10, n_partitions=100)
    destination = str(tmp_path / 'result.txt')
    grid.col.export(destination)
@@ -309,7 +309,7 @@ def benchmark_large_range_matrix_table_sum():
309309 mt .annotate_cols (foo = hl .agg .sum (mt .x ))._force_count_cols ()
310310
311311
312- @pytest .mark .benchmark ()
312+ @pytest .mark .benchmark (mds = 1.2 , instances = 10 , iterations = 5 , burn_in_iterations = 7 )
313313def benchmark_kyle_sex_specific_qc (profile25_mt ):
314314 mt = hl .read_matrix_table (str (profile25_mt ))
315315 mt = mt .annotate_cols (sex = hl .if_else (hl .rand_bool (0.5 ), 'Male' , 'Female' ))
@@ -350,14 +350,14 @@ def benchmark_kyle_sex_specific_qc(profile25_mt):
350350 mt .rows ()._force_count ()
351351
352352
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=10, burn_in_iterations=5)
def benchmark_matrix_table_scan_count_rows_2():
    """Annotate rows of a 200,000,000 x 10 range matrix table (16 partitions) with ``hl.scan.count()`` and force a row count."""
    grid = hl.utils.range_matrix_table(n_rows=200_000_000, n_cols=10, n_partitions=16)
    scanned = grid.annotate_rows(x=hl.scan.count())
    scanned._force_count_rows()
358358
359359
360- @pytest .mark .benchmark ()
360+ @pytest .mark .benchmark (mds = 1.3 , instances = 20 , iterations = 10 , burn_in_iterations = 20 )
361361def benchmark_matrix_table_scan_count_cols_2 ():
362362 mt = hl .utils .range_matrix_table (n_cols = 10_000_000 , n_rows = 10 )
363363 mt = mt .annotate_cols (x = hl .scan .count ())
@@ -372,14 +372,14 @@ def benchmark_matrix_multi_write_nothing(tmp_path):
372372 hl .experimental .write_matrix_tables (mts , str (tmp_path / 'multi-write' ))
373373
374374
@pytest.mark.benchmark(mds=1.1, instances=25, iterations=10, burn_in_iterations=5)
def benchmark_mt_localize_and_collect(profile25_mt):
    """Localize entries into an ``ent`` field and collect the first 150 rows of the resulting table."""
    source = hl.read_matrix_table(str(profile25_mt))
    localized = source.localize_entries("ent")
    first_rows = localized.head(150)
    first_rows.collect()
380380
381381
382- @pytest .mark .benchmark ()
382+ @pytest .mark .benchmark (mds = 1.1 , instances = 20 , iterations = 15 , burn_in_iterations = 5 )
383383def benchmark_mt_group_by_memory_usage (random_doubles_mt ):
384384 mt = hl .read_matrix_table (str (random_doubles_mt ))
385385 mt = mt .group_rows_by (new_idx = mt .row_idx % 3 ).aggregate (x = hl .agg .mean (mt .x ))
0 commit comments