diff --git a/speed-bench/m5_max_q2q4_imatrix.csv b/speed-bench/m5_max_q2q4_imatrix.csv new file mode 100644 index 000000000..b84bf647a --- /dev/null +++ b/speed-bench/m5_max_q2q4_imatrix.csv @@ -0,0 +1,15 @@ +ctx_tokens,prefill_tokens,prefill_tps,gen_tokens,gen_tps,kvcache_bytes +2048,2048,413.85,128,34.42,52184460 +18432,16384,405.31,128,28.42,277693836 +34816,16384,374.49,128,27.75,503203212 +51200,16384,333.84,128,26.79,728712588 +67584,16384,298.66,128,25.75,954221964 +83968,16384,269.69,128,25.43,1179731340 +100352,16384,248.99,128,24.36,1405240716 +116736,16384,230.49,128,23.63,1630750092 +133120,16384,215.12,128,22.37,1856259468 +149504,16384,198.15,128,21.70,2081768844 +165888,16384,187.32,128,20.72,2307278220 +182272,16384,176.49,128,20.16,2532787596 +198656,16384,165.14,128,19.54,2758296972 +200000,1344,157.02,128,19.37,2776775308 diff --git a/speed-bench/m5_max_q2q4_imatrix_ts.svg b/speed-bench/m5_max_q2q4_imatrix_ts.svg new file mode 100644 index 000000000..219dbafa0 --- /dev/null +++ b/speed-bench/m5_max_q2q4_imatrix_ts.svg @@ -0,0 +1,52 @@ + + + + +M5 Max (128GB) q2-q4-imatrix t/s + +0 + +100 + +200 + +300 + +400 + +500 +0 +10 +20 +30 +40 + +0 + +50k + +100k + +150k + +200k + + + +ctx size +prefill t/s +generation t/s + + + + +prefill + +generation +