diff --git a/speed-bench/m5_max_40_gpu.csv b/speed-bench/m5_max_40_gpu.csv new file mode 100644 index 00000000..d3cc636b --- /dev/null +++ b/speed-bench/m5_max_40_gpu.csv @@ -0,0 +1,33 @@ +ctx_tokens,prefill_tokens,prefill_tps,gen_tokens,gen_tps,kvcache_bytes +2048,2048,349.88,128,29.72,52184460 +4096,2048,338.07,128,31.07,80373132 +6144,2048,334.14,128,30.75,108561804 +8192,2048,331.16,128,30.76,136750476 +10240,2048,323.85,128,30.43,164939148 +12288,2048,312.27,128,30.41,193127820 +14336,2048,310.32,128,30.07,221316492 +16384,2048,313.19,128,30.08,249505164 +18432,2048,310.01,128,29.72,277693836 +20480,2048,306.72,128,29.84,305882508 +22528,2048,303.13,128,29.56,334071180 +24576,2048,301.19,128,29.53,362259852 +26624,2048,295.78,128,29.24,390448524 +28672,2048,283.19,128,29.26,418637196 +30720,2048,282.88,128,28.97,446825868 +32768,2048,279.47,128,28.91,475014540 +34816,2048,269.97,128,28.62,503203212 +36864,2048,264.96,128,28.56,531391884 +38912,2048,262.98,128,28.33,559580556 +40960,2048,260.56,128,26.30,587769228 +43008,2048,250.09,128,27.97,615957900 +45056,2048,247.52,128,28.04,644146572 +47104,2048,245.00,128,26.86,672335244 +49152,2048,238.96,128,27.02,700523916 +51200,2048,235.29,128,26.73,728712588 +53248,2048,233.92,128,26.24,756901260 +55296,2048,229.71,128,26.06,785089932 +57344,2048,227.64,128,25.84,813278604 +59392,2048,224.30,128,25.25,841467276 +61440,2048,220.99,128,25.43,869655948 +63488,2048,218.38,128,25.01,897844620 +65536,2048,216.46,128,24.84,926033292 \ No newline at end of file diff --git a/speed-bench/m5_max_40_gpu_high_power.csv b/speed-bench/m5_max_40_gpu_high_power.csv new file mode 100644 index 00000000..300ce622 --- /dev/null +++ b/speed-bench/m5_max_40_gpu_high_power.csv @@ -0,0 +1,33 @@ +ctx_tokens,prefill_tokens,prefill_tps,gen_tokens,gen_tps,kvcache_bytes +2048,2048,372.15,128,31.48,52184460 +4096,2048,337.42,128,31.10,80373132 +6144,2048,333.76,128,30.77,108561804 +8192,2048,330.69,128,30.79,136750476 +10240,2048,322.36,128,30.46,164939148 +12288,2048,310.95,128,30.45,193127820 +14336,2048,310.05,128,30.15,221316492 +16384,2048,312.23,128,30.06,249505164 +18432,2048,308.68,128,29.69,277693836 +20480,2048,306.42,128,29.84,305882508 +22528,2048,302.78,128,29.55,334071180 +24576,2048,300.89,128,29.55,362259852 +26624,2048,295.56,128,29.20,390448524 +28672,2048,292.72,128,29.27,418637196 +30720,2048,289.74,128,28.96,446825868 +32768,2048,287.67,128,28.93,475014540 +34816,2048,282.43,128,28.67,503203212 +36864,2048,280.37,128,28.62,531391884 +38912,2048,277.36,128,28.47,559580556 +40960,2048,275.66,128,28.48,587769228 +43008,2048,271.20,128,28.22,615957900 +45056,2048,268.62,128,28.21,644146572 +47104,2048,265.80,128,27.98,672335244 +49152,2048,263.89,128,28.05,700523916 +51200,2048,260.43,128,27.81,728712588 +53248,2048,258.63,128,27.83,756901260 +55296,2048,255.69,128,27.46,785089932 +57344,2048,254.13,128,27.55,813278604 +59392,2048,250.31,128,27.31,841467276 +61440,2048,248.56,128,27.33,869655948 +63488,2048,245.97,128,27.08,897844620 +65536,2048,244.73,128,26.97,926033292 \ No newline at end of file diff --git a/speed-bench/m5_max_40_gpu_high_power_ts.svg b/speed-bench/m5_max_40_gpu_high_power_ts.svg new file mode 100644 index 00000000..e97e2d3f --- /dev/null +++ b/speed-bench/m5_max_40_gpu_high_power_ts.svg @@ -0,0 +1,48 @@ + + + + +M5 Max 40 Gpu High Power t/s + +0 + +100 + +200 + +300 + +400 +0 +10 +20 +30 +40 + +0 + +20k + +40k + +60k + + + +ctx size +prefill t/s +generation t/s + + + + +prefill + +generation + diff --git a/speed-bench/m5_max_40_gpu_ts.svg b/speed-bench/m5_max_40_gpu_ts.svg new file mode 100644 index 00000000..0279588d --- /dev/null +++ b/speed-bench/m5_max_40_gpu_ts.svg @@ -0,0 +1,48 @@ + + + + +M5 Max 40 Gpu t/s + +0 + +100 + +200 + +300 + +400 +0 +10 +20 +30 +40 + +0 + +20k + +40k + +60k + + + +ctx size +prefill t/s +generation t/s + + + + +prefill + +generation +