diff --git a/aiter/configs/a8w8_blockscale_tuned_gemm.csv b/aiter/configs/a8w8_blockscale_tuned_gemm.csv index 11aa0c8ea8..c5486656df 100644 --- a/aiter/configs/a8w8_blockscale_tuned_gemm.csv +++ b/aiter/configs/a8w8_blockscale_tuned_gemm.csv @@ -4,3 +4,84 @@ cu_num,M,N,K,libtype,kernelId,splitK,us,kernelName,tflops,bw,errRatio 256,20480,512,7168,cktile,11,0,106.7764,a8w8_blockscale_cktile_192x256x128_4x2x1_16x16x128_intrawave_0x1x0_1,1407.84,1605.62,0.0 256,128,1024,4096,ck,8,0,13.7599,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,78.03,361.97,0.0 256,128,4096,1280,ck,7,0,7.4194,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,180.9,870.06,0.0 + +64,16,512,7168,ck,5,0,21.0677,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,5.57,180.42,0.0 +64,32,512,7168,ck,7,0,21.2326,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,11.06,185.19,0.0 +64,64,512,7168,ck,5,0,21.5607,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,21.79,194.53,0.0 +64,128,512,7168,ck,7,0,22.3868,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,41.97,210.78,0.0 +64,256,512,7168,ck,5,0,25.4464,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.84,226.64,0.0 +64,512,512,7168,ck,5,0,37.5173,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.17,209.62,0.0 +64,1024,512,7168,ck,5,0,70.283,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.94,171.57,0.0 +64,1536,512,7168,ck,4,0,83.7976,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,134.54,193.95,0.0 +64,2048,512,7168,ck,4,0,110.0607,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,136.58,185.78,0.0 +64,4096,512,7168,ck,4,0,217.6474,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,138.14,171.03,0.0 +64,8192,512,7168,ck,4,0,420.1874,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,143.1,168.45,0.0 +64,16384,512,7168,ck,4,0,847.0249,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,141.98,162.79,0.0 +64,20480,512,7168,ck,4,0,1063.6592,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,141.33,161.18,0.0 +64,16,576,7168,ck,7,0,27.5569,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,4.79,154.66,0.0 +64,32,576,7168,ck,7,0,27.8319,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,9.49,157.91,0.0 +64,64,576,7168,ck,7,0,27.8765,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,18.96,167.21,0.0 +64,128,576,7168,ck,5,0,31.8357,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,33.2,163.14,0.0 +64,256,576,7168,ck,5,0,42.258,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,50.02,148.11,0.0 +64,512,576,7168,ck,5,0,66.3617,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,63.71,126.41,0.0 +64,1024,576,7168,ck,5,0,122.2805,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.15,103.44,0.0 +64,1536,576,7168,ck,5,0,174.2874,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.77,97.01,0.0 +64,2048,576,7168,ck,5,0,227.9122,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,74.2,92.88,0.0 +64,4096,576,7168,ck,5,0,443.798,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.21,86.09,0.0 +64,8192,576,7168,ck,5,0,882.2476,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.67,81.93,0.0 +64,16384,576,7168,ck,5,0,1705.0677,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.35,82.37,0.0 +64,20480,576,7168,ck,5,0,2175.1492,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.75,80.23,0.0 +64,128,1024,4096,ck,5,0,17.9983,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.66,276.73,0.0 +64,16,1536,7168,ck,7,0,25.3706,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,13.89,440.43,0.0 +64,32,1536,7168,ck,7,0,25.5001,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,27.63,444.62,0.0 +64,64,1536,7168,ck,5,0,33.3487,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,42.26,349.8,0.0 +64,128,1536,7168,ck,5,0,36.524,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.17,337.33,0.0 +64,256,1536,7168,ck,4,0,54.0994,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,104.2,251.97,0.0 +64,512,1536,7168,ck,4,0,91.6311,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,123.04,177.37,0.0 +64,1024,1536,7168,ck,4,0,163.4359,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.97,131.52,0.0 +64,1536,1536,7168,ck,4,0,240.0444,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,140.9,111.39,0.0 +64,2048,1536,7168,ck,4,0,315.9935,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,142.72,101.21,0.0 +64,4096,1536,7168,ck,4,0,617.6339,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,146.03,85.74,0.0 +64,8192,1536,7168,ck,4,0,1202.2416,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,150.04,78.93,0.0 +64,16384,1536,7168,ck,4,0,2443.3914,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,147.65,73.17,0.0 +64,20480,1536,7168,ck,4,0,3101.2909,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,145.41,71.17,0.0 +64,128,4096,1280,ck,5,0,17.2714,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.71,373.76,0.0 +64,16,4608,7168,ck,7,0,62.3389,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,16.96,534.05,0.0 +64,32,4608,7168,ck,5,0,65.7963,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,32.13,509.97,0.0 +64,64,4608,7168,ck,5,0,72.0885,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,58.65,472.73,0.0 +64,128,4608,7168,ck,5,0,94.9716,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.03,369.87,0.0 +64,256,4608,7168,ck,4,0,127.4667,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.67,292.03,0.0 +64,512,4608,7168,ck,4,0,240.8659,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,140.42,171.96,0.0 +64,1024,4608,7168,ck,4,0,466.6247,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,144.97,106.74,0.0 +64,1536,4608,7168,ck,4,0,686.5133,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,147.8,84.77,0.0 +64,2048,4608,7168,ck,4,0,913.8289,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,148.05,72.86,0.0 +64,4096,4608,7168,ck,4,0,1829.1106,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,147.93,54.75,0.0 +64,8192,4608,7168,ck,4,0,3699.2334,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,146.29,45.21,0.0 +64,16384,4608,7168,ck,4,0,7372.5394,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,146.81,40.89,0.0 +64,20480,4608,7168,ck,4,0,9277.5367,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,145.83,39.73,0.0 +64,16,7168,256,ck,5,0,7.0275,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,8.36,294.34,0.0 +64,16,7168,2304,ck,5,0,30.0406,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,17.59,558.62,0.0 +64,32,7168,256,ck,5,0,7.5259,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,15.6,305.87,0.0 +64,32,7168,2304,ck,5,0,32.0259,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,33.0,532.31,0.0 +64,64,7168,256,ck,7,0,8.5336,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,27.52,324.47,0.0 +64,64,7168,2304,ck,5,0,35.3908,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.73,496.74,0.0 +64,128,7168,256,ck,5,0,10.2,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,46.06,363.02,0.0 +64,128,7168,2304,ck,5,0,44.4092,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.2,419.85,0.0 +64,256,7168,256,ck,5,0,16.5328,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.83,336.94,0.0 +64,256,7168,2304,ck,4,0,70.939,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,119.2,292.86,0.0 +64,512,7168,256,ck,5,0,30.8461,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,60.92,301.69,0.0 +64,512,7168,2304,ck,4,0,136.0203,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,124.33,184.05,0.0 +64,1024,7168,256,ck,5,0,58.1541,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.62,288.5,0.0 +64,1024,7168,2304,ck,4,0,251.6477,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,134.41,133.34,0.0 +64,1536,7168,256,ck,5,0,85.6123,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.85,283.23,0.0 +64,1536,7168,2304,ck,4,0,373.3302,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.9,112.7,0.0 +64,2048,7168,256,ck,5,0,113.2028,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.4,280.2,0.0 +64,2048,7168,2304,ck,4,0,498.9962,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.56,101.39,0.0 +64,4096,7168,256,ck,5,0,203.805,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.76,302.27,0.0 +64,4096,7168,2304,ck,4,0,982.7803,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.66,86.16,0.0 +64,8192,7168,256,ck,5,0,415.4376,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.37,292.16,0.0 +64,8192,7168,2304,ck,4,0,1966.7933,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.58,77.71,0.0 +64,16384,7168,256,ck,5,0,831.8664,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.28,289.6,0.0 +64,16384,7168,2304,ck,4,0,4046.2315,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.75,71.46,0.0 +64,20480,7168,256,ck,5,0,1051.0383,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,71.51,286.08,0.0 +64,20480,7168,2304,ck,4,0,5103.8124,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.54,70.01,0.0 diff --git a/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3-30B-A3B.csv b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3-30B-A3B.csv new file mode 100644 index 0000000000..8bd4277811 --- /dev/null +++ b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3-30B-A3B.csv @@ -0,0 +1,190 @@ +cu_num,M,N,K,libtype,kernelId,splitK,us,kernelName,tflops,bw,errRatio +64,1,2560,2048,ck,7,0,14.4982,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.72,362.12,0.0 +64,2,2560,2048,ck,7,0,13.2106,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.59,397.95,0.0 +64,4,2560,2048,ck,7,0,13.2387,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,3.17,398.19,0.0 +64,8,2560,2048,ck,7,0,13.3632,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,6.28,396.63,0.0 +64,16,2560,2048,ck,7,0,13.5734,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,12.36,394.71,0.0 +64,24,2560,2048,ck,7,0,15.0946,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,16.67,358.73,0.0 +64,32,2560,2048,ck,7,0,15.0626,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,22.28,363.3,0.0 +64,40,2560,2048,ck,7,0,14.8926,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,28.16,371.3,0.0 +64,48,2560,2048,ck,7,0,15.0006,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,33.55,372.45,0.0 +64,56,2560,2048,ck,5,0,16.2953,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,36.04,346.38,0.0 +64,64,2560,2048,ck,5,0,16.6975,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,40.19,341.47,0.0 +64,72,2560,2048,ck,5,0,17.5013,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,43.14,329.06,0.0 +64,80,2560,2048,ck,5,0,16.9866,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.38,342.41,0.0 +64,88,2560,2048,ck,5,0,17.5711,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,52.52,334.28,0.0 +64,96,2560,2048,ck,5,0,17.7821,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.61,333.54,0.0 +64,104,2560,2048,ck,5,0,20.1394,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,54.15,297.35,0.0 +64,112,2560,2048,ck,5,0,19.6822,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.67,307.17,0.0 +64,120,2560,2048,ck,5,0,19.7946,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,63.57,308.32,0.0 +64,128,2560,2048,ck,4,0,20.3629,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.91,302.53,0.0 +64,136,2560,2048,ck,4,0,20.2235,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,70.52,307.45,0.0 +64,144,2560,2048,ck,4,0,20.5388,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.52,305.52,0.0 +64,152,2560,2048,ck,5,0,24.1061,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.12,262.69,0.0 +64,160,2560,2048,ck,5,0,24.4666,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,68.57,261.16,0.0 +64,168,2560,2048,ck,5,0,26.4759,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.54,243.51,0.0 +64,176,2560,2048,ck,5,0,26.9985,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,68.36,240.92,0.0 +64,184,2560,2048,ck,5,0,29.9572,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.4,219.04,0.0 +64,192,2560,2048,ck,5,0,31.164,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.6,212.4,0.0 +64,200,2560,2048,ck,5,0,31.631,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.3,211.07,0.0 +64,208,2560,2048,ck,5,0,32.4961,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,67.12,207.22,0.0 +64,216,2560,2048,ck,5,0,32.7541,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.15,207.34,0.0 +64,224,2560,2048,ck,5,0,33.2781,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,70.58,205.8,0.0 +64,232,2560,2048,ck,5,0,33.4494,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.73,206.46,0.0 +64,240,2560,2048,ck,5,0,33.6857,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,74.71,206.71,0.0 +64,248,2560,2048,ck,5,0,27.8009,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,93.54,252.53,0.0 +64,256,2560,2048,ck,5,0,27.8933,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,96.24,253.75,0.0 +64,272,2560,2048,ck,5,0,36.6092,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.91,196.47,0.0 +64,288,2560,2048,ck,4,0,38.5222,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.39,189.69,0.0 +64,304,2560,2048,ck,4,0,38.0532,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.77,195.04,0.0 +64,320,2560,2048,ck,5,0,42.657,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.66,176.68,0.0 +64,336,2560,2048,ck,5,0,44.1037,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.88,173.48,0.0 +64,352,2560,2048,ck,5,0,44.9421,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,82.13,172.8,0.0 +64,368,2560,2048,ck,5,0,46.4629,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.05,169.61,0.0 +64,384,2560,2048,ck,4,0,48.7284,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,82.63,164.08,0.0 +64,400,2560,2048,ck,4,0,49.2078,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.24,164.81,0.0 +64,416,2560,2048,ck,5,0,50.9392,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.63,161.46,0.0 +64,432,2560,2048,ck,4,0,52.1469,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.87,159.92,0.0 +64,448,2560,2048,ck,4,0,53.5107,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.79,157.99,0.0 +64,464,2560,2048,ck,4,0,50.9425,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.51,168.21,0.0 +64,480,2560,2048,ck,5,0,53.6699,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,93.78,161.79,0.0 +64,496,2560,2048,ck,4,0,52.9997,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.13,166.0,0.0 +64,512,2560,2048,ck,5,0,51.4665,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,104.31,173.18,0.0 +64,8192,2560,2048,ck,4,0,661.0065,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,129.95,96.77,0.0 +64,16,2048,2048,ck,7,0,11.4142,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,11.76,376.08,0.0 +64,32,2048,2048,ck,7,0,12.081,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,22.22,363.46,0.0 +64,64,2048,2048,ck,7,0,13.6866,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,39.23,335.18,0.0 +64,128,2048,2048,ck,5,0,16.108,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.66,309.21,0.0 +64,256,2048,2048,ck,5,0,24.0856,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.16,239.44,0.0 +64,512,2048,2048,ck,5,0,42.558,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.92,172.47,0.0 +64,1024,2048,2048,ck,4,0,75.1753,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.27,139.48,0.0 +64,2048,2048,2048,ck,4,0,132.3238,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,129.83,126.79,0.0 +64,4096,2048,2048,ck,4,0,272.5465,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,126.07,107.73,0.0 +64,8192,2048,2048,ck,4,0,523.3344,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,131.31,104.19,0.0 +64,16384,2048,2048,ck,4,0,1049.3111,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.98,99.93,0.0 +64,20480,2048,2048,ck,4,0,1334.3078,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,128.75,97.45,0.0 +64,1,2048,2048,ck,7,0,12.39,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.68,339.02,0.0 +64,2,2048,2048,ck,7,0,11.5069,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.46,365.57,0.0 +64,4,2048,2048,ck,7,0,11.4914,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,2.92,367.13,0.0 +64,8,2048,2048,ck,7,0,11.4504,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,5.86,370.59,0.0 +64,16,2048,2048,ck,7,0,11.4601,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,11.71,374.57,0.0 +64,24,2048,2048,ck,7,0,12.2285,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,16.46,355.05,0.0 +64,32,2048,2048,ck,7,0,12.0861,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,22.21,363.3,0.0 +64,40,2048,2048,ck,7,0,13.8578,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,24.21,320.4,0.0 +64,48,2048,2048,ck,7,0,13.969,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,28.82,321.37,0.0 +64,56,2048,2048,ck,7,0,13.593,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,34.56,333.88,0.0 +64,64,2048,2048,ck,7,0,13.6859,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,39.23,335.2,0.0 +64,72,2048,2048,ck,5,0,15.8397,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,38.13,292.72,0.0 +64,80,2048,2048,ck,5,0,14.6585,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,45.78,319.67,0.0 +64,88,2048,2048,ck,5,0,14.9206,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.48,317.34,0.0 +64,96,2048,2048,ck,5,0,16.233,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.61,294.72,0.0 +64,104,2048,2048,ck,5,0,15.779,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,55.29,306.31,0.0 +64,112,2048,2048,ck,4,0,16.8734,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,55.68,289.36,0.0 +64,120,2048,2048,ck,5,0,16.0642,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,62.66,306.99,0.0 +64,128,2048,2048,ck,5,0,16.4074,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.44,303.57,0.0 +64,136,2048,2048,ck,5,0,17.5211,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.11,287.08,0.0 +64,144,2048,2048,ck,5,0,17.7004,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,68.24,286.94,0.0 +64,152,2048,2048,ck,5,0,17.4914,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.9,293.18,0.0 +64,160,2048,2048,ck,5,0,17.5621,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.42,294.8,0.0 +64,168,2048,2048,ck,4,0,18.7568,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,75.13,278.65,0.0 +64,176,2048,2048,ck,4,0,18.9101,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.07,278.99,0.0 +64,184,2048,2048,ck,5,0,19.6285,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.64,271.28,0.0 +64,192,2048,2048,ck,5,0,19.9932,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.56,268.79,0.0 +64,200,2048,2048,ck,5,0,23.2685,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.1,233.07,0.0 +64,208,2048,2048,ck,5,0,23.8354,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.2,229.59,0.0 +64,216,2048,2048,ck,5,0,25.3111,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,71.59,218.14,0.0 +64,224,2048,2048,ck,5,0,25.2653,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,74.37,220.48,0.0 +64,232,2048,2048,ck,5,0,25.1212,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.47,223.7,0.0 +64,240,2048,2048,ck,5,0,25.2065,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.87,224.9,0.0 +64,248,2048,2048,ck,5,0,23.8498,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.23,239.75,0.0 +64,256,2048,2048,ck,5,0,24.0041,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.46,240.26,0.0 +64,264,2048,2048,ck,5,0,26.923,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,82.26,216.04,0.0 +64,272,2048,2048,ck,5,0,28.8829,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.0,203.08,0.0 +64,280,2048,2048,ck,5,0,29.2356,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.34,202.31,0.0 +64,288,2048,2048,ck,5,0,29.7398,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,81.24,200.53,0.0 +64,296,2048,2048,ck,5,0,29.8122,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.29,201.69,0.0 +64,304,2048,2048,ck,5,0,29.9148,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.25,202.64,0.0 +64,312,2048,2048,ck,5,0,32.2663,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,81.11,189.4,0.0 +64,320,2048,2048,ck,5,0,32.5607,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,82.44,189.2,0.0 +64,328,2048,2048,ck,5,0,32.5465,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.54,190.79,0.0 +64,336,2048,2048,ck,5,0,33.1155,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.11,189.0,0.0 +64,344,2048,2048,ck,5,0,34.3755,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.95,183.5,0.0 +64,352,2048,2048,ck,5,0,34.2199,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.29,185.77,0.0 +64,360,2048,2048,ck,4,0,34.9894,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.31,183.09,0.0 +64,368,2048,2048,ck,4,0,35.059,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.05,184.13,0.0 +64,376,2048,2048,ck,5,0,35.5759,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.66,182.83,0.0 +64,384,2048,2048,ck,4,0,35.5943,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,90.5,184.12,0.0 +64,392,2048,2048,ck,5,0,39.0868,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.13,168.93,0.0 +64,400,2048,2048,ck,5,0,38.6522,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.81,172.1,0.0 +64,408,2048,2048,ck,5,0,39.0666,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.61,171.53,0.0 +64,416,2048,2048,ck,5,0,38.9967,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.49,173.1,0.0 +64,424,2048,2048,ck,5,0,40.9917,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.77,165.87,0.0 +64,432,2048,2048,ck,5,0,40.4778,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.53,169.19,0.0 +64,440,2048,2048,ck,5,0,41.8265,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.25,164.91,0.0 +64,448,2048,2048,ck,5,0,42.0404,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.39,165.24,0.0 +64,456,2048,2048,ck,5,0,42.5618,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.87,164.37,0.0 +64,464,2048,2048,ck,5,0,42.954,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,90.62,164.02,0.0 +64,472,2048,2048,ck,5,0,40.9894,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,96.6,173.08,0.0 +64,480,2048,2048,ck,5,0,41.7296,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,96.49,171.18,0.0 +64,488,2048,2048,ck,4,0,41.6525,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.28,172.68,0.0 +64,496,2048,2048,ck,4,0,41.9262,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.24,172.73,0.0 +64,504,2048,2048,ck,5,0,41.8391,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.05,174.26,0.0 +64,512,2048,2048,ck,5,0,42.2431,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.67,173.76,0.0 +64,1024,2048,2048,ck,4,0,70.2808,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.22,149.2,0.0 +64,2048,2048,2048,ck,4,0,134.0017,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,128.21,125.2,0.0 +64,4096,2048,2048,ck,4,0,263.3279,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.48,111.5,0.0 +64,8192,2048,2048,ck,4,0,520.1263,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.12,104.83,0.0 +64,16384,2048,2048,ck,4,0,1034.023,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.92,101.41,0.0 +64,20480,2048,2048,ck,4,0,1287.5347,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.43,100.99,0.0 +64,1,2560,2048,ck,7,0,14.4911,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.72,362.29,0.0 +64,2,2560,2048,ck,7,0,13.2125,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.59,397.9,0.0 +64,4,2560,2048,ck,7,0,13.2684,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,3.16,397.3,0.0 +64,8,2560,2048,ck,7,0,13.3721,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,6.27,396.36,0.0 +64,16,2560,2048,ck,7,0,13.5109,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,12.42,396.54,0.0 +64,24,2560,2048,ck,7,0,15.002,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,16.77,360.95,0.0 +64,32,2560,2048,ck,7,0,15.2073,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,22.06,359.84,0.0 +64,40,2560,2048,ck,7,0,14.9051,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,28.14,370.99,0.0 +64,48,2560,2048,ck,7,0,14.9905,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,33.58,372.7,0.0 +64,56,2560,2048,ck,5,0,16.3004,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,36.02,346.27,0.0 +64,64,2560,2048,ck,4,0,18.6764,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,35.93,305.29,0.0 +64,72,2560,2048,ck,5,0,17.5906,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,42.92,327.39,0.0 +64,80,2560,2048,ck,4,0,18.5466,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,45.23,313.61,0.0 +64,88,2560,2048,ck,5,0,17.5771,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,52.5,334.17,0.0 +64,96,2560,2048,ck,5,0,17.9183,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.18,331.0,0.0 +64,104,2560,2048,ck,5,0,19.1607,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.91,312.53,0.0 +64,112,2560,2048,ck,5,0,19.769,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.41,305.82,0.0 +64,120,2560,2048,ck,5,0,19.6431,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.06,310.7,0.0 +64,128,2560,2048,ck,4,0,20.135,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.66,305.95,0.0 +64,136,2560,2048,ck,4,0,20.2559,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,70.4,306.96,0.0 +64,144,2560,2048,ck,4,0,20.3461,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,74.21,308.42,0.0 +64,152,2560,2048,ck,5,0,24.0417,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.29,263.39,0.0 +64,160,2560,2048,ck,5,0,24.1438,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.49,264.65,0.0 +64,168,2560,2048,ck,5,0,26.5122,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.45,243.17,0.0 +64,176,2560,2048,ck,5,0,26.8536,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,68.72,242.22,0.0 +64,184,2560,2048,ck,5,0,29.9102,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.51,219.38,0.0 +64,192,2560,2048,ck,5,0,30.9032,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.15,214.19,0.0 +64,200,2560,2048,ck,5,0,31.3765,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.84,212.79,0.0 +64,208,2560,2048,ck,4,0,32.6094,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.88,206.5,0.0 +64,216,2560,2048,ck,5,0,32.6098,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.46,208.26,0.0 +64,224,2560,2048,ck,5,0,32.4252,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.44,211.21,0.0 +64,232,2560,2048,ck,5,0,33.0569,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.59,208.91,0.0 +64,240,2560,2048,ck,5,0,34.3665,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.23,202.62,0.0 +64,248,2560,2048,ck,5,0,27.542,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,94.42,254.9,0.0 +64,256,2560,2048,ck,5,0,27.9832,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.93,252.93,0.0 +64,272,2560,2048,ck,4,0,36.3576,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.45,197.83,0.0 +64,288,2560,2048,ck,4,0,37.6533,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.2,194.07,0.0 +64,304,2560,2048,ck,4,0,37.611,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.75,197.33,0.0 +64,320,2560,2048,ck,5,0,42.4025,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.13,177.74,0.0 +64,336,2560,2048,ck,5,0,43.7114,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.6,175.04,0.0 +64,352,2560,2048,ck,5,0,45.0782,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,81.88,172.28,0.0 +64,368,2560,2048,ck,5,0,46.3952,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.17,169.86,0.0 +64,384,2560,2048,ck,4,0,45.1242,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.23,177.19,0.0 +64,400,2560,2048,ck,4,0,49.2663,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.14,164.62,0.0 +64,416,2560,2048,ck,5,0,50.1851,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.92,163.89,0.0 +64,432,2560,2048,ck,5,0,52.3525,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.53,159.29,0.0 +64,448,2560,2048,ck,4,0,52.7866,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.99,160.16,0.0 +64,464,2560,2048,ck,5,0,54.8,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.78,156.37,0.0 +64,480,2560,2048,ck,5,0,52.8143,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.3,164.42,0.0 +64,496,2560,2048,ck,4,0,52.4525,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.16,167.74,0.0 +64,512,2560,2048,ck,5,0,51.7684,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.71,172.17,0.0 +64,8192,2560,2048,ck,4,0,648.9185,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.37,98.57,0.0 diff --git a/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3_0.6b.csv b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3_0.6b.csv new file mode 100644 index 0000000000..95ada42c60 --- /dev/null +++ b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3_0.6b.csv @@ -0,0 +1,49 @@ +cu_num,M,N,K,libtype,kernelId,splitK,us,kernelName,tflops,bw,errRatio +64,16,1024,2048,ck,4,0,12.5787,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,5.34,171.93,0.0 +64,16,1024,3072,ck,7,0,12.0654,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,8.34,267.51,0.0 +64,32,1024,2048,ck,5,0,10.7403,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,12.5,207.46,0.0 +64,32,1024,3072,ck,7,0,11.6332,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,17.31,284.49,0.0 +64,64,1024,2048,ck,7,0,10.1227,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,26.52,233.07,0.0 +64,64,1024,3072,ck,7,0,12.7136,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,31.67,273.2,0.0 +64,128,1024,2048,ck,5,0,10.7245,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,50.06,244.43,0.0 +64,128,1024,3072,ck,5,0,14.3693,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.04,264.53,0.0 +64,256,1024,2048,ck,5,0,12.9736,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,82.76,242.47,0.0 +64,256,1024,3072,ck,5,0,18.6335,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.44,239.16,0.0 +64,512,1024,2048,ck,5,0,23.3616,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,91.92,179.54,0.0 +64,512,1024,3072,ck,5,0,36.0217,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.42,160.1,0.0 +64,1024,1024,2048,ck,5,0,42.575,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.88,147.77,0.0 +64,1024,1024,3072,ck,4,0,59.3421,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.56,141.36,0.0 +64,2048,1024,2048,ck,4,0,71.0219,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,120.95,147.64,0.0 +64,2048,1024,3072,ck,4,0,102.0793,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,126.22,133.54,0.0 +64,4096,1024,2048,ck,4,0,133.2712,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,128.91,141.62,0.0 +64,4096,1024,3072,ck,4,0,192.9952,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.53,124.96,0.0 +64,8192,1024,2048,ck,4,0,263.7063,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.3,135.19,0.0 +64,8192,1024,3072,ck,4,0,373.7466,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.9,120.64,0.0 +64,16384,1024,2048,ck,4,0,524.496,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,131.02,131.95,0.0 +64,16384,1024,3072,ck,4,0,753.9319,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,136.72,115.44,0.0 +64,20480,1024,2048,ck,4,0,671.5965,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,127.9,128.03,0.0 +64,20480,1024,3072,ck,4,0,971.9149,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.57,111.12,0.0 +64,16,4096,1024,ck,5,0,14.4201,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,9.31,301.09,0.0 +64,32,4096,1024,ck,7,0,14.1387,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,18.99,317.51,0.0 +64,64,4096,1024,ck,7,0,16.8052,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,31.95,284.68,0.0 +64,128,4096,1024,ck,5,0,20.9177,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,51.33,256.91,0.0 +64,256,4096,1024,ck,4,0,24.9796,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.97,262.36,0.0 +64,512,4096,1024,ck,4,0,39.7999,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,107.91,223.94,0.0 +64,1024,4096,1024,ck,4,0,75.5577,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,113.69,180.41,0.0 +64,2048,4096,1024,ck,4,0,145.018,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,118.47,159.07,0.0 +64,4096,4096,1024,ck,4,0,304.1897,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,112.95,137.88,0.0 +64,8192,4096,1024,ck,4,0,598.164,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.88,133.23,0.0 +64,16384,4096,1024,ck,4,0,1200.762,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.46,129.24,0.0 +64,20480,4096,1024,ck,4,0,1515.3033,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,113.38,127.33,0.0 +64,16,6144,1024,ck,7,0,18.8588,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,10.68,344.9,0.0 +64,32,6144,1024,ck,7,0,21.3488,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,18.86,314.65,0.0 +64,64,6144,1024,ck,7,0,21.7232,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,37.07,328.84,0.0 +64,128,6144,1024,ck,7,0,28.7454,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,56.03,278.15,0.0 +64,256,6144,1024,ck,4,0,36.2969,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.75,267.22,0.0 +64,512,6144,1024,ck,4,0,56.7536,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,113.52,230.95,0.0 +64,1024,6144,1024,ck,4,0,109.3472,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.83,182.2,0.0 +64,2048,6144,1024,ck,4,0,218.8243,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.76,153.34,0.0 +64,4096,6144,1024,ck,4,0,446.9151,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.32,136.08,0.0 +64,8192,6144,1024,ck,4,0,900.8286,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.43,128.04,0.0 +64,16384,6144,1024,ck,4,0,1795.133,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.84,125.0,0.0 +64,20480,6144,1024,ck,4,0,2235.6229,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.27,124.76,0.0 diff --git a/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3_5-27B.csv b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3_5-27B.csv new file mode 100644 index 0000000000..be35da8db4 --- /dev/null +++ b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3_5-27B.csv @@ -0,0 +1,366 @@ +cu_num,M,N,K,libtype,kernelId,splitK,us,kernelName,tflops,bw,errRatio +64,1,5120,3072,ck,4,0,35.8449,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,0.88,439.17,0.0 +64,1,5120,8704,ck,7,0,74.0455,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.2,602.11,0.0 +64,2,5120,3072,ck,7,0,32.5003,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.94,484.77,0.0 +64,2,5120,8704,ck,7,0,74.1262,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,2.4,601.71,0.0 +64,4,5120,3072,ck,4,0,35.969,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,3.5,438.76,0.0 +64,4,5120,8704,ck,7,0,74.4339,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,4.79,599.73,0.0 +64,8,5120,3072,ck,7,0,32.8681,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,7.66,481.78,0.0 +64,8,5120,8704,ck,7,0,75.0469,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,9.5,595.84,0.0 +64,16,5120,3072,ck,7,0,33.1785,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,15.17,480.48,0.0 +64,16,5120,8704,ck,7,0,75.7674,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,18.82,592.18,0.0 +64,24,5120,3072,ck,7,0,36.1101,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,20.91,444.42,0.0 +64,24,5120,8704,ck,5,0,77.0484,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,27.76,584.3,0.0 +64,32,5120,3072,ck,7,0,35.9258,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,28.02,449.67,0.0 +64,32,5120,8704,ck,5,0,78.3997,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,36.38,576.16,0.0 +64,40,5120,3072,ck,5,0,38.5909,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,32.61,421.37,0.0 +64,40,5120,8704,ck,5,0,78.5167,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,45.41,577.23,0.0 +64,48,5120,3072,ck,5,0,38.9412,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,38.78,420.32,0.0 +64,48,5120,8704,ck,5,0,80.9134,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,52.87,562.01,0.0 +64,56,5120,3072,ck,7,0,39.7148,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,44.36,414.81,0.0 +64,56,5120,8704,ck,5,0,80.3693,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,62.1,567.7,0.0 +64,64,5120,3072,ck,7,0,40.4736,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,49.74,409.66,0.0 +64,64,5120,8704,ck,5,0,81.9423,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.61,558.65,0.0 +64,72,5120,3072,ck,5,0,48.5162,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,46.68,343.95,0.0 +64,72,5120,8704,ck,5,0,92.863,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.1,494.58,0.0 +64,80,5120,3072,ck,5,0,49.5481,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,50.79,338.94,0.0 +64,80,5120,8704,ck,5,0,92.5285,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.06,498.01,0.0 +64,88,5120,3072,ck,4,0,53.2705,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,51.97,317.25,0.0 +64,88,5120,8704,ck,4,0,110.4217,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,71.03,418.68,0.0 +64,96,5120,3072,ck,4,0,52.5399,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,57.48,323.69,0.0 +64,96,5120,8704,ck,5,0,108.663,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.74,426.85,0.0 +64,104,5120,3072,ck,4,0,54.4798,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,60.05,314.12,0.0 +64,104,5120,8704,ck,4,0,114.0897,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,81.25,407.88,0.0 +64,112,5120,3072,ck,4,0,52.7406,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.8,326.5,0.0 +64,112,5120,8704,ck,5,0,115.5738,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.37,403.95,0.0 +64,120,5120,3072,ck,4,0,50.5377,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,74.69,342.83,0.0 +64,120,5120,8704,ck,4,0,100.01,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.94,468.33,0.0 +64,128,5120,3072,ck,4,0,53.0801,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,75.86,328.42,0.0 +64,128,5120,8704,ck,5,0,99.397,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.78,472.74,0.0 +64,136,5120,3072,ck,4,0,59.8006,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,71.54,293.29,0.0 +64,136,5120,8704,ck,4,0,135.5545,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.42,347.76,0.0 +64,144,5120,3072,ck,4,0,60.2411,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,75.2,292.92,0.0 +64,144,5120,8704,ck,4,0,137.3691,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,93.43,344.27,0.0 +64,152,5120,3072,ck,4,0,64.0916,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,74.6,276.98,0.0 +64,152,5120,8704,ck,5,0,136.1241,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.52,348.53,0.0 +64,160,5120,3072,ck,4,0,60.0399,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.83,297.44,0.0 +64,160,5120,8704,ck,5,0,135.6244,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,105.15,350.94,0.0 +64,168,5120,3072,ck,4,0,69.7486,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,75.77,257.57,0.0 +64,168,5120,8704,ck,5,0,146.9721,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.88,324.87,0.0 +64,176,5120,3072,ck,4,0,65.8057,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.13,274.62,0.0 +64,176,5120,8704,ck,5,0,148.4097,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,105.7,322.75,0.0 +64,184,5120,3072,ck,4,0,77.0512,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,75.12,235.92,0.0 +64,184,5120,8704,ck,5,0,173.4977,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,94.52,276.95,0.0 +64,192,5120,3072,ck,4,0,77.8566,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.58,234.85,0.0 +64,192,5120,8704,ck,5,0,174.1203,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.28,276.83,0.0 +64,200,5120,3072,ck,4,0,77.9719,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.69,235.87,0.0 +64,200,5120,8704,ck,4,0,181.1139,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.42,266.98,0.0 +64,208,5120,3072,ck,4,0,72.2924,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,90.51,255.87,0.0 +64,208,5120,8704,ck,4,0,181.636,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.07,267.04,0.0 +64,216,5120,3072,ck,4,0,78.12,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.98,238.15,0.0 +64,216,5120,8704,ck,4,0,189.0926,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.81,257.32,0.0 +64,224,5120,3072,ck,4,0,79.321,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.83,235.88,0.0 +64,224,5120,8704,ck,4,0,187.9046,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.25,259.75,0.0 +64,232,5120,3072,ck,4,0,74.5569,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,97.89,252.38,0.0 +64,232,5120,8704,ck,4,0,175.3156,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.95,279.26,0.0 +64,240,5120,3072,ck,4,0,76.0598,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.26,248.8,0.0 +64,240,5120,8704,ck,4,0,178.0808,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,120.12,275.78,0.0 +64,248,5120,3072,ck,4,0,68.5878,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,113.74,277.45,0.0 +64,248,5120,8704,ck,4,0,165.3605,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.67,297.91,0.0 +64,256,5120,3072,ck,4,0,69.8923,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.22,273.8,0.0 +64,256,5120,8704,ck,4,0,167.3519,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,136.34,295.27,0.0 +64,264,5120,3072,ck,4,0,81.0515,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.46,237.42,0.0 +64,264,5120,8704,ck,4,0,201.0811,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.02,246.5,0.0 +64,272,5120,3072,ck,4,0,82.2626,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,104.01,235.22,0.0 +64,272,5120,8704,ck,4,0,199.7823,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,121.35,248.86,0.0 +64,280,5120,3072,ck,4,0,92.8186,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,94.9,209.61,0.0 +64,280,5120,8704,ck,4,0,230.8247,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.12,216.05,0.0 +64,288,5120,3072,ck,4,0,92.6305,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,97.8,211.19,0.0 +64,288,5120,8704,ck,4,0,231.1063,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.07,216.44,0.0 +64,296,5120,3072,ck,4,0,100.3553,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,92.78,195.99,0.0 +64,296,5120,8704,ck,4,0,224.7026,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.41,223.28,0.0 +64,304,5120,3072,ck,4,0,94.1803,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.54,209.97,0.0 +64,304,5120,8704,ck,4,0,226.0766,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,119.85,222.59,0.0 +64,312,5120,3072,ck,4,0,106.1679,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,92.44,187.27,0.0 +64,312,5120,8704,ck,4,0,248.7446,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.79,202.92,0.0 +64,320,5120,3072,ck,5,0,114.9468,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.57,173.89,0.0 +64,320,5120,8704,ck,4,0,253.2089,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,112.64,199.94,0.0 +64,328,5120,3072,ck,4,0,94.1648,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,109.57,213.4,0.0 +64,328,5120,8704,ck,4,0,238.1408,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.76,213.23,0.0 +64,336,5120,3072,ck,4,0,94.1173,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,112.3,214.64,0.0 +64,336,5120,8704,ck,4,0,237.6507,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,126.01,214.3,0.0 +64,344,5120,3072,ck,4,0,109.9089,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.46,184.77,0.0 +64,344,5120,8704,ck,4,0,255.0689,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,120.2,200.26,0.0 +64,352,5120,3072,ck,4,0,102.297,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.24,199.56,0.0 +64,352,5120,8704,ck,4,0,256.2715,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.42,199.92,0.0 +64,360,5120,3072,ck,4,0,107.4799,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,105.37,190.93,0.0 +64,360,5120,8704,ck,4,0,278.8013,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.09,184.3,0.0 +64,368,5120,3072,ck,4,0,108.8708,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.33,189.47,0.0 +64,368,5120,8704,ck,4,0,278.4959,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.77,185.05,0.0 +64,376,5120,3072,ck,4,0,115.2247,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.65,179.94,0.0 +64,376,5120,8704,ck,4,0,257.0561,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.37,201.07,0.0 +64,384,5120,3072,ck,4,0,108.543,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.29,192.0,0.0 +64,384,5120,8704,ck,4,0,254.5963,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,134.43,203.61,0.0 +64,392,5120,3072,ck,4,0,117.56,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,104.89,178.18,0.0 +64,392,5120,8704,ck,4,0,293.4792,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,119.05,177.15,0.0 +64,400,5120,3072,ck,4,0,116.2589,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.23,181.09,0.0 +64,400,5120,8704,ck,4,0,293.589,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,121.43,177.6,0.0 +64,408,5120,3072,ck,4,0,116.6696,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,110.01,181.37,0.0 +64,408,5120,8704,ck,4,0,308.7743,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.77,169.36,0.0 +64,416,5120,3072,ck,4,0,124.735,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,104.91,170.49,0.0 +64,416,5120,8704,ck,4,0,307.2475,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,120.68,170.69,0.0 +64,424,5120,3072,ck,4,0,122.4101,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.96,174.6,0.0 +64,424,5120,8704,ck,4,0,312.3243,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,121.0,168.4,0.0 +64,432,5120,3072,ck,4,0,121.4453,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.9,176.87,0.0 +64,432,5120,8704,ck,4,0,315.9592,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,121.86,166.95,0.0 +64,440,5120,3072,ck,4,0,144.8272,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.57,149.05,0.0 +64,440,5120,8704,ck,4,0,328.4189,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,119.41,161.07,0.0 +64,448,5120,3072,ck,4,0,142.1904,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.11,152.56,0.0 +64,448,5120,8704,ck,4,0,326.8641,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.16,162.3,0.0 +64,456,5120,3072,ck,4,0,124.6693,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.06,174.85,0.0 +64,456,5120,8704,ck,4,0,331.842,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.48,160.33,0.0 +64,464,5120,3072,ck,4,0,126.366,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.51,173.35,0.0 +64,464,5120,8704,ck,4,0,332.8535,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,124.25,160.29,0.0 +64,472,5120,3072,ck,4,0,125.6154,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,118.2,175.23,0.0 +64,472,5120,8704,ck,4,0,315.8533,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.19,169.4,0.0 +64,480,5120,3072,ck,4,0,138.1526,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,109.3,160.1,0.0 +64,480,5120,8704,ck,4,0,318.8913,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,134.16,168.26,0.0 +64,488,5120,3072,ck,4,0,125.6216,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.2,176.92,0.0 +64,488,5120,8704,ck,4,0,320.4373,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.74,167.92,0.0 +64,496,5120,3072,ck,4,0,125.1677,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,124.66,178.41,0.0 +64,496,5120,8704,ck,4,0,315.5768,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,140.09,170.99,0.0 +64,504,5120,3072,ck,4,0,120.172,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,131.93,186.71,0.0 +64,504,5120,8704,ck,4,0,311.3322,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,144.29,173.81,0.0 +64,512,5120,3072,ck,4,0,119.8345,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,134.4,188.13,0.0 +64,512,5120,8704,ck,4,0,313.226,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,145.69,173.24,0.0 +64,1024,5120,3072,ck,4,0,240.8796,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.73,121.89,0.0 +64,1024,5120,8704,ck,4,0,625.6462,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,145.88,102.24,0.0 +64,2048,5120,3072,ck,4,0,474.6263,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.74,90.58,0.0 +64,2048,5120,8704,ck,4,0,1240.9607,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,147.09,67.18,0.0 +64,4096,5120,3072,ck,4,0,940.0359,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.07,74.74,0.0 +64,4096,5120,8704,ck,4,0,2539.3671,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,143.77,48.11,0.0 +64,8192,5120,3072,ck,4,0,1850.1587,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,139.28,67.44,0.0 +64,8192,5120,8704,ck,4,0,5060.5879,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,144.28,39.47,0.0 +64,16384,5120,3072,ck,4,0,3757.8692,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.15,62.22,0.0 +64,16384,5120,8704,ck,4,0,10235.6174,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,142.67,34.68,0.0 +64,20480,5120,3072,ck,4,0,4723.0002,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,136.41,61.05,0.0 +64,20480,5120,8704,ck,4,0,12639.3843,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,144.42,34.22,0.0 +64,1,7168,5120,ck,7,0,67.055,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.09,547.6,0.0 +64,2,7168,5120,ck,7,0,67.8543,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,2.16,541.44,0.0 +64,4,7168,5120,ck,7,0,67.9946,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,4.32,540.9,0.0 +64,8,7168,5120,ck,7,0,68.1301,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,8.62,540.96,0.0 +64,16,7168,5120,ck,7,0,68.9515,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,17.03,536.78,0.0 +64,24,7168,5120,ck,4,0,73.3388,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,24.02,506.79,0.0 +64,32,7168,5120,ck,5,0,74.3126,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,31.61,502.24,0.0 +64,40,7168,5120,ck,7,0,76.2882,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,38.49,491.27,0.0 +64,48,7168,5120,ck,7,0,76.1381,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,46.27,494.29,0.0 +64,56,7168,5120,ck,7,0,86.9846,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,47.25,434.44,0.0 +64,64,7168,5120,ck,7,0,85.6914,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,54.82,442.81,0.0 +64,72,7168,5120,ck,4,0,101.1864,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,52.23,376.54,0.0 +64,80,7168,5120,ck,4,0,102.3098,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,57.39,373.93,0.0 +64,88,7168,5120,ck,5,0,98.0691,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.86,391.69,0.0 +64,96,7168,5120,ck,4,0,96.5738,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.96,399.36,0.0 +64,104,7168,5120,ck,4,0,104.8764,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.79,369.23,0.0 +64,112,7168,5120,ck,4,0,107.0064,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.83,363.34,0.0 +64,120,7168,5120,ck,4,0,101.1914,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.04,385.75,0.0 +64,128,7168,5120,ck,4,0,102.1472,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,91.98,383.67,0.0 +64,136,7168,5120,ck,4,0,129.9391,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.82,302.8,0.0 +64,144,7168,5120,ck,4,0,136.7413,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.3,288.88,0.0 +64,152,7168,5120,ck,4,0,122.4671,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,91.1,323.82,0.0 +64,160,7168,5120,ck,4,0,120.632,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,97.35,330.04,0.0 +64,168,7168,5120,ck,4,0,130.7588,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,94.31,305.67,0.0 +64,176,7168,5120,ck,5,0,129.5027,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.75,309.83,0.0 +64,184,7168,5120,ck,5,0,175.1763,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.1,229.94,0.0 +64,192,7168,5120,ck,5,0,178.8028,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.82,226.15,0.0 +64,200,7168,5120,ck,4,0,150.387,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,97.62,269.91,0.0 +64,208,7168,5120,ck,4,0,153.2228,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.64,265.93,0.0 +64,216,7168,5120,ck,4,0,181.0379,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.58,225.93,0.0 +64,224,7168,5120,ck,4,0,184.7581,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.99,222.23,0.0 +64,232,7168,5120,ck,4,0,148.6517,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.56,277.25,0.0 +64,240,7168,5120,ck,4,0,152.1646,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.77,271.87,0.0 +64,248,7168,5120,ck,4,0,142.5502,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,127.7,291.3,0.0 +64,256,7168,5120,ck,4,0,141.3901,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.9,294.79,0.0 +64,264,7168,5120,ck,4,0,168.7103,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.86,247.98,0.0 +64,272,7168,5120,ck,4,0,169.796,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.58,247.31,0.0 +64,280,7168,5120,ck,4,0,221.1661,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,92.93,190.57,0.0 +64,288,7168,5120,ck,4,0,222.421,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.04,190.2,0.0 +64,296,7168,5120,ck,4,0,202.3839,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,107.35,209.8,0.0 +64,304,7168,5120,ck,4,0,200.3786,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.36,212.67,0.0 +64,312,7168,5120,ck,4,0,245.0979,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,93.44,174.5,0.0 +64,320,7168,5120,ck,4,0,247.9154,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,94.74,173.15,0.0 +64,328,7168,5120,ck,4,0,202.1131,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,119.12,213.16,0.0 +64,336,7168,5120,ck,4,0,203.4273,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,121.23,212.54,0.0 +64,344,7168,5120,ck,4,0,220.0581,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.74,197.19,0.0 +64,352,7168,5120,ck,4,0,220.407,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.22,197.58,0.0 +64,360,7168,5120,ck,4,0,261.3669,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.1,167.21,0.0 +64,368,7168,5120,ck,4,0,262.4339,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.93,167.13,0.0 +64,376,7168,5120,ck,4,0,232.2583,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,118.83,189.51,0.0 +64,384,7168,5120,ck,4,0,229.2603,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.94,192.67,0.0 +64,392,7168,5120,ck,4,0,269.0352,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.95,164.76,0.0 +64,400,7168,5120,ck,4,0,268.942,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,109.17,165.4,0.0 +64,408,7168,5120,ck,4,0,274.7943,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.98,162.44,0.0 +64,416,7168,5120,ck,4,0,274.4765,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.25,163.2,0.0 +64,424,7168,5120,ck,4,0,299.0424,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,104.07,150.31,0.0 +64,432,7168,5120,ck,4,0,292.9403,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.24,153.97,0.0 +64,440,7168,5120,ck,4,0,324.6209,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.49,139.43,0.0 +64,448,7168,5120,ck,4,0,326.2797,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.78,139.19,0.0 +64,456,7168,5120,ck,4,0,294.9812,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,113.47,154.49,0.0 +64,464,7168,5120,ck,4,0,292.9574,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,116.25,156.09,0.0 +64,472,7168,5120,ck,4,0,280.3803,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,123.56,163.65,0.0 +64,480,7168,5120,ck,4,0,276.2382,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,127.54,166.66,0.0 +64,488,7168,5120,ck,4,0,275.4432,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.04,167.71,0.0 +64,496,7168,5120,ck,4,0,277.1214,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,131.37,167.26,0.0 +64,504,7168,5120,ck,4,0,271.4847,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,136.26,171.3,0.0 +64,512,7168,5120,ck,4,0,277.2898,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.53,168.28,0.0 +64,1024,7168,5120,ck,4,0,537.9461,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,139.72,105.26,0.0 +64,2048,7168,5120,ck,4,0,1066.3435,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,140.97,71.78,0.0 +64,4096,7168,5120,ck,4,0,2102.2687,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,143.01,55.36,0.0 +64,8192,7168,5120,ck,4,0,4269.2965,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,140.84,45.93,0.0 +64,16384,7168,5120,ck,4,0,8458.2401,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,142.18,42.03,0.0 +64,20480,7168,5120,ck,4,0,10720.9979,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,140.21,40.59,0.0 +64,1,8192,5120,ck,7,0,76.6738,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.09,547.31,0.0 +64,2,8192,5120,ck,7,0,77.3314,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,2.17,542.94,0.0 +64,4,8192,5120,ck,7,0,78.0258,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,4.3,538.66,0.0 +64,8,8192,5120,ck,7,0,78.7914,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,8.52,534.51,0.0 +64,16,8192,5120,ck,7,0,79.6993,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,16.84,530.58,0.0 +64,24,8192,5120,ck,7,0,85.7176,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,23.49,495.34,0.0 +64,32,8192,5120,ck,7,0,86.2539,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,31.12,494.25,0.0 +64,40,8192,5120,ck,7,0,78.8229,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,42.57,543.03,0.0 +64,48,8192,5120,ck,7,0,79.0824,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,50.92,543.42,0.0 +64,56,8192,5120,ck,7,0,95.1865,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,49.35,453.29,0.0 +64,64,8192,5120,ck,7,0,96.0011,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,55.92,451.24,0.0 +64,72,8192,5120,ck,4,0,103.9705,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,58.09,418.3,0.0 +64,80,8192,5120,ck,4,0,104.4642,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.24,417.97,0.0 +64,88,8192,5120,ck,4,0,101.3989,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.8,432.31,0.0 +64,96,8192,5120,ck,4,0,102.0517,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.91,431.23,0.0 +64,104,8192,5120,ck,4,0,114.5421,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.17,385.7,0.0 +64,112,8192,5120,ck,4,0,116.4693,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.67,380.8,0.0 +64,120,8192,5120,ck,4,0,107.3381,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,93.78,414.8,0.0 +64,128,8192,5120,ck,4,0,109.1852,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.34,409.36,0.0 +64,136,8192,5120,ck,4,0,146.5093,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.87,306.24,0.0 +64,144,8192,5120,ck,4,0,148.0828,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,81.57,304.15,0.0 +64,152,8192,5120,ck,4,0,134.235,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,94.99,336.81,0.0 +64,160,8192,5120,ck,4,0,134.6065,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.71,337.16,0.0 +64,168,8192,5120,ck,4,0,139.8929,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.74,325.65,0.0 +64,176,8192,5120,ck,4,0,140.0007,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,105.46,326.63,0.0 +64,184,8192,5120,ck,4,0,183.021,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.33,250.79,0.0 +64,192,8192,5120,ck,4,0,184.5597,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.27,249.63,0.0 +64,200,8192,5120,ck,4,0,170.5095,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.39,271.21,0.0 +64,208,8192,5120,ck,4,0,171.1904,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.92,271.14,0.0 +64,216,8192,5120,ck,4,0,193.7095,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,93.54,240.5,0.0 +64,224,8192,5120,ck,4,0,200.9746,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,93.5,232.67,0.0 +64,232,8192,5120,ck,4,0,164.5046,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,118.3,285.29,0.0 +64,240,8192,5120,ck,4,0,164.1554,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.64,286.95,0.0 +64,248,8192,5120,ck,4,0,167.7187,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,124.04,281.88,0.0 +64,256,8192,5120,ck,4,0,168.7507,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,127.26,281.17,0.0 +64,264,8192,5120,ck,4,0,192.8451,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.84,246.93,0.0 +64,272,8192,5120,ck,4,0,190.4972,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,119.78,250.88,0.0 +64,280,8192,5120,ck,4,0,237.1027,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.06,202.29,0.0 +64,288,8192,5120,ck,4,0,235.8966,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.41,204.06,0.0 +64,296,8192,5120,ck,4,0,224.2165,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,110.74,215.45,0.0 +64,304,8192,5120,ck,4,0,228.9283,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.39,211.77,0.0 +64,312,8192,5120,ck,4,0,255.9937,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.24,190.05,0.0 +64,320,8192,5120,ck,4,0,259.27,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.54,188.31,0.0 +64,328,8192,5120,ck,4,0,219.3642,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,125.43,223.36,0.0 +64,336,8192,5120,ck,4,0,219.2196,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,128.57,224.29,0.0 +64,344,8192,5120,ck,4,0,244.0908,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,118.22,202.14,0.0 +64,352,8192,5120,ck,4,0,245.4089,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,120.32,201.75,0.0 +64,360,8192,5120,ck,4,0,280.6081,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,107.62,177.06,0.0 +64,368,8192,5120,ck,4,0,283.8888,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.74,175.62,0.0 +64,376,8192,5120,ck,4,0,257.9842,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.26,193.92,0.0 +64,384,8192,5120,ck,4,0,255.9412,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,125.86,196.14,0.0 +64,392,8192,5120,ck,4,0,307.1995,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,107.04,163.97,0.0 +64,400,8192,5120,ck,4,0,310.0359,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.23,163.03,0.0 +64,408,8192,5120,ck,4,0,294.0402,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,116.4,172.48,0.0 +64,416,8192,5120,ck,4,0,301.2059,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.86,168.95,0.0 +64,424,8192,5120,ck,4,0,311.0252,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.36,164.17,0.0 +64,432,8192,5120,ck,4,0,313.1823,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.71,163.59,0.0 +64,440,8192,5120,ck,4,0,330.955,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.53,155.32,0.0 +64,448,8192,5120,ck,4,0,330.5297,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,113.7,156.04,0.0 +64,456,8192,5120,ck,4,0,342.4588,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.7,151.11,0.0 +64,464,8192,5120,ck,4,0,347.2986,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,112.07,149.5,0.0 +64,472,8192,5120,ck,4,0,315.0676,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,125.67,165.34,0.0 +64,480,8192,5120,ck,4,0,311.5867,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,129.23,167.74,0.0 +64,488,8192,5120,ck,4,0,309.1985,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.4,169.59,0.0 +64,496,8192,5120,ck,4,0,305.9953,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.97,171.93,0.0 +64,504,8192,5120,ck,4,0,306.7255,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.84,172.08,0.0 +64,512,8192,5120,ck,4,0,313.9396,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,136.81,168.67,0.0 +64,1024,8192,5120,ck,4,0,617.2382,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,139.17,103.63,0.0 +64,2048,8192,5120,ck,4,0,1185.2902,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,144.94,72.54,0.0 +64,4096,8192,5120,ck,4,0,2384.1821,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,144.12,54.54,0.0 +64,8192,8192,5120,ck,4,0,4832.5317,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,142.2,45.13,0.0 +64,16384,8192,5120,ck,4,0,9644.5615,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,142.5,40.88,0.0 +64,20480,8192,5120,ck,4,0,12131.3009,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,141.62,39.76,0.0 +64,1,17408,5120,ck,7,0,160.3868,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.11,555.96,0.0 +64,2,17408,5120,ck,7,0,160.3319,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,2.22,556.4,0.0 +64,4,17408,5120,ck,7,0,161.3785,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,4.42,553.29,0.0 +64,8,17408,5120,ck,7,0,162.6844,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,8.77,549.83,0.0 +64,16,17408,5120,ck,7,0,164.8444,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,17.3,544.56,0.0 +64,24,17408,5120,ck,7,0,156.6888,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,27.3,574.94,0.0 +64,32,17408,5120,ck,7,0,158.1821,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,36.06,571.54,0.0 +64,40,17408,5120,ck,7,0,164.1216,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,43.45,552.8,0.0 +64,48,17408,5120,ck,7,0,165.0289,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,51.85,551.7,0.0 +64,56,17408,5120,ck,7,0,176.2058,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,56.65,518.52,0.0 +64,64,17408,5120,ck,7,0,177.4502,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,64.29,516.68,0.0 +64,72,17408,5120,ck,5,0,233.7382,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,54.91,393.62,0.0 +64,80,17408,5120,ck,7,0,234.9918,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,60.69,392.88,0.0 +64,88,17408,5120,ck,4,0,247.4265,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,63.4,374.43,0.0 +64,96,17408,5120,ck,4,0,248.3971,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,68.89,374.25,0.0 +64,104,17408,5120,ck,4,0,246.0865,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,75.33,379.06,0.0 +64,112,17408,5120,ck,4,0,247.5756,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.64,378.07,0.0 +64,120,17408,5120,ck,4,0,199.2878,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,107.34,471.28,0.0 +64,128,17408,5120,ck,4,0,200.7226,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,113.67,469.51,0.0 +64,136,17408,5120,ck,4,0,333.7152,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.65,283.36,0.0 +64,144,17408,5120,ck,4,0,337.1046,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.15,281.45,0.0 +64,152,17408,5120,ck,4,0,298.8233,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,90.67,318.58,0.0 +64,160,17408,5120,ck,4,0,299.8362,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.12,318.57,0.0 +64,168,17408,5120,ck,4,0,303.5292,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.66,315.75,0.0 +64,176,17408,5120,ck,5,0,304.8058,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.93,315.47,0.0 +64,184,17408,5120,ck,4,0,461.0282,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,71.14,209.27,0.0 +64,192,17408,5120,ck,4,0,463.7121,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.81,208.74,0.0 +64,200,17408,5120,ck,5,0,384.1631,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,92.8,252.8,0.0 +64,208,17408,5120,ck,5,0,388.856,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.35,250.57,0.0 +64,216,17408,5120,ck,5,0,483.3415,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.66,202.25,0.0 +64,224,17408,5120,ck,5,0,491.4845,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,81.24,199.55,0.0 +64,232,17408,5120,ck,4,0,357.6991,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,115.62,275.08,0.0 +64,240,17408,5120,ck,4,0,353.7584,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,120.94,279.04,0.0 +64,248,17408,5120,ck,4,0,339.9477,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.04,291.32,0.0 +64,256,17408,5120,ck,4,0,341.6366,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.57,290.81,0.0 +64,264,17408,5120,ck,4,0,454.4213,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.56,219.34,0.0 +64,272,17408,5120,ck,4,0,455.4351,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.46,219.55,0.0 +64,280,17408,5120,ck,5,0,581.5285,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.83,172.5,0.0 +64,288,17408,5120,ck,5,0,594.3946,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.37,169.3,0.0 +64,296,17408,5120,ck,5,0,527.8183,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.97,191.26,0.0 +64,304,17408,5120,ck,5,0,531.2639,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.0,190.62,0.0 +64,312,17408,5120,ck,4,0,729.4028,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.25,139.28,0.0 +64,320,17408,5120,ck,4,0,744.9058,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.58,136.81,0.0 +64,328,17408,5120,ck,4,0,535.0255,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,109.28,191.07,0.0 +64,336,17408,5120,ck,4,0,527.0793,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,113.64,194.56,0.0 +64,344,17408,5120,ck,5,0,592.0397,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.58,173.75,0.0 +64,352,17408,5120,ck,5,0,596.326,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,105.22,173.04,0.0 +64,360,17408,5120,ck,4,0,739.9626,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.72,139.88,0.0 +64,368,17408,5120,ck,4,0,748.4909,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.64,138.71,0.0 +64,376,17408,5120,ck,4,0,560.3176,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,119.62,185.87,0.0 +64,384,17408,5120,ck,4,0,568.5766,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,120.39,183.73,0.0 +64,392,17408,5120,ck,5,0,837.0589,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.48,125.18,0.0 +64,400,17408,5120,ck,4,0,848.4456,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.04,123.88,0.0 +64,408,17408,5120,ck,5,0,717.5189,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.36,146.93,0.0 +64,416,17408,5120,ck,5,0,731.0468,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.44,144.65,0.0 +64,424,17408,5120,ck,5,0,822.4876,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,91.89,128.95,0.0 +64,432,17408,5120,ck,5,0,843.1205,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,91.34,126.18,0.0 +64,440,17408,5120,ck,4,0,1019.8987,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.9,104.62,0.0 +64,448,17408,5120,ck,4,0,1039.569,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.82,102.95,0.0 +64,456,17408,5120,ck,5,0,814.7424,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.77,131.75,0.0 +64,464,17408,5120,ck,5,0,824.5378,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.31,130.57,0.0 +64,472,17408,5120,ck,5,0,773.5131,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,108.77,139.6,0.0 +64,480,17408,5120,ck,5,0,774.4636,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,110.48,139.84,0.0 +64,488,17408,5120,ck,4,0,694.9497,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,125.17,156.3,0.0 +64,496,17408,5120,ck,4,0,684.3552,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,129.2,159.18,0.0 +64,504,17408,5120,ck,4,0,655.6456,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.03,166.64,0.0 +64,512,17408,5120,ck,4,0,664.5802,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.33,164.88,0.0 +64,1024,17408,5120,ck,4,0,1292.6321,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,141.21,100.59,0.0 +64,2048,17408,5120,ck,4,0,2620.8741,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,139.29,65.21,0.0 +64,4096,17408,5120,ck,4,0,5243.782,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,139.24,48.19,0.0 +64,8192,17408,5120,ck,4,0,10636.9877,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.28,39.14,0.0 +64,16384,17408,5120,ck,4,0,21145.2935,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,138.12,35.16,0.0 +64,20480,17408,5120,ck,4,0,26902.2857,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.7,33.72,0.0 diff --git a/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3_5-35B-A3B.csv b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3_5-35B-A3B.csv new file mode 100644 index 0000000000..5ae4fbd0db --- /dev/null +++ b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_qwen3_5-35B-A3B.csv @@ -0,0 +1,498 @@ +cu_num,M,N,K,libtype,kernelId,splitK,us,kernelName,tflops,bw,errRatio +64,1,512,2048,ck,7,0,10.4379,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.2,100.75,0.0 +64,2,512,2048,ck,5,0,8.9695,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,0.47,117.59,0.0 +64,4,512,2048,ck,7,0,8.0667,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.04,131.51,0.0 +64,8,512,2048,ck,7,0,8.1379,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,2.06,131.87,0.0 +64,16,512,2048,ck,7,0,8.1691,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,4.11,134.38,0.0 +64,16,512,7168,ck,5,0,19.6937,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,5.96,193.01,0.0 +64,24,512,2048,ck,5,0,8.7289,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,5.77,128.57,0.0 +64,32,512,2048,ck,5,0,8.7023,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,7.71,131.79,0.0 +64,32,512,7168,ck,5,0,21.4889,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,10.93,182.99,0.0 +64,40,512,2048,ck,5,0,9.5387,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,8.79,122.81,0.0 +64,48,512,2048,ck,7,0,8.5526,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,11.77,139.84,0.0 +64,56,512,2048,ck,7,0,8.5462,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,13.74,142.82,0.0 +64,64,512,2048,ck,7,0,8.5303,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,15.73,145.97,0.0 +64,64,512,7168,ck,7,0,21.5809,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,21.77,194.35,0.0 +64,72,512,2048,ck,7,0,8.6839,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,17.39,146.22,0.0 +64,80,512,2048,ck,5,0,9.0587,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,18.52,142.88,0.0 +64,88,512,2048,ck,7,0,8.6947,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,21.23,151.69,0.0 +64,96,512,2048,ck,7,0,8.8059,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,22.86,152.57,0.0 +64,104,512,2048,ck,7,0,8.8682,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,24.59,154.27,0.0 +64,112,512,2048,ck,7,0,8.9047,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,26.38,156.39,0.0 +64,120,512,2048,ck,7,0,8.9783,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,28.03,157.85,0.0 +64,128,512,2048,ck,5,0,9.3691,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,28.65,153.89,0.0 +64,128,512,7168,ck,7,0,22.2689,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,42.19,211.89,0.0 +64,136,512,2048,ck,4,0,10.4347,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,27.33,140.53,0.0 +64,144,512,2048,ck,5,0,9.7955,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,30.83,152.21,0.0 +64,152,512,2048,ck,5,0,10.1127,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,31.52,149.86,0.0 +64,160,512,2048,ck,5,0,9.8719,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,33.99,156.01,0.0 +64,168,512,2048,ck,5,0,10.1304,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,34.78,154.45,0.0 +64,176,512,2048,ck,5,0,10.1107,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,36.51,157.18,0.0 +64,184,512,2048,ck,4,0,10.7847,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,35.78,149.64,0.0 +64,192,512,2048,ck,5,0,10.1391,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,39.71,161.59,0.0 +64,200,512,2048,ck,5,0,10.4691,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,40.06,158.85,0.0 +64,208,512,2048,ck,5,0,10.2647,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,42.5,164.4,0.0 +64,216,512,2048,ck,5,0,10.45,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,43.35,163.84,0.0 +64,224,512,2048,ck,5,0,10.3259,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,45.49,168.19,0.0 +64,232,512,2048,ck,5,0,10.5303,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,46.2,167.26,0.0 +64,240,512,2048,ck,7,0,12.408,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,40.56,143.93,0.0 +64,248,512,2048,ck,5,0,10.568,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.21,171.31,0.0 +64,256,512,2048,ck,5,0,10.2971,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,52.14,178.21,0.0 +64,256,512,7168,ck,5,0,25.7121,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.08,224.3,0.0 +64,264,512,2048,ck,5,0,11.4352,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,48.42,162.62,0.0 +64,272,512,2048,ck,5,0,11.5199,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.52,163.56,0.0 +64,280,512,2048,ck,5,0,11.4795,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,51.15,166.27,0.0 +64,288,512,2048,ck,5,0,11.5487,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,52.3,167.41,0.0 +64,296,512,2048,ck,5,0,11.5739,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,53.63,169.16,0.0 +64,304,512,2048,ck,5,0,11.6232,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,54.85,170.56,0.0 +64,312,512,2048,ck,5,0,11.7611,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,55.63,170.65,0.0 +64,320,512,2048,ck,5,0,11.6451,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,57.63,174.46,0.0 +64,328,512,2048,ck,5,0,10.5267,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.34,195.33,0.0 +64,336,512,2048,ck,5,0,11.8467,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.48,175.64,0.0 +64,344,512,2048,ck,5,0,12.6728,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.93,166.13,0.0 +64,352,512,2048,ck,5,0,11.946,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,61.79,178.3,0.0 +64,360,512,2048,ck,4,0,13.8909,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,54.35,155.1,0.0 +64,368,512,2048,ck,5,0,12.034,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.13,181.08,0.0 +64,376,512,2048,ck,4,0,13.3212,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.19,165.42,0.0 +64,384,512,2048,ck,5,0,12.0636,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.76,184.71,0.0 +64,392,512,2048,ck,4,0,13.9527,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,58.92,161.46,0.0 +64,400,512,2048,ck,5,0,15.5927,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,53.8,146.05,0.0 +64,408,512,2048,ck,4,0,13.7392,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,62.28,167.55,0.0 +64,416,512,2048,ck,5,0,13.9519,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,62.53,166.75,0.0 +64,424,512,2048,ck,4,0,13.894,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.0,169.22,0.0 +64,432,512,2048,ck,4,0,13.9791,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.81,169.95,0.0 +64,440,512,2048,ck,4,0,13.8468,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.64,173.34,0.0 +64,448,512,2048,ck,5,0,13.8824,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,67.68,174.67,0.0 +64,456,512,2048,ck,5,0,14.176,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,67.46,172.79,0.0 +64,464,512,2048,ck,5,0,14.486,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,67.17,170.78,0.0 +64,472,512,2048,ck,4,0,14.3331,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.06,174.32,0.0 +64,480,512,2048,ck,5,0,13.3496,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,75.41,189.0,0.0 +64,488,512,2048,ck,4,0,13.1844,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.62,193.24,0.0 +64,496,512,2048,ck,5,0,12.9779,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.15,198.21,0.0 +64,504,512,2048,ck,4,0,13.7907,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.64,188.31,0.0 +64,512,512,2048,ck,5,0,13.9824,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.79,187.48,0.0 +64,512,512,7168,ck,4,0,37.4301,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.4,210.11,0.0 +64,1024,512,2048,ck,5,0,22.7933,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,94.22,184.01,0.0 +64,1024,512,7168,ck,5,0,70.906,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.0,170.06,0.0 +64,1536,512,7168,ck,4,0,83.1748,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.55,195.41,0.0 +64,2048,512,2048,ck,4,0,42.2697,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.61,173.65,0.0 +64,2048,512,7168,ck,4,0,116.6605,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,128.86,175.27,0.0 +64,4096,512,2048,ck,4,0,76.8486,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.78,177.38,0.0 +64,4096,512,7168,ck,4,0,213.7524,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,140.65,174.15,0.0 +64,8192,512,2048,ck,4,0,131.9558,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.19,198.66,0.0 +64,8192,512,7168,ck,4,0,417.0123,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,144.19,169.73,0.0 +64,16384,512,2048,ck,4,0,265.9049,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,129.22,193.23,0.0 +64,16384,512,7168,ck,4,0,818.9168,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,146.85,168.38,0.0 +64,20480,512,2048,ck,4,0,331.4829,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,129.57,192.96,0.0 +64,20480,512,7168,ck,4,0,1020.2685,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,147.34,168.04,0.0 +64,16,576,7168,ck,7,0,25.2051,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,5.24,169.09,0.0 +64,32,576,7168,ck,7,0,27.8155,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,9.5,158.01,0.0 +64,64,576,7168,ck,7,0,28.1682,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,18.76,165.48,0.0 +64,128,576,7168,ck,5,0,31.7294,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,33.31,163.69,0.0 +64,256,576,7168,ck,5,0,42.8623,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.32,146.02,0.0 +64,512,576,7168,ck,5,0,66.3525,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,63.72,126.42,0.0 +64,1024,576,7168,ck,5,0,122.1949,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.2,103.51,0.0 +64,1536,576,7168,ck,5,0,172.8024,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.4,97.85,0.0 +64,2048,576,7168,ck,5,0,228.4508,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,74.03,92.66,0.0 +64,4096,576,7168,ck,5,0,447.9123,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,75.51,85.3,0.0 +64,8192,576,7168,ck,5,0,877.6341,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.08,82.36,0.0 +64,16384,576,7168,ck,5,0,1711.816,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.03,82.04,0.0 +64,20480,576,7168,ck,5,0,2132.8093,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.29,81.83,0.0 +64,128,1024,4096,ck,5,0,16.8606,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,63.68,295.41,0.0 +64,16,1536,7168,ck,7,0,25.6659,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,13.73,435.36,0.0 +64,32,1536,7168,ck,7,0,25.6437,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,27.48,442.13,0.0 +64,64,1536,7168,ck,5,0,33.7328,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,41.78,345.82,0.0 +64,128,1536,7168,ck,5,0,36.4975,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.23,337.58,0.0 +64,256,1536,7168,ck,4,0,53.0244,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.31,257.08,0.0 +64,512,1536,7168,ck,4,0,86.4729,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.38,187.95,0.0 +64,1024,1536,7168,ck,4,0,164.6679,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,136.93,130.54,0.0 +64,1536,1536,7168,ck,4,0,239.2314,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,141.38,111.77,0.0 +64,2048,1536,7168,ck,4,0,313.7759,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,143.72,101.92,0.0 +64,4096,1536,7168,ck,4,0,642.2216,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,140.44,82.45,0.0 +64,8192,1536,7168,ck,4,0,1218.5593,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,148.03,77.88,0.0 +64,16384,1536,7168,ck,4,0,2464.8862,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,146.37,72.53,0.0 +64,20480,1536,7168,ck,4,0,3096.6507,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,145.63,71.28,0.0 +64,1,2048,256,ck,7,0,3.218,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.33,164.28,0.0 +64,1,2048,2048,ck,7,0,12.39,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.68,339.02,0.0 +64,2,2048,256,ck,7,0,2.7482,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.76,193.94,0.0 +64,2,2048,2048,ck,7,0,11.5069,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.46,365.57,0.0 +64,4,2048,256,ck,5,0,2.8438,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,1.47,190.48,0.0 +64,4,2048,2048,ck,7,0,11.4914,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,2.92,367.13,0.0 +64,8,2048,256,ck,7,0,3.238,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,2.59,172.67,0.0 +64,8,2048,2048,ck,7,0,11.4504,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,5.86,370.59,0.0 +64,16,2048,256,ck,7,0,3.3793,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,4.96,175.75,0.0 +64,16,2048,2048,ck,7,0,11.4601,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,11.71,374.57,0.0 +64,24,2048,256,ck,5,0,3.6475,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,6.9,172.37,0.0 +64,24,2048,2048,ck,7,0,12.2285,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,16.46,355.05,0.0 +64,32,2048,256,ck,7,0,3.3543,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,10.0,197.82,0.0 +64,32,2048,2048,ck,7,0,12.0861,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,22.21,363.3,0.0 +64,40,2048,256,ck,4,0,6.9311,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,6.05,100.76,0.0 +64,40,2048,2048,ck,7,0,13.8578,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,24.21,320.4,0.0 +64,48,2048,256,ck,5,0,4.0333,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,12.48,181.78,0.0 +64,48,2048,2048,ck,7,0,13.969,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,28.82,321.37,0.0 +64,56,2048,256,ck,5,0,4.0432,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,14.52,189.95,0.0 +64,56,2048,2048,ck,7,0,13.593,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,34.56,333.88,0.0 +64,64,2048,256,ck,5,0,4.3025,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,15.6,186.59,0.0 +64,64,2048,2048,ck,7,0,13.6859,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,39.23,335.2,0.0 +64,72,2048,256,ck,7,0,4.5555,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,16.57,183.87,0.0 +64,72,2048,2048,ck,5,0,15.8397,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,38.13,292.72,0.0 +64,80,2048,256,ck,5,0,4.8103,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,17.44,181.37,0.0 +64,80,2048,2048,ck,5,0,14.6585,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,45.78,319.67,0.0 +64,88,2048,256,ck,7,0,4.4529,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,20.72,203.75,0.0 +64,88,2048,2048,ck,5,0,14.9206,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.48,317.34,0.0 +64,96,2048,256,ck,7,0,4.4844,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,22.45,210.08,0.0 +64,96,2048,2048,ck,5,0,16.233,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.61,294.72,0.0 +64,104,2048,256,ck,7,0,4.9629,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,21.97,196.84,0.0 +64,104,2048,2048,ck,5,0,15.779,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,55.29,306.31,0.0 +64,112,2048,256,ck,7,0,5.0764,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,23.13,199.3,0.0 +64,112,2048,2048,ck,4,0,16.8734,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,55.68,289.36,0.0 +64,120,2048,256,ck,5,0,4.8226,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,26.09,217.0,0.0 +64,120,2048,2048,ck,5,0,16.0642,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,62.66,306.99,0.0 +64,128,2048,256,ck,7,0,5.2336,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,25.65,206.62,0.0 +64,128,2048,2048,ck,5,0,16.4074,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.44,303.57,0.0 +64,136,2048,256,ck,5,0,5.8396,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,24.42,191.14,0.0 +64,136,2048,2048,ck,5,0,17.5211,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.11,287.08,0.0 +64,144,2048,256,ck,7,0,5.8184,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,25.95,197.82,0.0 +64,144,2048,2048,ck,5,0,17.7004,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,68.24,286.94,0.0 +64,152,2048,256,ck,5,0,5.6989,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,27.97,208.07,0.0 +64,152,2048,2048,ck,5,0,17.4914,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.9,293.18,0.0 +64,160,2048,256,ck,5,0,5.767,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,29.09,211.65,0.0 +64,160,2048,2048,ck,5,0,17.5621,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.42,294.8,0.0 +64,168,2048,256,ck,7,0,6.1672,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,28.56,203.56,0.0 +64,168,2048,2048,ck,4,0,18.7568,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,75.13,278.65,0.0 +64,176,2048,256,ck,5,0,6.3305,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,29.15,203.81,0.0 +64,176,2048,2048,ck,4,0,18.9101,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.07,278.99,0.0 +64,184,2048,256,ck,7,0,6.2998,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,30.63,210.33,0.0 +64,184,2048,2048,ck,5,0,19.6285,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.64,271.28,0.0 +64,192,2048,256,ck,7,0,6.4041,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,31.44,212.34,0.0 +64,192,2048,2048,ck,5,0,19.9932,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.56,268.79,0.0 +64,200,2048,256,ck,7,0,6.5945,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,31.8,211.49,0.0 +64,200,2048,2048,ck,5,0,23.2685,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.1,233.07,0.0 +64,208,2048,256,ck,7,0,6.7368,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,32.37,212.19,0.0 +64,208,2048,2048,ck,5,0,23.8354,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.2,229.59,0.0 +64,216,2048,256,ck,7,0,6.7891,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,33.36,215.69,0.0 +64,216,2048,2048,ck,5,0,25.3111,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,71.59,218.14,0.0 +64,224,2048,256,ck,7,0,6.9045,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,34.02,217.12,0.0 +64,224,2048,2048,ck,5,0,25.2653,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,74.37,220.48,0.0 +64,232,2048,256,ck,5,0,7.1914,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,33.83,213.3,0.0 +64,232,2048,2048,ck,5,0,25.1212,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.47,223.7,0.0 +64,240,2048,256,ck,5,0,7.0217,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,35.84,223.42,0.0 +64,240,2048,2048,ck,5,0,25.2065,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.87,224.9,0.0 +64,248,2048,256,ck,5,0,6.7561,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,38.49,237.35,0.0 +64,248,2048,2048,ck,5,0,23.8498,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.23,239.75,0.0 +64,256,2048,256,ck,5,0,6.9801,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,38.46,234.72,0.0 +64,256,2048,2048,ck,5,0,24.0041,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.46,240.26,0.0 +64,264,2048,256,ck,5,0,7.6262,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,36.3,219.4,0.0 +64,264,2048,2048,ck,5,0,26.923,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,82.26,216.04,0.0 +64,272,2048,256,ck,5,0,7.6995,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,37.04,221.84,0.0 +64,272,2048,2048,ck,5,0,28.8829,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.0,203.08,0.0 +64,280,2048,256,ck,7,0,8.1211,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,36.15,214.61,0.0 +64,280,2048,2048,ck,5,0,29.2356,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.34,202.31,0.0 +64,288,2048,256,ck,5,0,8.3939,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,35.98,211.78,0.0 +64,288,2048,2048,ck,5,0,29.7398,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,81.24,200.53,0.0 +64,296,2048,256,ck,5,0,8.5529,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,36.29,211.91,0.0 +64,296,2048,2048,ck,5,0,29.8122,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.29,201.69,0.0 +64,304,2048,256,ck,7,0,8.4051,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,37.93,219.78,0.0 +64,304,2048,2048,ck,5,0,29.9148,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.25,202.64,0.0 +64,312,2048,256,ck,7,0,8.6367,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,37.88,217.92,0.0 +64,312,2048,2048,ck,5,0,32.2663,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,81.11,189.4,0.0 +64,320,2048,256,ck,5,0,8.7431,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,38.38,219.25,0.0 +64,320,2048,2048,ck,5,0,32.5607,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,82.44,189.2,0.0 +64,328,2048,256,ck,5,0,8.7899,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,39.13,222.04,0.0 +64,328,2048,2048,ck,5,0,32.5465,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.54,190.79,0.0 +64,336,2048,256,ck,5,0,8.7335,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,40.34,227.46,0.0 +64,336,2048,2048,ck,5,0,33.1155,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.11,189.0,0.0 +64,344,2048,256,ck,7,0,9.2273,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,39.09,219.06,0.0 +64,344,2048,2048,ck,5,0,34.3755,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.95,183.5,0.0 +64,352,2048,256,ck,5,0,9.0859,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,40.62,226.31,0.0 +64,352,2048,2048,ck,5,0,34.2199,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.29,185.77,0.0 +64,360,2048,256,ck,5,0,9.6983,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,38.92,215.61,0.0 +64,360,2048,2048,ck,4,0,34.9894,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.31,183.09,0.0 +64,368,2048,256,ck,5,0,9.6687,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,39.91,219.87,0.0 +64,368,2048,2048,ck,4,0,35.059,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.05,184.13,0.0 +64,376,2048,256,ck,5,0,9.3422,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,42.2,231.28,0.0 +64,376,2048,2048,ck,5,0,35.5759,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.66,182.83,0.0 +64,384,2048,256,ck,5,0,9.5223,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,42.29,230.56,0.0 +64,384,2048,2048,ck,4,0,35.5943,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,90.5,184.12,0.0 +64,392,2048,256,ck,5,0,10.1043,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,40.68,220.73,0.0 +64,392,2048,2048,ck,5,0,39.0868,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.13,168.93,0.0 +64,400,2048,256,ck,5,0,10.1839,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,41.19,222.42,0.0 +64,400,2048,2048,ck,5,0,38.6522,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.81,172.1,0.0 +64,408,2048,256,ck,5,0,10.1271,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,42.24,227.1,0.0 +64,408,2048,2048,ck,5,0,39.0666,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.61,171.53,0.0 +64,416,2048,256,ck,5,0,10.1383,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,43.03,230.29,0.0 +64,416,2048,2048,ck,5,0,38.9967,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.49,173.1,0.0 +64,424,2048,256,ck,5,0,10.6291,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,41.83,222.93,0.0 +64,424,2048,2048,ck,5,0,40.9917,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.77,165.87,0.0 +64,432,2048,256,ck,7,0,10.7703,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,42.06,223.24,0.0 +64,432,2048,2048,ck,5,0,40.4778,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.53,169.19,0.0 +64,440,2048,256,ck,5,0,10.7499,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,42.92,226.9,0.0 +64,440,2048,2048,ck,5,0,41.8265,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.25,164.91,0.0 +64,448,2048,256,ck,4,0,11.2083,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,41.91,220.73,0.0 +64,448,2048,2048,ck,5,0,42.0404,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.39,165.24,0.0 +64,456,2048,256,ck,5,0,11.0796,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,43.16,226.43,0.0 +64,456,2048,2048,ck,5,0,42.5618,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.87,164.37,0.0 +64,464,2048,256,ck,5,0,11.1379,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,43.68,228.37,0.0 +64,464,2048,2048,ck,5,0,42.954,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,90.62,164.02,0.0 +64,472,2048,256,ck,5,0,10.8839,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,45.47,236.9,0.0 +64,472,2048,2048,ck,5,0,40.9894,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,96.6,173.08,0.0 +64,480,2048,256,ck,5,0,11.2177,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,44.87,232.96,0.0 +64,480,2048,2048,ck,5,0,41.7296,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,96.49,171.18,0.0 +64,488,2048,256,ck,5,0,10.9083,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,46.91,242.76,0.0 +64,488,2048,2048,ck,4,0,41.6525,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.28,172.68,0.0 +64,496,2048,256,ck,5,0,10.9664,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,47.43,244.65,0.0 +64,496,2048,2048,ck,4,0,41.9262,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.24,172.73,0.0 +64,504,2048,256,ck,5,0,11.648,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,45.37,233.32,0.0 +64,504,2048,2048,ck,5,0,41.8391,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.05,174.26,0.0 +64,512,2048,256,ck,5,0,10.7788,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.81,255.36,0.0 +64,512,2048,2048,ck,5,0,42.2431,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.67,173.76,0.0 +64,1024,2048,256,ck,5,0,19.2941,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,55.65,258.15,0.0 +64,1024,2048,2048,ck,4,0,70.2808,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,122.22,149.2,0.0 +64,2048,2048,256,ck,5,0,35.1713,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,61.06,268.32,0.0 +64,2048,2048,2048,ck,4,0,134.0017,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,128.21,125.2,0.0 +64,4096,2048,256,ck,5,0,65.9647,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.11,278.18,0.0 +64,4096,2048,2048,ck,4,0,263.3279,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.48,111.5,0.0 +64,8192,2048,256,ck,4,0,124.1623,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.18,291.36,0.0 +64,8192,2048,2048,ck,4,0,520.1263,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.12,104.83,0.0 +64,16384,2048,256,ck,5,0,235.6501,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.9,304.81,0.0 +64,16384,2048,2048,ck,4,0,1034.023,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.92,101.41,0.0 +64,20480,2048,256,ck,5,0,293.0381,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.28,305.94,0.0 +64,20480,2048,2048,ck,4,0,1287.5347,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.43,100.99,0.0 +64,1,2560,2048,ck,7,0,14.4911,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.72,362.29,0.0 +64,2,2560,2048,ck,7,0,13.2125,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.59,397.9,0.0 +64,4,2560,2048,ck,7,0,13.2684,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,3.16,397.3,0.0 +64,8,2560,2048,ck,7,0,13.3721,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,6.27,396.36,0.0 +64,16,2560,2048,ck,7,0,13.5109,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,12.42,396.54,0.0 +64,24,2560,2048,ck,7,0,15.002,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,16.77,360.95,0.0 +64,32,2560,2048,ck,7,0,15.2073,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,22.06,359.84,0.0 +64,40,2560,2048,ck,7,0,14.9051,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,28.14,370.99,0.0 +64,48,2560,2048,ck,7,0,14.9905,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,33.58,372.7,0.0 +64,56,2560,2048,ck,5,0,16.3004,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,36.02,346.27,0.0 +64,64,2560,2048,ck,4,0,18.6764,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,35.93,305.29,0.0 +64,72,2560,2048,ck,5,0,17.5906,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,42.92,327.39,0.0 +64,80,2560,2048,ck,4,0,18.5466,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,45.23,313.61,0.0 +64,88,2560,2048,ck,5,0,17.5771,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,52.5,334.17,0.0 +64,96,2560,2048,ck,5,0,17.9183,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.18,331.0,0.0 +64,104,2560,2048,ck,5,0,19.1607,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.91,312.53,0.0 +64,112,2560,2048,ck,5,0,19.769,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.41,305.82,0.0 +64,120,2560,2048,ck,5,0,19.6431,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.06,310.7,0.0 +64,128,2560,2048,ck,4,0,20.135,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.66,305.95,0.0 +64,136,2560,2048,ck,4,0,20.2559,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,70.4,306.96,0.0 +64,144,2560,2048,ck,4,0,20.3461,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,74.21,308.42,0.0 +64,152,2560,2048,ck,5,0,24.0417,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.29,263.39,0.0 +64,160,2560,2048,ck,5,0,24.1438,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.49,264.65,0.0 +64,168,2560,2048,ck,5,0,26.5122,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.45,243.17,0.0 +64,176,2560,2048,ck,5,0,26.8536,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,68.72,242.22,0.0 +64,184,2560,2048,ck,5,0,29.9102,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,64.51,219.38,0.0 +64,192,2560,2048,ck,5,0,30.9032,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.15,214.19,0.0 +64,200,2560,2048,ck,5,0,31.3765,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.84,212.79,0.0 +64,208,2560,2048,ck,4,0,32.6094,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.88,206.5,0.0 +64,216,2560,2048,ck,5,0,32.6098,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.46,208.26,0.0 +64,224,2560,2048,ck,5,0,32.4252,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.44,211.21,0.0 +64,232,2560,2048,ck,5,0,33.0569,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.59,208.91,0.0 +64,240,2560,2048,ck,5,0,34.3665,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.23,202.62,0.0 +64,248,2560,2048,ck,5,0,27.542,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,94.42,254.9,0.0 +64,256,2560,2048,ck,5,0,27.9832,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.93,252.93,0.0 +64,272,2560,2048,ck,4,0,36.3576,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,78.45,197.83,0.0 +64,288,2560,2048,ck,4,0,37.6533,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.2,194.07,0.0 +64,304,2560,2048,ck,4,0,37.611,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.75,197.33,0.0 +64,320,2560,2048,ck,5,0,42.4025,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.13,177.74,0.0 +64,336,2560,2048,ck,5,0,43.7114,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.6,175.04,0.0 +64,352,2560,2048,ck,5,0,45.0782,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,81.88,172.28,0.0 +64,368,2560,2048,ck,5,0,46.3952,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.17,169.86,0.0 +64,384,2560,2048,ck,4,0,45.1242,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,89.23,177.19,0.0 +64,400,2560,2048,ck,4,0,49.2663,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.14,164.62,0.0 +64,416,2560,2048,ck,5,0,50.1851,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.92,163.89,0.0 +64,432,2560,2048,ck,5,0,52.3525,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.53,159.29,0.0 +64,448,2560,2048,ck,4,0,52.7866,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.99,160.16,0.0 +64,464,2560,2048,ck,5,0,54.8,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.78,156.37,0.0 +64,480,2560,2048,ck,5,0,52.8143,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.3,164.42,0.0 +64,496,2560,2048,ck,4,0,52.4525,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.16,167.74,0.0 +64,512,2560,2048,ck,5,0,51.7684,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.71,172.17,0.0 +64,8192,2560,2048,ck,4,0,648.9185,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.37,98.57,0.0 +64,128,4096,1280,ck,5,0,15.6544,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.74,412.36,0.0 +64,1,4608,2048,ck,7,0,21.8652,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.86,432.12,0.0 +64,2,4608,2048,ck,7,0,22.0835,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,1.71,428.36,0.0 +64,4,4608,2048,ck,7,0,22.2302,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,3.4,426.55,0.0 +64,8,4608,2048,ck,7,0,22.3048,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,6.77,427.14,0.0 +64,16,4608,2048,ck,7,0,22.5907,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,13.37,425.72,0.0 +64,16,4608,7168,ck,7,0,61.196,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,17.27,544.03,0.0 +64,24,4608,2048,ck,7,0,24.6725,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,18.36,393.46,0.0 +64,32,4608,2048,ck,7,0,24.7515,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,24.4,395.84,0.0 +64,32,4608,7168,ck,5,0,65.4147,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,32.32,512.95,0.0 +64,40,4608,2048,ck,5,0,26.7606,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,28.21,369.49,0.0 +64,48,4608,2048,ck,4,0,27.1449,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,33.38,367.58,0.0 +64,56,4608,2048,ck,5,0,26.7624,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,39.49,376.2,0.0 +64,64,4608,2048,ck,5,0,28.055,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,43.06,362.08,0.0 +64,64,4608,7168,ck,5,0,70.5849,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.9,482.8,0.0 +64,72,4608,2048,ck,4,0,28.1445,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,48.28,364.13,0.0 +64,80,4608,2048,ck,4,0,28.4329,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,53.11,363.6,0.0 +64,88,4608,2048,ck,5,0,36.0626,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,46.06,289.18,0.0 +64,96,4608,2048,ck,5,0,35.8703,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,50.51,293.24,0.0 +64,104,4608,2048,ck,5,0,37.595,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,52.21,282.18,0.0 +64,112,4608,2048,ck,5,0,39.1637,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,53.98,273.18,0.0 +64,120,4608,2048,ck,5,0,36.3764,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,62.26,296.59,0.0 +64,128,4608,2048,ck,5,0,37.1152,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.09,293.11,0.0 +64,128,4608,7168,ck,5,0,97.1681,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.02,361.51,0.0 +64,136,4608,2048,ck,4,0,45.3808,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.56,241.71,0.0 +64,144,4608,2048,ck,4,0,45.6162,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.58,242.44,0.0 +64,152,4608,2048,ck,5,0,41.2897,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.48,270.03,0.0 +64,160,4608,2048,ck,4,0,41.3532,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.03,271.79,0.0 +64,168,4608,2048,ck,5,0,45.6617,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.44,248.12,0.0 +64,176,4608,2048,ck,5,0,45.576,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.89,250.56,0.0 +64,184,4608,2048,ck,4,0,53.2196,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.26,216.27,0.0 +64,192,4608,2048,ck,4,0,53.7164,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,67.46,215.95,0.0 +64,200,4608,2048,ck,4,0,53.3531,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,70.75,219.11,0.0 +64,208,4608,2048,ck,4,0,53.7077,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.1,219.34,0.0 +64,216,4608,2048,ck,4,0,56.0011,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.8,211.96,0.0 +64,224,4608,2048,ck,4,0,57.1927,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.92,209.12,0.0 +64,232,4608,2048,ck,4,0,52.4809,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.44,229.62,0.0 +64,240,4608,2048,ck,4,0,52.5518,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.2,231.02,0.0 +64,248,4608,2048,ck,4,0,46.7871,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.05,261.41,0.0 +64,256,4608,2048,ck,5,0,46.6065,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.67,264.36,0.0 +64,256,4608,7168,ck,4,0,129.3305,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.76,287.82,0.0 +64,264,4608,2048,ck,5,0,58.4321,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.28,212.4,0.0 +64,272,4608,2048,ck,5,0,59.044,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.95,211.72,0.0 +64,280,4608,2048,ck,4,0,66.6974,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,79.24,188.78,0.0 +64,288,4608,2048,ck,4,0,67.7728,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.21,187.11,0.0 +64,296,4608,2048,ck,4,0,65.8927,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.79,193.82,0.0 +64,304,4608,2048,ck,4,0,66.4709,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,86.32,193.49,0.0 +64,312,4608,2048,ck,4,0,72.7388,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,80.96,178.06,0.0 +64,320,4608,2048,ck,4,0,72.6712,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,83.11,179.46,0.0 +64,328,4608,2048,ck,4,0,61.6997,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.34,212.83,0.0 +64,336,4608,2048,ck,4,0,63.5835,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.74,207.95,0.0 +64,344,4608,2048,ck,4,0,67.6465,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.98,196.79,0.0 +64,352,4608,2048,ck,4,0,68.7362,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,96.66,194.98,0.0 +64,360,4608,2048,ck,4,0,68.5411,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.13,196.85,0.0 +64,368,4608,2048,ck,4,0,69.4557,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.0,195.55,0.0 +64,376,4608,2048,ck,4,0,71.4828,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.28,191.27,0.0 +64,384,4608,2048,ck,4,0,72.0933,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.53,190.9,0.0 +64,392,4608,2048,ck,4,0,78.0831,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,94.75,177.41,0.0 +64,400,4608,2048,ck,4,0,85.573,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,88.23,162.93,0.0 +64,408,4608,2048,ck,4,0,74.8713,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.85,187.43,0.0 +64,416,4608,2048,ck,4,0,75.1986,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,104.41,187.81,0.0 +64,424,4608,2048,ck,4,0,79.1491,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.11,179.57,0.0 +64,432,4608,2048,ck,4,0,80.8701,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,100.82,176.87,0.0 +64,440,4608,2048,ck,4,0,84.0773,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.77,171.19,0.0 +64,448,4608,2048,ck,4,0,85.0858,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.38,170.22,0.0 +64,456,4608,2048,ck,5,0,90.1317,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.49,161.69,0.0 +64,464,4608,2048,ck,4,0,84.2516,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.95,174.05,0.0 +64,472,4608,2048,ck,5,0,86.5888,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.89,170.39,0.0 +64,480,4608,2048,ck,4,0,80.6333,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,112.36,184.09,0.0 +64,488,4608,2048,ck,4,0,78.5308,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.29,190.17,0.0 +64,496,4608,2048,ck,5,0,82.9216,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,112.9,181.18,0.0 +64,504,4608,2048,ck,5,0,80.9207,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.56,186.78,0.0 +64,512,4608,2048,ck,4,0,75.5376,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,127.93,201.28,0.0 +64,512,4608,7168,ck,4,0,245.3308,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,137.87,168.83,0.0 +64,1024,4608,2048,ck,4,0,146.3894,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.03,143.26,0.0 +64,1024,4608,7168,ck,4,0,470.4402,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,143.79,105.87,0.0 +64,1536,4608,7168,ck,4,0,706.1958,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,143.68,82.41,0.0 +64,2048,4608,2048,ck,4,0,288.709,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.89,112.59,0.0 +64,2048,4608,7168,ck,4,0,925.3082,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,146.21,71.96,0.0 +64,4096,4608,2048,ck,4,0,590.6784,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.88,94.09,0.0 +64,4096,4608,7168,ck,4,0,1856.4598,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,145.75,53.94,0.0 +64,8192,4608,2048,ck,4,0,1144.8114,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.06,88.85,0.0 +64,8192,4608,7168,ck,4,0,3706.059,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,146.02,45.13,0.0 +64,16384,4608,2048,ck,4,0,2321.4591,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.21,83.56,0.0 +64,16384,4608,7168,ck,4,0,7509.3088,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,144.13,40.15,0.0 +64,20480,4608,2048,ck,4,0,2907.8671,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,132.93,82.58,0.0 +64,20480,4608,7168,ck,4,0,9434.4568,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,143.4,39.07,0.0 +64,1,6144,2048,ck,7,0,28.1272,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0.89,447.87,0.0 +64,2,6144,2048,ck,4,0,30.3999,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,1.66,414.86,0.0 +64,4,6144,2048,ck,7,0,28.4451,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,3.54,444.37,0.0 +64,8,6144,2048,ck,7,0,28.6326,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,7.03,443.47,0.0 +64,16,6144,2048,ck,7,0,28.8897,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,13.94,443.49,0.0 +64,24,6144,2048,ck,7,0,31.2273,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,19.34,413.96,0.0 +64,32,6144,2048,ck,7,0,31.7942,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,25.33,410.19,0.0 +64,40,6144,2048,ck,7,0,32.5022,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,30.97,404.78,0.0 +64,48,6144,2048,ck,5,0,36.0501,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,33.51,368.13,0.0 +64,56,6144,2048,ck,7,0,32.2193,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,43.74,415.46,0.0 +64,64,6144,2048,ck,7,0,32.2696,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,49.91,418.36,0.0 +64,72,6144,2048,ck,4,0,44.6024,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,40.62,305.25,0.0 +64,80,6144,2048,ck,7,0,43.7239,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,46.04,314.01,0.0 +64,88,6144,2048,ck,4,0,44.7597,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,49.48,309.31,0.0 +64,96,6144,2048,ck,4,0,44.8335,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,53.89,311.36,0.0 +64,104,6144,2048,ck,4,0,43.575,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,60.06,322.98,0.0 +64,112,6144,2048,ck,5,0,45.7558,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,61.6,310.09,0.0 +64,120,6144,2048,ck,4,0,41.5737,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.64,344.05,0.0 +64,128,6144,2048,ck,4,0,42.0674,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.57,342.73,0.0 +64,136,6144,2048,ck,5,0,56.7687,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,60.29,256.0,0.0 +64,144,6144,2048,ck,5,0,57.7782,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,62.72,253.51,0.0 +64,152,6144,2048,ck,4,0,52.2986,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,73.14,282.26,0.0 +64,160,6144,2048,ck,4,0,52.0028,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.43,286.07,0.0 +64,168,6144,2048,ck,5,0,54.5057,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,77.57,275.04,0.0 +64,176,6144,2048,ck,4,0,52.0801,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,85.05,290.05,0.0 +64,184,6144,2048,ck,4,0,67.6618,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,68.44,224.95,0.0 +64,192,6144,2048,ck,4,0,63.5511,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,76.03,241.31,0.0 +64,200,6144,2048,ck,4,0,60.9457,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,82.58,253.51,0.0 +64,208,6144,2048,ck,4,0,59.9919,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.25,259.45,0.0 +64,216,6144,2048,ck,4,0,64.338,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,84.49,243.7,0.0 +64,224,6144,2048,ck,4,0,64.7067,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,87.12,244.09,0.0 +64,232,6144,2048,ck,4,0,62.3486,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,93.64,255.16,0.0 +64,240,6144,2048,ck,4,0,62.0497,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,97.34,258.24,0.0 +64,248,6144,2048,ck,4,0,54.3603,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,114.81,296.88,0.0 +64,256,6144,2048,ck,4,0,54.7972,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,117.57,296.6,0.0 +64,264,6144,2048,ck,4,0,69.7711,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.22,234.59,0.0 +64,272,6144,2048,ck,4,0,70.3541,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,97.3,234.28,0.0 +64,280,6144,2048,ck,4,0,78.0416,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,90.29,212.67,0.0 +64,288,6144,2048,ck,4,0,78.2285,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,92.65,213.63,0.0 +64,296,6144,2048,ck,4,0,77.9038,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.62,215.99,0.0 +64,304,6144,2048,ck,4,0,78.5257,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,97.43,215.74,0.0 +64,312,6144,2048,ck,4,0,87.1662,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,90.08,195.67,0.0 +64,320,6144,2048,ck,4,0,88.2706,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,91.23,194.52,0.0 +64,328,6144,2048,ck,4,0,80.165,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.97,215.62,0.0 +64,336,6144,2048,ck,4,0,81.2065,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,104.13,214.27,0.0 +64,344,6144,2048,ck,4,0,84.0024,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.06,208.5,0.0 +64,352,6144,2048,ck,4,0,84.9616,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,104.26,207.5,0.0 +64,360,6144,2048,ck,4,0,92.7986,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,97.63,191.21,0.0 +64,368,6144,2048,ck,4,0,93.5784,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,98.97,190.84,0.0 +64,376,6144,2048,ck,4,0,89.1997,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.08,201.49,0.0 +64,384,6144,2048,ck,4,0,90.4864,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.8,199.9,0.0 +64,392,6144,2048,ck,4,0,97.0628,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,101.64,187.53,0.0 +64,400,6144,2048,ck,4,0,97.3049,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.45,188.25,0.0 +64,408,6144,2048,ck,4,0,99.0865,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,103.62,186.02,0.0 +64,416,6144,2048,ck,4,0,105.1378,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,99.57,176.4,0.0 +64,424,6144,2048,ck,4,0,103.7592,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,102.84,179.85,0.0 +64,432,6144,2048,ck,5,0,112.6845,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,96.48,166.63,0.0 +64,440,6144,2048,ck,4,0,114.717,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,96.52,164.67,0.0 +64,448,6144,2048,ck,4,0,118.2953,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.31,160.66,0.0 +64,456,6144,2048,ck,4,0,107.9807,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,106.27,177.07,0.0 +64,464,6144,2048,ck,4,0,106.883,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,109.25,179.96,0.0 +64,472,6144,2048,ck,4,0,106.8688,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,111.15,181.06,0.0 +64,480,6144,2048,ck,4,0,106.2816,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,113.66,183.14,0.0 +64,488,6144,2048,ck,4,0,103.5695,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,118.58,189.04,0.0 +64,496,6144,2048,ck,5,0,112.6771,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,110.78,174.78,0.0 +64,504,6144,2048,ck,4,0,106.0555,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,119.59,186.77,0.0 +64,512,6144,2048,ck,4,0,100.4293,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,128.3,198.38,0.0 +64,1024,6144,2048,ck,4,0,196.5693,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,131.1,138.69,0.0 +64,2048,6144,2048,ck,4,0,396.6876,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,129.92,105.73,0.0 +64,4096,6144,2048,ck,4,0,798.5085,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,129.09,89.3,0.0 +64,8192,6144,2048,ck,4,0,1549.4403,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.05,83.92,0.0 +64,16384,6144,2048,ck,4,0,3096.8893,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.14,79.91,0.0 +64,20480,6144,2048,ck,4,0,3931.7623,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,131.09,77.87,0.0 +64,16,7168,256,ck,5,0,7.5942,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,7.73,272.38,0.0 +64,16,7168,2304,ck,5,0,29.7999,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,17.73,563.13,0.0 +64,32,7168,256,ck,7,0,6.7547,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,17.39,340.79,0.0 +64,32,7168,2304,ck,5,0,31.5786,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,33.47,539.85,0.0 +64,64,7168,256,ck,7,0,8.0483,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,29.18,344.03,0.0 +64,64,7168,2304,ck,5,0,35.8219,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,59.01,490.76,0.0 +64,128,7168,256,ck,5,0,10.4942,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,44.76,352.84,0.0 +64,128,7168,2304,ck,4,0,44.49,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,95.03,419.08,0.0 +64,256,7168,256,ck,5,0,17.0442,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,55.12,326.83,0.0 +64,256,7168,2304,ck,5,0,70.6064,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,119.76,294.24,0.0 +64,512,7168,256,ck,5,0,31.2223,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,60.18,298.06,0.0 +64,512,7168,2304,ck,4,0,126.6107,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.57,197.73,0.0 +64,1024,7168,256,ck,5,0,59.2074,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,63.47,283.36,0.0 +64,1024,7168,2304,ck,4,0,250.469,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,135.04,133.97,0.0 +64,1536,7168,256,ck,5,0,86.3696,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,65.27,280.75,0.0 +64,1536,7168,2304,ck,4,0,376.9631,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,134.59,111.61,0.0 +64,2048,7168,256,ck,4,0,113.1272,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,66.44,280.39,0.0 +64,2048,7168,2304,ck,4,0,518.9176,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,130.36,97.5,0.0 +64,4096,7168,256,ck,5,0,206.5246,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.79,298.29,0.0 +64,4096,7168,2304,ck,4,0,1016.5121,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.09,83.3,0.0 +64,8192,7168,256,ck,5,0,429.9034,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.93,282.33,0.0 +64,8192,7168,2304,ck,4,0,2010.797,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,134.57,76.0,0.0 +64,16384,7168,256,ck,5,0,859.6215,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,69.95,280.25,0.0 +64,16384,7168,2304,ck,4,0,4047.0059,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.72,71.45,0.0 +64,20480,7168,256,ck,5,0,1043.3434,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,72.04,288.19,0.0 +64,20480,7168,2304,ck,4,0,5075.6717,a8w8_blockscale_1x128x128_256x16x256x128_8x16_16x16_1x4_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,133.27,70.4,0.0 diff --git a/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3-30B-A3B.csv b/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3-30B-A3B.csv new file mode 100644 index 0000000000..0bd2a48cac --- /dev/null +++ b/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3-30B-A3B.csv @@ -0,0 +1,189 @@ +M,N,K +2,2560,2048 +4,2560,2048 +8,2560,2048 +16,2560,2048 +24,2560,2048 +32,2560,2048 +40,2560,2048 +48,2560,2048 +56,2560,2048 +64,2560,2048 +72,2560,2048 +80,2560,2048 +88,2560,2048 +96,2560,2048 +104,2560,2048 +112,2560,2048 +120,2560,2048 +128,2560,2048 +136,2560,2048 +144,2560,2048 +152,2560,2048 +160,2560,2048 +168,2560,2048 +176,2560,2048 +184,2560,2048 +192,2560,2048 +200,2560,2048 +208,2560,2048 +216,2560,2048 +224,2560,2048 +232,2560,2048 +240,2560,2048 +248,2560,2048 +256,2560,2048 +272,2560,2048 +288,2560,2048 +304,2560,2048 +320,2560,2048 +336,2560,2048 +352,2560,2048 +368,2560,2048 +384,2560,2048 +400,2560,2048 +416,2560,2048 +432,2560,2048 +448,2560,2048 +464,2560,2048 +480,2560,2048 +496,2560,2048 +512,2560,2048 +8192,2560,2048 +16,2048,2048 +32,2048,2048 +64,2048,2048 +128,2048,2048 +256,2048,2048 +512,2048,2048 +1024,2048,2048 +2048,2048,2048 +4096,2048,2048 +8192,2048,2048 +16384,2048,2048 +20480,2048,2048 +1,2048,2048 +2,2048,2048 +4,2048,2048 +8,2048,2048 +16,2048,2048 +24,2048,2048 +32,2048,2048 +40,2048,2048 +48,2048,2048 +56,2048,2048 +64,2048,2048 +72,2048,2048 +80,2048,2048 +88,2048,2048 +96,2048,2048 +104,2048,2048 +112,2048,2048 +120,2048,2048 +128,2048,2048 +136,2048,2048 +144,2048,2048 +152,2048,2048 +160,2048,2048 +168,2048,2048 +176,2048,2048 +184,2048,2048 +192,2048,2048 +200,2048,2048 +208,2048,2048 +216,2048,2048 +224,2048,2048 +232,2048,2048 +240,2048,2048 +248,2048,2048 +256,2048,2048 +264,2048,2048 +272,2048,2048 +280,2048,2048 +288,2048,2048 +296,2048,2048 +304,2048,2048 +312,2048,2048 +320,2048,2048 +328,2048,2048 +336,2048,2048 +344,2048,2048 +352,2048,2048 +360,2048,2048 +368,2048,2048 +376,2048,2048 +384,2048,2048 +392,2048,2048 +400,2048,2048 +408,2048,2048 +416,2048,2048 +424,2048,2048 +432,2048,2048 +440,2048,2048 +448,2048,2048 +456,2048,2048 +464,2048,2048 +472,2048,2048 +480,2048,2048 +488,2048,2048 +496,2048,2048 +504,2048,2048 +512,2048,2048 +1024,2048,2048 +2048,2048,2048 +4096,2048,2048 +8192,2048,2048 +16384,2048,2048 +20480,2048,2048 +1,2560,2048 +2,2560,2048 +4,2560,2048 +8,2560,2048 +16,2560,2048 +24,2560,2048 +32,2560,2048 +40,2560,2048 +48,2560,2048 +56,2560,2048 +64,2560,2048 +72,2560,2048 +80,2560,2048 +88,2560,2048 +96,2560,2048 +104,2560,2048 +112,2560,2048 +120,2560,2048 +128,2560,2048 +136,2560,2048 +144,2560,2048 +152,2560,2048 +160,2560,2048 +168,2560,2048 +176,2560,2048 +184,2560,2048 +192,2560,2048 +200,2560,2048 +208,2560,2048 +216,2560,2048 +224,2560,2048 +232,2560,2048 +240,2560,2048 +248,2560,2048 +256,2560,2048 +272,2560,2048 +288,2560,2048 +304,2560,2048 +320,2560,2048 +336,2560,2048 +352,2560,2048 +368,2560,2048 +384,2560,2048 +400,2560,2048 +416,2560,2048 +432,2560,2048 +448,2560,2048 +464,2560,2048 +480,2560,2048 +496,2560,2048 +512,2560,2048 +8192,2560,2048 diff --git a/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3_0.6b.csv b/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3_0.6b.csv new file mode 100644 index 0000000000..1fc0003068 --- /dev/null +++ b/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3_0.6b.csv @@ -0,0 +1,49 @@ +M,N,K +16,1024,2048 +16,1024,3072 +32,1024,2048 +32,1024,3072 +64,1024,2048 +64,1024,3072 +128,1024,2048 +128,1024,3072 +256,1024,2048 +256,1024,3072 +512,1024,2048 +512,1024,3072 +1024,1024,2048 +1024,1024,3072 +2048,1024,2048 +2048,1024,3072 +4096,1024,2048 +4096,1024,3072 +8192,1024,2048 +8192,1024,3072 +16384,1024,2048 +16384,1024,3072 +20480,1024,2048 +20480,1024,3072 +16,4096,1024 +32,4096,1024 +64,4096,1024 +128,4096,1024 +256,4096,1024 +512,4096,1024 +1024,4096,1024 +2048,4096,1024 +4096,4096,1024 +8192,4096,1024 +16384,4096,1024 +20480,4096,1024 +16,6144,1024 +32,6144,1024 +64,6144,1024 +128,6144,1024 +256,6144,1024 +512,6144,1024 +1024,6144,1024 +2048,6144,1024 +4096,6144,1024 +8192,6144,1024 +16384,6144,1024 +20480,6144,1024 diff --git a/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3_5-27B.csv b/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3_5-27B.csv new file mode 100644 index 0000000000..69ca100964 --- /dev/null +++ b/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3_5-27B.csv @@ -0,0 +1,366 @@ +M,N,K +1,8192,5120 +2,8192,5120 +4,8192,5120 +8,8192,5120 +16,8192,5120 +24,8192,5120 +32,8192,5120 +40,8192,5120 +48,8192,5120 +56,8192,5120 +64,8192,5120 +72,8192,5120 +80,8192,5120 +88,8192,5120 +96,8192,5120 +104,8192,5120 +112,8192,5120 +120,8192,5120 +128,8192,5120 +136,8192,5120 +144,8192,5120 +152,8192,5120 +160,8192,5120 +168,8192,5120 +176,8192,5120 +184,8192,5120 +192,8192,5120 +200,8192,5120 +208,8192,5120 +216,8192,5120 +224,8192,5120 +232,8192,5120 +240,8192,5120 +248,8192,5120 +256,8192,5120 +264,8192,5120 +272,8192,5120 +280,8192,5120 +288,8192,5120 +296,8192,5120 +304,8192,5120 +312,8192,5120 +320,8192,5120 +328,8192,5120 +336,8192,5120 +344,8192,5120 +352,8192,5120 +360,8192,5120 +368,8192,5120 +376,8192,5120 +384,8192,5120 +392,8192,5120 +400,8192,5120 +408,8192,5120 +416,8192,5120 +424,8192,5120 +432,8192,5120 +440,8192,5120 +448,8192,5120 +456,8192,5120 +464,8192,5120 +472,8192,5120 +480,8192,5120 +488,8192,5120 +496,8192,5120 +504,8192,5120 +512,8192,5120 +1024,8192,5120 +2048,8192,5120 +4096,8192,5120 +8192,8192,5120 +16384,8192,5120 +20480,8192,5120 +1,5120,3072 +2,5120,3072 +4,5120,3072 +8,5120,3072 +16,5120,3072 +24,5120,3072 +32,5120,3072 +40,5120,3072 +48,5120,3072 +56,5120,3072 +64,5120,3072 +72,5120,3072 +80,5120,3072 +88,5120,3072 +96,5120,3072 +104,5120,3072 +112,5120,3072 +120,5120,3072 +128,5120,3072 +136,5120,3072 +144,5120,3072 +152,5120,3072 +160,5120,3072 +168,5120,3072 +176,5120,3072 +184,5120,3072 +192,5120,3072 +200,5120,3072 +208,5120,3072 +216,5120,3072 +224,5120,3072 +232,5120,3072 +240,5120,3072 +248,5120,3072 +256,5120,3072 +264,5120,3072 +272,5120,3072 +280,5120,3072 +288,5120,3072 +296,5120,3072 +304,5120,3072 +312,5120,3072 +320,5120,3072 +328,5120,3072 +336,5120,3072 +344,5120,3072 +352,5120,3072 +360,5120,3072 +368,5120,3072 +376,5120,3072 +384,5120,3072 +392,5120,3072 +400,5120,3072 +408,5120,3072 +416,5120,3072 +424,5120,3072 +432,5120,3072 +440,5120,3072 +448,5120,3072 +456,5120,3072 +464,5120,3072 +472,5120,3072 +480,5120,3072 +488,5120,3072 +496,5120,3072 +504,5120,3072 +512,5120,3072 +1024,5120,3072 +2048,5120,3072 +4096,5120,3072 +8192,5120,3072 +16384,5120,3072 +20480,5120,3072 +1,17408,5120 +2,17408,5120 +4,17408,5120 +8,17408,5120 +16,17408,5120 +24,17408,5120 +32,17408,5120 +40,17408,5120 +48,17408,5120 +56,17408,5120 +64,17408,5120 +72,17408,5120 +80,17408,5120 +88,17408,5120 +96,17408,5120 +104,17408,5120 +112,17408,5120 +120,17408,5120 +128,17408,5120 +136,17408,5120 +144,17408,5120 +152,17408,5120 +160,17408,5120 +168,17408,5120 +176,17408,5120 +184,17408,5120 +192,17408,5120 +200,17408,5120 +208,17408,5120 +216,17408,5120 +224,17408,5120 +232,17408,5120 +240,17408,5120 +248,17408,5120 +256,17408,5120 +264,17408,5120 +272,17408,5120 +280,17408,5120 +288,17408,5120 +296,17408,5120 +304,17408,5120 +312,17408,5120 +320,17408,5120 +328,17408,5120 +336,17408,5120 +344,17408,5120 +352,17408,5120 +360,17408,5120 +368,17408,5120 +376,17408,5120 +384,17408,5120 +392,17408,5120 +400,17408,5120 +408,17408,5120 +416,17408,5120 +424,17408,5120 +432,17408,5120 +440,17408,5120 +448,17408,5120 +456,17408,5120 +464,17408,5120 +472,17408,5120 +480,17408,5120 +488,17408,5120 +496,17408,5120 +504,17408,5120 +512,17408,5120 +1024,17408,5120 +2048,17408,5120 +4096,17408,5120 +8192,17408,5120 +16384,17408,5120 +20480,17408,5120 +1,5120,8704 +2,5120,8704 +4,5120,8704 +8,5120,8704 +16,5120,8704 +24,5120,8704 +32,5120,8704 +40,5120,8704 +48,5120,8704 +56,5120,8704 +64,5120,8704 +72,5120,8704 +80,5120,8704 +88,5120,8704 +96,5120,8704 +104,5120,8704 +112,5120,8704 +120,5120,8704 +128,5120,8704 +136,5120,8704 +144,5120,8704 +152,5120,8704 +160,5120,8704 +168,5120,8704 +176,5120,8704 +184,5120,8704 +192,5120,8704 +200,5120,8704 +208,5120,8704 +216,5120,8704 +224,5120,8704 +232,5120,8704 +240,5120,8704 +248,5120,8704 +256,5120,8704 +264,5120,8704 +272,5120,8704 +280,5120,8704 +288,5120,8704 +296,5120,8704 +304,5120,8704 +312,5120,8704 +320,5120,8704 +328,5120,8704 +336,5120,8704 +344,5120,8704 +352,5120,8704 +360,5120,8704 +368,5120,8704 +376,5120,8704 +384,5120,8704 +392,5120,8704 +400,5120,8704 +408,5120,8704 +416,5120,8704 +424,5120,8704 +432,5120,8704 +440,5120,8704 +448,5120,8704 +456,5120,8704 +464,5120,8704 +472,5120,8704 +480,5120,8704 +488,5120,8704 +496,5120,8704 +504,5120,8704 +512,5120,8704 +1024,5120,8704 +2048,5120,8704 +4096,5120,8704 +8192,5120,8704 +16384,5120,8704 +20480,5120,8704 +1,7168,5120 +2,7168,5120 +4,7168,5120 +8,7168,5120 +16,7168,5120 +24,7168,5120 +32,7168,5120 +40,7168,5120 +48,7168,5120 +56,7168,5120 +64,7168,5120 +72,7168,5120 +80,7168,5120 +88,7168,5120 +96,7168,5120 +104,7168,5120 +112,7168,5120 +120,7168,5120 +128,7168,5120 +136,7168,5120 +144,7168,5120 +152,7168,5120 +160,7168,5120 +168,7168,5120 +176,7168,5120 +184,7168,5120 +192,7168,5120 +200,7168,5120 +208,7168,5120 +216,7168,5120 +224,7168,5120 +232,7168,5120 +240,7168,5120 +248,7168,5120 +256,7168,5120 +264,7168,5120 +272,7168,5120 +280,7168,5120 +288,7168,5120 +296,7168,5120 +304,7168,5120 +312,7168,5120 +320,7168,5120 +328,7168,5120 +336,7168,5120 +344,7168,5120 +352,7168,5120 +360,7168,5120 +368,7168,5120 +376,7168,5120 +384,7168,5120 +392,7168,5120 +400,7168,5120 +408,7168,5120 +416,7168,5120 +424,7168,5120 +432,7168,5120 +440,7168,5120 +448,7168,5120 +456,7168,5120 +464,7168,5120 +472,7168,5120 +480,7168,5120 +488,7168,5120 +496,7168,5120 +504,7168,5120 +512,7168,5120 +1024,7168,5120 +2048,7168,5120 +4096,7168,5120 +8192,7168,5120 +16384,7168,5120 +20480,7168,5120 diff --git a/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3_5-35B-A3B.csv b/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3_5-35B-A3B.csv new file mode 100644 index 0000000000..3e5a9b8bd4 --- /dev/null +++ b/aiter/configs/model_configs/a8w8_blockscale_untuned_gemm_qwen3_5-35B-A3B.csv @@ -0,0 +1,498 @@ +M,N,K +1,512,2048 +2,512,2048 +4,512,2048 +8,512,2048 +16,512,2048 +24,512,2048 +32,512,2048 +40,512,2048 +48,512,2048 +56,512,2048 +64,512,2048 +72,512,2048 +80,512,2048 +88,512,2048 +96,512,2048 +104,512,2048 +112,512,2048 +120,512,2048 +128,512,2048 +136,512,2048 +144,512,2048 +152,512,2048 +160,512,2048 +168,512,2048 +176,512,2048 +184,512,2048 +192,512,2048 +200,512,2048 +208,512,2048 +216,512,2048 +224,512,2048 +232,512,2048 +240,512,2048 +248,512,2048 +256,512,2048 +264,512,2048 +272,512,2048 +280,512,2048 +288,512,2048 +296,512,2048 +304,512,2048 +312,512,2048 +320,512,2048 +328,512,2048 +336,512,2048 +344,512,2048 +352,512,2048 +360,512,2048 +368,512,2048 +376,512,2048 +384,512,2048 +392,512,2048 +400,512,2048 +408,512,2048 +416,512,2048 +424,512,2048 +432,512,2048 +440,512,2048 +448,512,2048 +456,512,2048 +464,512,2048 +472,512,2048 +480,512,2048 +488,512,2048 +496,512,2048 +504,512,2048 +512,512,2048 +1024,512,2048 +2048,512,2048 +4096,512,2048 +8192,512,2048 +16384,512,2048 +20480,512,2048 +16,512,7168 +32,512,7168 +64,512,7168 +128,512,7168 +256,512,7168 +512,512,7168 +1024,512,7168 +1536,512,7168 +2048,512,7168 +4096,512,7168 +8192,512,7168 +16384,512,7168 +20480,512,7168 +16,576,7168 +32,576,7168 +64,576,7168 +128,576,7168 +256,576,7168 +512,576,7168 +1024,576,7168 +1536,576,7168 +2048,576,7168 +4096,576,7168 +8192,576,7168 +16384,576,7168 +20480,576,7168 +128,1024,4096 +16,1536,7168 +32,1536,7168 +64,1536,7168 +128,1536,7168 +256,1536,7168 +512,1536,7168 +1024,1536,7168 +1536,1536,7168 +2048,1536,7168 +4096,1536,7168 +8192,1536,7168 +16384,1536,7168 +20480,1536,7168 +1,2048,256 +2,2048,256 +4,2048,256 +8,2048,256 +16,2048,256 +24,2048,256 +32,2048,256 +40,2048,256 +48,2048,256 +56,2048,256 +64,2048,256 +72,2048,256 +80,2048,256 +88,2048,256 +96,2048,256 +104,2048,256 +112,2048,256 +120,2048,256 +128,2048,256 +136,2048,256 +144,2048,256 +152,2048,256 +160,2048,256 +168,2048,256 +176,2048,256 +184,2048,256 +192,2048,256 +200,2048,256 +208,2048,256 +216,2048,256 +224,2048,256 +232,2048,256 +240,2048,256 +248,2048,256 +256,2048,256 +264,2048,256 +272,2048,256 +280,2048,256 +288,2048,256 +296,2048,256 +304,2048,256 +312,2048,256 +320,2048,256 +328,2048,256 +336,2048,256 +344,2048,256 +352,2048,256 +360,2048,256 +368,2048,256 +376,2048,256 +384,2048,256 +392,2048,256 +400,2048,256 +408,2048,256 +416,2048,256 +424,2048,256 +432,2048,256 +440,2048,256 +448,2048,256 +456,2048,256 +464,2048,256 +472,2048,256 +480,2048,256 +488,2048,256 +496,2048,256 +504,2048,256 +512,2048,256 +1024,2048,256 +2048,2048,256 +4096,2048,256 +8192,2048,256 +16384,2048,256 +20480,2048,256 +1,2048,2048 +2,2048,2048 +4,2048,2048 +8,2048,2048 +16,2048,2048 +24,2048,2048 +32,2048,2048 +40,2048,2048 +48,2048,2048 +56,2048,2048 +64,2048,2048 +72,2048,2048 +80,2048,2048 +88,2048,2048 +96,2048,2048 +104,2048,2048 +112,2048,2048 +120,2048,2048 +128,2048,2048 +136,2048,2048 +144,2048,2048 +152,2048,2048 +160,2048,2048 +168,2048,2048 +176,2048,2048 +184,2048,2048 +192,2048,2048 +200,2048,2048 +208,2048,2048 +216,2048,2048 +224,2048,2048 +232,2048,2048 +240,2048,2048 +248,2048,2048 +256,2048,2048 +264,2048,2048 +272,2048,2048 +280,2048,2048 +288,2048,2048 +296,2048,2048 +304,2048,2048 +312,2048,2048 +320,2048,2048 +328,2048,2048 +336,2048,2048 +344,2048,2048 +352,2048,2048 +360,2048,2048 +368,2048,2048 +376,2048,2048 +384,2048,2048 +392,2048,2048 +400,2048,2048 +408,2048,2048 +416,2048,2048 +424,2048,2048 +432,2048,2048 +440,2048,2048 +448,2048,2048 +456,2048,2048 +464,2048,2048 +472,2048,2048 +480,2048,2048 +488,2048,2048 +496,2048,2048 +504,2048,2048 +512,2048,2048 +1024,2048,2048 +2048,2048,2048 +4096,2048,2048 +8192,2048,2048 +16384,2048,2048 +20480,2048,2048 +1,2560,2048 +2,2560,2048 +4,2560,2048 +8,2560,2048 +16,2560,2048 +24,2560,2048 +32,2560,2048 +40,2560,2048 +48,2560,2048 +56,2560,2048 +64,2560,2048 +72,2560,2048 +80,2560,2048 +88,2560,2048 +96,2560,2048 +104,2560,2048 +112,2560,2048 +120,2560,2048 +128,2560,2048 +136,2560,2048 +144,2560,2048 +152,2560,2048 +160,2560,2048 +168,2560,2048 +176,2560,2048 +184,2560,2048 +192,2560,2048 +200,2560,2048 +208,2560,2048 +216,2560,2048 +224,2560,2048 +232,2560,2048 +240,2560,2048 +248,2560,2048 +256,2560,2048 +272,2560,2048 +288,2560,2048 +304,2560,2048 +320,2560,2048 +336,2560,2048 +352,2560,2048 +368,2560,2048 +384,2560,2048 +400,2560,2048 +416,2560,2048 +432,2560,2048 +448,2560,2048 +464,2560,2048 +480,2560,2048 +496,2560,2048 +512,2560,2048 +8192,2560,2048 +128,4096,1280 +1,4608,2048 +2,4608,2048 +4,4608,2048 +8,4608,2048 +16,4608,2048 +24,4608,2048 +32,4608,2048 +40,4608,2048 +48,4608,2048 +56,4608,2048 +64,4608,2048 +72,4608,2048 +80,4608,2048 +88,4608,2048 +96,4608,2048 +104,4608,2048 +112,4608,2048 +120,4608,2048 +128,4608,2048 +136,4608,2048 +144,4608,2048 +152,4608,2048 +160,4608,2048 +168,4608,2048 +176,4608,2048 +184,4608,2048 +192,4608,2048 +200,4608,2048 +208,4608,2048 +216,4608,2048 +224,4608,2048 +232,4608,2048 +240,4608,2048 +248,4608,2048 +256,4608,2048 +264,4608,2048 +272,4608,2048 +280,4608,2048 +288,4608,2048 +296,4608,2048 +304,4608,2048 +312,4608,2048 +320,4608,2048 +328,4608,2048 +336,4608,2048 +344,4608,2048 +352,4608,2048 +360,4608,2048 +368,4608,2048 +376,4608,2048 +384,4608,2048 +392,4608,2048 +400,4608,2048 +408,4608,2048 +416,4608,2048 +424,4608,2048 +432,4608,2048 +440,4608,2048 +448,4608,2048 +456,4608,2048 +464,4608,2048 +472,4608,2048 +480,4608,2048 +488,4608,2048 +496,4608,2048 +504,4608,2048 +512,4608,2048 +1024,4608,2048 +2048,4608,2048 +4096,4608,2048 +8192,4608,2048 +16384,4608,2048 +20480,4608,2048 +16,4608,7168 +32,4608,7168 +64,4608,7168 +128,4608,7168 +256,4608,7168 +512,4608,7168 +1024,4608,7168 +1536,4608,7168 +2048,4608,7168 +4096,4608,7168 +8192,4608,7168 +16384,4608,7168 +20480,4608,7168 +1,6144,2048 +2,6144,2048 +4,6144,2048 +8,6144,2048 +16,6144,2048 +24,6144,2048 +32,6144,2048 +40,6144,2048 +48,6144,2048 +56,6144,2048 +64,6144,2048 +72,6144,2048 +80,6144,2048 +88,6144,2048 +96,6144,2048 +104,6144,2048 +112,6144,2048 +120,6144,2048 +128,6144,2048 +136,6144,2048 +144,6144,2048 +152,6144,2048 +160,6144,2048 +168,6144,2048 +176,6144,2048 +184,6144,2048 +192,6144,2048 +200,6144,2048 +208,6144,2048 +216,6144,2048 +224,6144,2048 +232,6144,2048 +240,6144,2048 +248,6144,2048 +256,6144,2048 +264,6144,2048 +272,6144,2048 +280,6144,2048 +288,6144,2048 +296,6144,2048 +304,6144,2048 +312,6144,2048 +320,6144,2048 +328,6144,2048 +336,6144,2048 +344,6144,2048 +352,6144,2048 +360,6144,2048 +368,6144,2048 +376,6144,2048 +384,6144,2048 +392,6144,2048 +400,6144,2048 +408,6144,2048 +416,6144,2048 +424,6144,2048 +432,6144,2048 +440,6144,2048 +448,6144,2048 +456,6144,2048 +464,6144,2048 +472,6144,2048 +480,6144,2048 +488,6144,2048 +496,6144,2048 +504,6144,2048 +512,6144,2048 +1024,6144,2048 +2048,6144,2048 +4096,6144,2048 +8192,6144,2048 +16384,6144,2048 +20480,6144,2048 +16,7168,256 +32,7168,256 +64,7168,256 +128,7168,256 +256,7168,256 +512,7168,256 +1024,7168,256 +1536,7168,256 +2048,7168,256 +4096,7168,256 +8192,7168,256 +16384,7168,256 +20480,7168,256 +16,7168,2304 +32,7168,2304 +64,7168,2304 +128,7168,2304 +256,7168,2304 +512,7168,2304 +1024,7168,2304 +1536,7168,2304 +2048,7168,2304 +4096,7168,2304 +8192,7168,2304 +16384,7168,2304 +20480,7168,2304