diff --git a/tests/device_tests/a2a3/tensormap_and_ringbuffer/paged_attention/golden.py b/tests/device_tests/a2a3/tensormap_and_ringbuffer/paged_attention/golden.py index 433e8a3b..898c4fad 100644 --- a/tests/device_tests/a2a3/tensormap_and_ringbuffer/paged_attention/golden.py +++ b/tests/device_tests/a2a3/tensormap_and_ringbuffer/paged_attention/golden.py @@ -23,7 +23,7 @@ "dtype": "bfloat16", }, "Case2": { - "batch": 256, + "batch": 64, "num_heads": 64, "kv_head_num": 1, "head_dim": 128, @@ -42,36 +42,6 @@ "max_model_len": 32768, "dtype": "bfloat16", }, - "Case4": { - "batch": 512, - "num_heads": 16, - "kv_head_num": 1, - "head_dim": 128, - "block_size": 128, - "context_len": 8192, - "max_model_len": 32768, - "dtype": "bfloat16", - }, - "Case5": { - "batch": 128, - "num_heads": 64, - "kv_head_num": 1, - "head_dim": 128, - "block_size": 64, - "context_len": 16384, - "max_model_len": 32768, - "dtype": "bfloat16", - }, - "Case6": { - "batch": 32, - "num_heads": 16, - "kv_head_num": 1, - "head_dim": 128, - "block_size": 128, - "context_len": 4096, - "max_model_len": 32768, - "dtype": "bfloat16", - }, } DEFAULT_CASE = "Case1" diff --git a/tests/device_tests/a2a3/tensormap_and_ringbuffer/paged_attention_unroll/golden.py b/tests/device_tests/a2a3/tensormap_and_ringbuffer/paged_attention_unroll/golden.py index cd0c7916..65fc992a 100644 --- a/tests/device_tests/a2a3/tensormap_and_ringbuffer/paged_attention_unroll/golden.py +++ b/tests/device_tests/a2a3/tensormap_and_ringbuffer/paged_attention_unroll/golden.py @@ -23,7 +23,7 @@ "dtype": "bfloat16", }, "Case2": { - "batch": 256, + "batch": 64, "num_heads": 64, "kv_head_num": 1, "head_dim": 128,