Changes from all commits (29 commits)
- 0aacbdc LUT by CC; tune more iters (Starmys, Jan 30, 2023)
- deca07d fix matmul param checker; bench unbind device (Starmys, Jan 31, 2023)
- 8528fa7 rearrange tuning code (Starmys, Jan 31, 2023)
- b718c59 LUT maker (Starmys, Jan 31, 2023)
- 67a981e fix matmul parameter checker (Starmys, Feb 2, 2023)
- bb91310 update LUT maker (Starmys, Feb 2, 2023)
- 624f234 fix LUT maker: aggregate log by idxmin (Starmys, Feb 2, 2023)
- d6e6386 fix sparse softmax & BCSR kernel; add 61 LUTs (Starmys, Feb 6, 2023)
- e1cc4cd add 70 LUTs (as default) (Starmys, Feb 6, 2023)
- bd11b5f add 75 LUTs (Starmys, Feb 6, 2023)
- 530e530 fix kernel.set_parameters() (Starmys, Feb 6, 2023)
- 40ea972 Merge branch 'main' of github.com:Starmys/SparTA into main (Starmys, Feb 6, 2023)
- 66f1748 refactoring: functional (Starmys, Feb 21, 2023)
- 1cbcbfe update operators; combine functions and operators (Starmys, Feb 24, 2023)
- 8de6950 move sparse attr to kernel level (Starmys, Mar 7, 2023)
- 604a5b3 fix port connection (Starmys, Mar 21, 2023)
- ff52951 SparseLinear DSD support dynamic input shape (Starmys, Mar 23, 2023)
- 8cad262 FlashSparseAttentionForwardKernel; fix softmax kernels (Starmys, Apr 13, 2023)
- 370a6db FlashSparseAttentionBackwardKernel with limited performance (Starmys, Apr 27, 2023)
- cba098b update FlashSparseAttentionBackwardKernel (Starmys, Apr 27, 2023)
- 6f8b721 update FlashSparseAttentionBackwardKernel (Starmys, May 4, 2023)
- 7aaf4c5 Flash Attention fp16 forward version 1 (Starmys, May 9, 2023)
- dcc4197 Flash Attention fp16 forward version 2: pad (bank conflict) & fp32-so… (Starmys, May 9, 2023)
- a1ff80c Flash Attention fp16 backward version 1 (Starmys, May 11, 2023)
- 2a16117 use dynamic shared memory in Flash Attention (Starmys, May 12, 2023)
- 92a6781 Flash Attention fp16 backward version 2 (Starmys, May 12, 2023)
- 1f636e1 update Flash Attention kernel: transpose (N, H) is not required (Starmys, May 16, 2023)
- d4027ba update Flash Attention FP32 kernel: transpose (N, H) is not required (Starmys, May 16, 2023)
- 5bcae7d Fix sparse kernel unit tests (Starmys, Feb 18, 2024)
1 change: 1 addition & 0 deletions .gitignore
@@ -8,3 +8,4 @@ _build
 generated
 test/bench/*/latency.csv
 test/bench/*/latency.png
+test/lut_maker/*.log.csv
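The new ignore entry follows the same shell-style globbing as the existing benchmark entries: in `.gitignore`, a single `*` matches within one path segment and does not cross `/`. As an illustrative sketch (not part of the change, with hypothetical file names), Python's `pathlib` mirrors this matching behavior:

```python
from pathlib import PurePosixPath

# Files the patterns are meant to cover (hypothetical names).
assert PurePosixPath('test/bench/matmul/latency.csv').match('test/bench/*/latency.csv')
assert PurePosixPath('test/lut_maker/tune.log.csv').match('test/lut_maker/*.log.csv')

# A single '*' matches exactly one path segment, so deeper files do not match.
assert not PurePosixPath('test/bench/a/b/latency.csv').match('test/bench/*/latency.csv')
```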
6 changes: 3 additions & 3 deletions docs/1-code-specializer.md
@@ -10,9 +10,9 @@ To balance between the flexibility, performance, and developing efficiency, we a

 | Layer | Base Class | Role |
 | :- | :- | :- |
-| Sparse Operator | [`sparta.nn.OperatorBase`](reference/nn.rst) | User interface as `torch.nn.Module` |
-| Sparse Context | `sparta.specializer.funtional.SparseCtxBase` | Function context to interact with `torch.autograd.Function` |
-| Sparse Kernel Placeholder | `sparta.specializer.funtional.KernelPlaceholder` | Collection of multiple kernel implementations |
+| Sparse Operator | [`sparta.nn.SparseOperator`](reference/nn.rst) | User interface as `torch.nn.Module` |
+| Sparse Context | `sparta.specializer.functional.SparseCtxBase` | Function context to interact with `torch.autograd.Function` |
+| Sparse Kernel Placeholder | `sparta.specializer.functional.KernelPlaceholder` | Collection of multiple kernel implementations |
 | Sparse Kernel | `sparta.specializer.kernels.KernelBase` | Tunable sparse CUDA kernel interface |

 ## Generating CUDA Codes
2 changes: 1 addition & 1 deletion docs/reference/nn.rst
@@ -2,7 +2,7 @@
 sparta.nn
 ===================================

-.. autoclass:: sparta.nn.OperatorBase
+.. autoclass:: sparta.nn.SparseOperator
     :members:

 .. autoclass:: sparta.nn.SparseLinear
4 changes: 2 additions & 2 deletions examples/sparse_attention.ipynb
@@ -122,7 +122,7 @@
 "source": [
 "Check whether the sparse operator works correctly.\n",
 "\n",
-"We provide `sparta.testing.sparse_multi_head_attention_reference()` function to calculate masked attention using dense method."
+"We provide `sparta.testing.sparse_multi_head_attention_forward_reference()` function to calculate masked attention using dense method."
 ]
 },
 {
@@ -141,7 +141,7 @@
 "value.requires_grad = True\n",
 "\n",
 "def dense_attention(query, key, value):\n",
-"    return sparta.testing.sparse_multi_head_attention_reference(query, key, value, mask)\n",
+"    return sparta.testing.sparse_multi_head_attention_forward_reference(query, key, value, mask)\n",
 "\n",
 "for sparse_out, dense_out in zip(forward_backward(dense_attention), forward_backward(sparse_attention)):\n",
 "    torch.testing.assert_close(sparse_out, dense_out)"
13 changes: 7 additions & 6 deletions setup.py
@@ -21,14 +21,15 @@
 os.makedirs(os.path.join('csrc', 'build'), exist_ok=True)
 with open(os.path.join('csrc', 'build', 'moe_sparse_forward_kernel.cu'), 'w') as f:
     f.write(moe_kernel)
+
 moe_ext = CUDAExtension(
-    name='sparse_moe_cpp',
+    name='sparta.sp_moe_ops',
     sources=[
         os.path.join('csrc', 'moe_sparse_forward.cpp'),
         os.path.join('csrc', 'build', 'moe_sparse_forward_kernel.cu'),
     ],
     extra_compile_args=[
-        '-std=c++14',
+        '-std=c++17',
         '-O3',
         '-U__CUDA_NO_HALF_OPERATORS__',
         '-U__CUDA_NO_HALF_CONVERSIONS__',
@@ -37,12 +38,12 @@
 ext_modules.append(moe_ext)

 seqlen_dynamic_attention_ext = CUDAExtension(
-    name='seqlen_dynamic_sparse_attention_cpp',
+    name='sparta.sp_attn_ops',
     sources=[
         os.path.join('csrc', 'seqlen_dynamic_sparse_attention_forward.cpp'),
         os.path.join('csrc', 'seqlen_dynamic_sparse_attention_forward_kernel.cu'),
     ],
-    extra_compile_args=['-std=c++14', '-O3'],
+    extra_compile_args=['-std=c++17', '-O3'],
 )
 ext_modules.append(seqlen_dynamic_attention_ext)

@@ -63,8 +64,8 @@
     cmdclass={'build_ext': BuildExtension},
     include_package_data=True,
     package_data={
-        'sparta.specializer.kernels.templates': ['*.j2'],
-        'sparta.specializer.kernels.look_up_tables': ['*.csv'],
+        'sparta.kernels.templates': ['*.j2'],
+        'sparta.kernels.look_up_tables': ['*.csv'],
         'sparta.tesa.templates': ['*.j2'],
     },
 )
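The dotted extension names introduced in this diff (`sparta.sp_moe_ops`, `sparta.sp_attn_ops`) make the compiled modules install as submodules of the `sparta` package rather than as top-level modules. A minimal sketch of that pattern with `torch.utils.cpp_extension` (illustrative only; the package and file names here are assumptions, not SparTA's actual layout):

```python
# Sketch: building a CUDA extension as a package submodule (requires PyTorch).
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='mypkg',
    ext_modules=[
        CUDAExtension(
            # A dotted name places the built module at mypkg.ops,
            # so user code can write `from mypkg import ops`
            # instead of importing a loose top-level module.
            name='mypkg.ops',
            sources=['csrc/ops.cpp', 'csrc/ops_kernel.cu'],
            extra_compile_args=['-std=c++17', '-O3'],
        ),
    ],
    cmdclass={'build_ext': BuildExtension},
)
```

The `-std=c++17` bump matches recent PyTorch releases, whose headers require C++17 and reject `-std=c++14` at compile time.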
119 changes: 0 additions & 119 deletions sparta/common/tuning.py

This file was deleted.

99 changes: 0 additions & 99 deletions sparta/common/utils.py

This file was deleted.

7 changes: 7 additions & 0 deletions sparta/kernels/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from sparta.kernels.kernel_base import KernelBase, SparsityAttr, KernelGroup
+from sparta.kernels.matmul import SparseMatMulKernel, SparTASparseMatMulKernel, OpenAISparseMatMulKernel
+from sparta.kernels.softmax import SparseSoftmaxForwardKernel, SparTASparseSoftmaxForwardKernel, SparseSoftmaxBackwardKernel, SparTASparseSoftmaxBackwardKernel
+from sparta.kernels.attention import FlashSparseAttentionFP32ForwardKernel, FlashSparseAttentionFP32BackwardKernel, FlashSparseAttentionFP16ForwardKernel, FlashSparseAttentionFP16BackwardKernel