diff --git a/.githooks/pre-commit b/.githooks/pre-commit
new file mode 100755
index 0000000..7bc59ff
--- /dev/null
+++ b/.githooks/pre-commit
@@ -0,0 +1,39 @@
+#!/bin/sh
+#
+# Pre-commit hook: runs the Spotless format check matching each kind of staged
+# file and aborts the commit (exit 1) when a check fails. Files are only
+# checked here, never reformatted or re-staged.
+
+# Staged paths (added, copied, modified or renamed). Queried once so the three
+# filters below all see the same snapshot of the index.
+STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACMR)
+
+# `|| true`: grep exits non-zero when nothing matches; that is not an error here.
+KT_FILES=$(printf '%s\n' "$STAGED_FILES" | grep -E '^android/.*\.kt$' || true)
+GRADLE_FILES=$(printf '%s\n' "$STAGED_FILES" | grep -E '.*\.gradle\.kts$' || true)
+CPP_FILES=$(printf '%s\n' "$STAGED_FILES" | grep -E '^(jni/.*\.(cpp|h|c|hpp|cc)|cli/.*\.(cpp|h|c|hpp|cc)|cpp/.*\.(cpp|h|c|hpp|cc))$' || true)
+
+# Run spotless checks if needed
+if [ -n "$KT_FILES" ]; then
+    echo "Running spotlessKotlinCheck..."
+    if ! ./gradlew spotlessKotlinCheck; then
+        echo "❌ Kotlin files need formatting. Please run './gradlew spotlessKotlinApply' or 'make format' and commit again."
+        exit 1
+    fi
+fi
+
+if [ -n "$GRADLE_FILES" ]; then
+    echo "Running spotlessKotlinGradleCheck..."
+    if ! ./gradlew spotlessKotlinGradleCheck; then
+        echo "❌ Gradle files need formatting. Please run './gradlew spotlessKotlinGradleApply' or 'make format' and commit again."
+        exit 1
+    fi
+fi
+
+if [ -n "$CPP_FILES" ]; then
+    echo "Running spotlessCppCheck..."
+    if ! ./gradlew spotlessCppCheck; then
+        echo "❌ C++ files need formatting. Please run './gradlew spotlessCppApply' or 'make format' and commit again."
+        exit 1
+    fi
+fi
diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml
new file mode 100644
index 0000000..de4a16d
--- /dev/null
+++ b/.github/workflows/pr-checks.yml
@@ -0,0 +1,83 @@
+name: PR Checks
+
+# NOTE: GitHub Actions runs each `run:` script with `bash -e` by default, so a
+# script stops at the first failing command. Commands whose failure should
+# print a hint are therefore tested directly with `if ! ...; then`; a separate
+# `$?` check on the following line would never be reached.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  push:
+    branches:
+      - main
+
+jobs:
+  check-format:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up JDK 17
+        uses: actions/setup-java@v3
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+          cache: gradle
+
+      - name: Install clang-format 14 (native)
+        run: |
+          sudo apt update
+          sudo apt install -y clang-format-14
+          sudo ln -sf /usr/bin/clang-format-14 /usr/local/bin/clang-format
+          clang-format --version
+
+      - name: Verify clang-format installation
+        run: |
+          if ! command -v clang-format >/dev/null; then
+            echo "❌ clang-format not found"
+            exit 1
+          fi
+          clang-format --version
+
+      - name: Grant execute permission for gradlew
+        run: chmod +x gradlew
+
+      - name: Run spotlessCheck
+        run: |
+          echo "Running spotlessCheck..."
+          if ! ./gradlew spotlessCheck; then
+            echo "❌ spotlessCheck failed. Please run './gradlew spotlessApply' locally to fix formatting issues."
+            exit 1
+          fi
+
+  build-test-kotlin:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up JDK 17
+        uses: actions/setup-java@v3
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+          cache: gradle
+
+      - name: Grant execute permission for gradlew
+        run: chmod +x gradlew
+
+      - name: Run detekt
+        run: |
+          echo "Running detekt..."
+          if ! ./gradlew detekt; then
+            echo "❌ detekt found code style issues. Please fix them locally."
+            exit 1
+          fi
+
+      - name: Run unit tests
+        run: |
+          echo "Running unit tests..."
+          if ! ./gradlew testDebugUnitTest; then
+            echo "❌ Unit tests failed. Please fix the failing tests locally."
+            exit 1
+          fi
\ No newline at end of file
diff --git a/README.md b/README.md
index 8e725e0..79cd7ea 100644
--- a/README.md
+++ b/README.md
@@ -230,6 +230,64 @@ With `all` option, it will conduct deep clean including open source components.
 
 WhisperKit Android is currently in the beta stage. We are actively developing the project and welcome contributions from the community.
 
+## Development Setup
+
+### Installing clang-format
+
+The project uses clang-format 14 for C++ code formatting. You'll need to install it based on your operating system:
+
+#### macOS
+```bash
+# Install LLVM 14
+brew install llvm@14
+
+# Create symlink for clang-format
+sudo ln -sf /opt/homebrew/opt/llvm@14/bin/clang-format /opt/homebrew/bin/clang-format
+```
+
+#### Linux (Ubuntu/Debian)
+```bash
+sudo apt update
+sudo apt install -y clang-format-14
+sudo ln -sf /usr/bin/clang-format-14 /usr/local/bin/clang-format
+```
+
+Verify the installation:
+```bash
+clang-format --version
+```
+
+To check C++ code formatting:
+```bash
+./gradlew spotlessCppCheck
+```
+
+## Git Hooks
+
+This project uses Git hooks to maintain code quality. These hooks help ensure consistent code formatting and quality standards.
+
+### Setup
+
+To use the Git hooks, run the following command in your repository root:
+
+```bash
+git config core.hooksPath .githooks
+```
+
+### Available Hooks
+
+#### pre-commit
+- Runs the Spotless check matching each kind of staged file: `spotlessKotlinCheck` (Kotlin under `android/`), `spotlessKotlinGradleCheck` (`*.gradle.kts`), and `spotlessCppCheck` (C/C++ under `jni/`, `cli/`, `cpp/`)
+- Files are only checked; the hook never modifies or stages them
+- The commit is blocked if a check fails; run `./gradlew spotlessApply` (or `make format`), stage the result, and commit again
+
+### Troubleshooting
+
+If you need to bypass the hooks temporarily (not recommended), you can use:
+```bash
+git commit --no-verify
+```
+
 # License
 
 - We release WhisperKit Android under [MIT License](LICENSE).
diff --git a/build.gradle.kts b/build.gradle.kts index f649b2f..83cda58 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -37,6 +37,6 @@ spotless { "cpp/**/*.cpp", "cpp/**/*.h", "cpp/**/*.c", "cpp/**/*.hpp", "cpp/**/*.cc", ) targetExclude("**/build/**", "**/external/**") - clangFormat("20.1.5") + clangFormat("14.0.6") } } diff --git a/cpp/src/Models/TextDecoder.cpp b/cpp/src/Models/TextDecoder.cpp index 39a0218..297bc94 100644 --- a/cpp/src/Models/TextDecoder.cpp +++ b/cpp/src/Models/TextDecoder.cpp @@ -101,15 +101,15 @@ bool is_exact_match_for_separate_kv_cache_no_alignment_heads(const tflite::Model } /* helper functions to calculate the number of inputs and outputs for a given number of layers */ - auto calculate_num_inputs_for_variant_with_layers = [=](const int num_layers) -> auto { + auto calculate_num_inputs_for_variant_with_layers = [=](const int num_layers) -> auto{ return num_shared_inputs + num_layers * kv_factor; }; - auto calculate_num_outputs_for_variant_with_layers = [=](const int num_layers) -> auto { + auto calculate_num_outputs_for_variant_with_layers = [=](const int num_layers) -> auto{ return kv_factor * num_layers + 1; }; - auto output_names_for_variant_with_layers = [=](const int num_layers) -> auto { + auto output_names_for_variant_with_layers = [=](const int num_layers) -> auto{ std::unordered_set output_names; output_names.insert(std::string("logits")); for (int i = 0; i < num_layers; ++i) { @@ -119,7 +119,7 @@ bool is_exact_match_for_separate_kv_cache_no_alignment_heads(const tflite::Model return output_names; }; - auto input_names_for_variant_with_layers = [](const int num_layers) -> auto { + auto input_names_for_variant_with_layers = [](const int num_layers) -> auto{ std::unordered_set input_names; input_names.insert(std::string("x")); input_names.insert(std::string("index")); diff --git a/cpp/src/Text/post_proc.cpp b/cpp/src/Text/post_proc.cpp index bf23fe5..fada65b 100644 --- a/cpp/src/Text/post_proc.cpp +++ 
b/cpp/src/Text/post_proc.cpp @@ -5,7 +5,7 @@ #define LOGITS_TO_NEG_INF(start, end) \ for (auto iter = (start); iter != (end); iter++) *iter = -1e9; -#define DEC_2_ROUND(x) (round((x) * 100.0) / 100.0) +#define DEC_2_ROUND(x) (round((x)*100.0) / 100.0) using namespace std; using json = nlohmann::json; diff --git a/cpp/src/Text/post_proc.hpp b/cpp/src/Text/post_proc.hpp index 319dfe6..f6efaad 100644 --- a/cpp/src/Text/post_proc.hpp +++ b/cpp/src/Text/post_proc.hpp @@ -14,7 +14,7 @@ constexpr const uint32_t SAMPLE_BEGIN = 1; class PostProcModel : public MODEL_SUPER_CLASS { public: PostProcModel(Tokenizer* tokenizer, bool timestamp_text = false); - virtual ~PostProcModel() {}; + virtual ~PostProcModel(){}; bool initialize(bool debug = false); virtual void invoke(bool measure_time = false); diff --git a/cpp/src/WhisperKitConfiguration.cpp b/cpp/src/WhisperKitConfiguration.cpp index dfa4bbf..62390ac 100644 --- a/cpp/src/WhisperKitConfiguration.cpp +++ b/cpp/src/WhisperKitConfiguration.cpp @@ -4,7 +4,7 @@ #include "WhisperKitPipeline.hpp" #include "backend_class.hpp" -whisperkit_configuration_t::whisperkit_configuration_t() {}; +whisperkit_configuration_t::whisperkit_configuration_t(){}; void whisperkit_configuration_t::set_audio_encoder(const char* audio_encoder) noexcept { this->audio_encoder = audio_encoder;