diff --git a/.github/workflows/build-on-release.yml b/.github/workflows/build-on-release.yml index d2282ad..81d7d78 100644 --- a/.github/workflows/build-on-release.yml +++ b/.github/workflows/build-on-release.yml @@ -53,7 +53,7 @@ jobs: - name: Build and push base image uses: docker/build-push-action@v6 with: - context: embed + platforms: ${{ matrix.platform }} push: true tags: | @@ -126,7 +126,7 @@ jobs: - name: Build and push variant image uses: docker/build-push-action@v6 with: - context: embed + file: embed/Dockerfile.variant platforms: ${{ matrix.platform }} push: true diff --git a/.github/workflows/test-and-tag.yml b/.github/workflows/test-and-tag.yml index ae2e909..b0ec2a8 100644 --- a/.github/workflows/test-and-tag.yml +++ b/.github/workflows/test-and-tag.yml @@ -7,16 +7,11 @@ on: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 - WORKING_DIRECTORY: embed jobs: test: name: Test Suite runs-on: ubuntu-latest - defaults: - run: - working-directory: ${{ env.WORKING_DIRECTORY }} - services: localstack: image: localstack/localstack:4.11.1 @@ -63,7 +58,7 @@ jobs: - name: Cache cargo build uses: actions/cache@v3 with: - path: embed/target + path: target key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('Cargo.lock') }} restore-keys: | ${{ runner.os }}-cargo-build-target- @@ -71,53 +66,4 @@ jobs: - name: Run tests env: LOCALSTACK_ENDPOINT: http://127.0.0.1:4566 - run: cargo test --verbose - - create-tag: - name: Create Tag - runs-on: ubuntu-latest - needs: test - if: github.ref == 'refs/heads/main' && github.event_name == 'push' - permissions: - contents: write - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Get latest tag - id: get_latest_tag - run: | - # Get the latest tag, or use v0.0.0 if no tags exist - LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0") - echo "latest_tag=$LATEST_TAG" >> $GITHUB_OUTPUT - echo "Latest tag: $LATEST_TAG" - - - name: Bump version - id: bump_version - run: 
| - LATEST_TAG="${{ steps.get_latest_tag.outputs.latest_tag }}" - # Remove 'v' prefix if present - VERSION=${LATEST_TAG#v} - - # Split version into parts - IFS='.' read -r -a VERSION_PARTS <<< "$VERSION" - MAJOR="${VERSION_PARTS[0]}" - MINOR="${VERSION_PARTS[1]}" - PATCH="${VERSION_PARTS[2]}" - - # Bump patch version - PATCH=$((PATCH + 1)) - - NEW_VERSION="v${MAJOR}.${MINOR}.${PATCH}" - echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT - echo "New version: $NEW_VERSION" - - - name: Create and push tag - run: | - NEW_VERSION="${{ steps.bump_version.outputs.new_version }}" - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - git tag -a "$NEW_VERSION" -m "Release $NEW_VERSION" - git push origin "$NEW_VERSION" \ No newline at end of file + run: cargo test --verbose \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 4f97cf4..4fd4467 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3580,7 +3580,6 @@ dependencies = [ "aws-config", "aws-credential-types", "aws-sdk-s3", - "cc", "clap", "fastembed", "lambda_runtime", diff --git a/Cargo.toml b/Cargo.toml index 3bf8560..6756956 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,3 @@ - [package] name = "serverless-vectorizer" version = "0.1.0" @@ -66,4 +65,4 @@ path = "tests/unit_tests.rs" required-features = ["aws"] [build-dependencies] -cc = { version = "1.1.24" } + diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..3329aec --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +Copyright 2026 RustyZip Contributors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..d944902 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 RustyZip Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md index 012169e..df7cb99 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,23 @@ # Serverless Vectorizer -AWS Lambda container image for generating text embeddings. Models are pre-loaded into Docker images for fast cold starts - one image per model variant. +[![CI](https://github.com/johnnywale/serverless-vectorizer/actions/workflows/ci.yml/badge.svg)](https://github.com/johnnywale/serverless-vectorizer/actions/workflows/ci.yml) +[![Release](https://img.shields.io/github/v/release/johnnywale/serverless-vectorizer)](https://github.com/johnnywale/serverless-vectorizer/releases) +[![License](https://img.shields.io/github/license/johnnywale/serverless-vectorizer)](LICENSE-MIT) +[![Docker Pulls](https://img.shields.io/docker/pulls/johnnywale/serverless-vectorizer)](https://hub.docker.com/r/johnnywale/serverless-vectorizer) +[![Docker Image Size](https://img.shields.io/docker/image-size/johnnywale/serverless-vectorizer/latest)](https://hub.docker.com/r/johnnywale/serverless-vectorizer) + +AWS Lambda container image for generating text embeddings. Models are pre-loaded into Docker images for fast cold +starts - one image per model variant. 
## Supported Models -| Model | ID | Dimension | Language | -|-------|-----|-----------|----------| -| BGE-Small-EN-v1.5 | `bge-small-en-v1.5` | 384 | English | -| BGE-Base-EN-v1.5 | `bge-base-en-v1.5` | 768 | English | -| BGE-Large-EN-v1.5 | `bge-large-en-v1.5` | 1024 | English | -| Multilingual-E5-Large | `multilingual-e5-large` | 1024 | Multilingual | -| All-MpNet-Base-v2 | `all-mpnet-base-v2` | 768 | English | +| Model | ID | Dimension | Language | +|-----------------------|-------------------------|-----------|--------------| +| BGE-Small-EN-v1.5 | `bge-small-en-v1.5` | 384 | English | +| BGE-Base-EN-v1.5 | `bge-base-en-v1.5` | 768 | English | +| BGE-Large-EN-v1.5 | `bge-large-en-v1.5` | 1024 | English | +| Multilingual-E5-Large | `multilingual-e5-large` | 1024 | Multilingual | +| All-MpNet-Base-v2 | `all-mpnet-base-v2` | 768 | English | All models support a maximum of 512 tokens per input text. @@ -92,9 +99,21 @@ aws lambda invoke \ ``` **Response:** + ```json { - "embeddings": [[0.123, 0.456, ...], [0.789, 0.012, ...]], + "embeddings": [ + [ + 0.123, + 0.456, + ... + ], + [ + 0.789, + 0.012, + ... + ] + ], "dimension": 384 } ``` @@ -121,6 +140,7 @@ aws lambda invoke \ ``` The S3 file can contain: + - Plain text (embedded as single document) - JSON array of strings (each string embedded separately) @@ -142,9 +162,16 @@ aws lambda invoke \ ``` **Response includes S3 location:** + ```json { - "embeddings": [[0.123, 0.456, ...]], + "embeddings": [ + [ + 0.123, + 0.456, + ... 
+ ] + ], "dimension": 384, "s3_location": "s3://my-output-bucket/embeddings/output.json" } @@ -171,9 +198,15 @@ aws lambda invoke \ ```json { - "messages": ["text1", "text2"], // Direct text input (array of strings) - "s3_file": "bucket/key", // OR read input from S3 - "save_to_s3": { // Optional: save embeddings to S3 + "messages": [ + "text1", + "text2" + ], + // Direct text input (array of strings) + "s3_file": "bucket/key", + // OR read input from S3 + "save_to_s3": { + // Optional: save embeddings to S3 "bucket": "bucket-name", "key": "path/to/output.json" } @@ -186,9 +219,19 @@ Either `messages` or `s3_file` must be provided. `save_to_s3` is optional. ```json { - "embeddings": [[...], [...]], // Array of embedding vectors - "dimension": 384, // Vector dimension - "s3_location": "s3://..." // Only present if save_to_s3 was used + "embeddings": [ + [ + ... + ], + [ + ... + ] + ], + // Array of embedding vectors + "dimension": 384, + // Vector dimension + "s3_location": "s3://..." + // Only present if save_to_s3 was used } ``` @@ -246,7 +289,7 @@ Push to ECR and create Lambda function: ```bash # Tag and push -aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 123456789.dkr.ecr.us-east-1.amazonaws.com +aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 123456789.dkr.ecr.us-east-1.amazonaws.com docker tag serverless-vectorizer:bge-small 123456789.dkr.ecr.us-east-1.amazonaws.com/serverless-vectorizer:bge-small docker push 123456789.dkr.ecr.us-east-1.amazonaws.com/serverless-vectorizer:bge-small @@ -265,6 +308,11 @@ aws lambda update-function-code \ --image-uri 123456789.dkr.ecr.us-east-1.amazonaws.com/serverless-vectorizer:bge-small ``` +## Acknowledgments + +This project is powered by [fastembed-rs](https://github.com/Anush008/fastembed-rs), a Rust library for fast, +lightweight text embedding generation. + ## License MIT