diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..7596153 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: 2026-present AUTHOR +# +# SPDX-License-Identifier: Apache-2.0 + +name: Release + +on: + push: + tags: + - "v[0-9]+.[0-9]+.[0-9]+*" # vMAJOR.MINOR.PATCH plus optional suffix; multi-digit majors (v10.0.0) match too + +jobs: + release-on-pypi: + name: Publish on PyPI + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.13" + + - name: Install Hatch + run: pip install hatch + + - name: Build + run: hatch build + + - name: Publish to PyPI + env: + HATCH_INDEX_USER: __token__ + HATCH_INDEX_AUTH: ${{ secrets.PYPI_API_TOKEN }} + run: hatch publish -y diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..1fa286d --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: 2026-present AUTHOR +# +# SPDX-License-Identifier: Apache-2.0 + +name: Test + +on: + push: + branches: [main] + pull_request: + +concurrency: + group: test-${{ github.head_ref || github.run_id }} # head_ref is empty on push events; run_id keeps runs on main from cancelling each other + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + +jobs: + test: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.10", "3.14"] + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install hatch + + - name: 
Lint + if: matrix.python-version == '3.10' && runner.os == 'Linux' + run: hatch run fmt-check + + - name: Run tests + run: hatch run test:all diff --git a/README.md b/README.md index 2930ad1..c6c9724 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,78 @@ -# custom-component -A template repository that can be used to create custom Haystack components. +# Custom Component Template + +A template repository for creating custom [Haystack](https://haystack.deepset.ai/) components and publishing them as standalone Python packages. + +For more details, see the Haystack documentation on [creating custom components](https://docs.haystack.deepset.ai/docs/custom-components) and [creating custom document stores](https://docs.haystack.deepset.ai/docs/creating-custom-document-stores). + +## How to use this template + +1. Click **[Use this template](https://github.com/deepset-ai/custom-component/generate)** to create a new repository. + +2. **Rename the package directory** from `src/haystack_integrations/components/example/` to match your integration. See [Namespace convention](#namespace-convention) below for the correct path. + +3. **Update `pyproject.toml`** — search for `TODO` comments and replace: + - `name`: your package name, following the `<name>-haystack` convention (e.g. `opensearch-haystack`) + - `description`, `authors`, `keywords`, `project.urls` + - `dependencies`: add your integration-specific dependencies + - `tool.hatch.version.raw-options`: no change is needed after renaming directories, because the version is derived from git tags rather than from the package path + +4. **Add your component code** in the renamed directory and export your classes from `__init__.py`. + +5. **Add tests** in `tests/` — see the skeleton in `tests/test_example.py`. + +6. **Search for all `TODO` comments** across the project and address them. + +## Namespace convention + +Haystack integrations use the `haystack_integrations` namespace package. 
The directory structure under `src/` determines the import path for your component. + +**Components** (converters, embedders, generators, rankers, etc.) use: +``` +src/haystack_integrations/components/<component_type>/<integration_name>/ +``` +Import path: `from haystack_integrations.components.<component_type>.<integration_name> import MyComponent` + +Common component types: `converters`, `embedders`, `generators`, `rankers`, `retrievers`, `connectors`, `tools`, `websearch` + +**Document stores** use a separate namespace: +``` +src/haystack_integrations/document_stores/<integration_name>/ +``` +Import path: `from haystack_integrations.document_stores.<integration_name> import MyDocumentStore` + +## Development + +This project uses [Hatch](https://hatch.pypa.io/) for build and environment management. + +```bash +# Install Hatch +pip install hatch + +# Format and lint +hatch run fmt # auto-fix +hatch run fmt-check # check only + +# Run tests +hatch run test:unit # unit tests only +hatch run test:integration # integration tests only +hatch run test:all # all tests +hatch run test:cov # with coverage +``` + +## Publishing to PyPI + +This template includes a GitHub Actions workflow that publishes your package to PyPI when you push a version tag. + +1. **Add a `PYPI_API_TOKEN` secret** to your repository settings (Settings > Secrets and variables > Actions). + +2. **Create a version tag** and push it: + ```bash + git tag v0.1.0 + git push origin v0.1.0 + ``` + +The release workflow will build and publish the package automatically. + +## License + +`Apache-2.0` - See [LICENSE](LICENSE) for details. diff --git a/examples/example.py b/examples/example.py new file mode 100644 index 0000000..dda8191 --- /dev/null +++ b/examples/example.py @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: 2026-present AUTHOR +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack_integrations.components.example import ExampleComponent + +# This is a minimal example showing how to use the component. +# Replace this with a usage example that demonstrates your component's functionality. 
+component = ExampleComponent(param="my_param") +result = component.run(input_text="Hello, world!") + +print(result) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..747b062 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,166 @@ +# SPDX-FileCopyrightText: 2026-present AUTHOR +# +# SPDX-License-Identifier: Apache-2.0 + +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "example-haystack" # TODO: Replace with your package name, e.g. "deepset-ai-haystack" +dynamic = ["version"] +description = "A custom Haystack component" # TODO: Replace with your description +readme = "README.md" +requires-python = ">=3.10" +license = "Apache-2.0" +keywords = [ + "haystack", + # TODO: Add relevant keywords for your integration +] +authors = [ + # TODO: Replace with your name and email + { name = "AUTHOR", email = "your@email.com" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "haystack-ai", + # TODO: Add your integration-specific dependencies here +] + +[project.urls] +# TODO: Replace with your repository URL +Documentation = "https://github.com/your-org/example-haystack#readme" +Issues = "https://github.com/your-org/example-haystack/issues" +Source = "https://github.com/your-org/example-haystack" + +[tool.hatch.version] +source = "vcs" +tag-pattern = "v(?P<version>.*)" # the named group "version" is the part of the tag used as the package version + +[tool.hatch.version.raw-options] +git_describe_command = 'git describe --tags --match="v[0-9]*"' + +[tool.hatch.build.targets.wheel] 
+packages = ["src/haystack_integrations"] + +[tool.hatch.envs.default] +installer = "uv" +dependencies = ["ruff"] + +[tool.hatch.envs.default.scripts] +fmt = "ruff check --fix {args:.} && ruff format {args:.}" +fmt-check = "ruff check {args:.} && ruff format --check {args:.}" + +[tool.hatch.envs.test] +dependencies = [ + "pytest", + "pytest-cov", +] + +[tool.hatch.envs.test.scripts] +unit = 'pytest -m "not integration" {args:tests}' +integration = 'pytest -m "integration" {args:tests}' +all = "pytest {args:tests}" +cov = "pytest --cov=haystack_integrations {args:tests}" + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +select = [ + "A", + "ANN", + "ARG", + "B", + "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + "B027", # empty method in abstract base class + "S105", # possible hardcoded password + "S106", # possible hardcoded password + "S107", # possible hardcoded password + "C901", # too complex + "PLR0911", # too many return statements + "PLR0912", # too many branches + "PLR0913", # too many arguments + "PLR0915", # too many statements +] + +[tool.ruff.lint.isort] +known-first-party = ["haystack_integrations"] + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "parents" + +[tool.ruff.lint.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] +# Examples can print their output and don't need type annotations +"examples/**/*" = ["D", "T201", 
"ANN"] + +[tool.mypy] +install_types = true +non_interactive = true +check_untyped_defs = true +disallow_incomplete_defs = true + +[[tool.mypy.overrides]] +module = ["haystack.*"] +ignore_missing_imports = true + +[tool.coverage.run] +source = ["haystack_integrations"] +branch = true +parallel = false + +[tool.coverage.report] +omit = ["*/tests/*", "*/__init__.py"] +show_missing = true +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] + +[tool.pytest.ini_options] +minversion = "6.0" +markers = [ + "unit: unit tests", + "integration: integration tests", +] +addopts = ["--import-mode=importlib"] diff --git a/src/haystack_integrations/components/example/__init__.py b/src/haystack_integrations/components/example/__init__.py new file mode 100644 index 0000000..0989a98 --- /dev/null +++ b/src/haystack_integrations/components/example/__init__.py @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2026-present AUTHOR +# +# SPDX-License-Identifier: Apache-2.0 + +# TODO: Rename the import to match your component class. +from .example_component import ExampleComponent + +__all__ = ["ExampleComponent"] diff --git a/src/haystack_integrations/components/example/example_component.py b/src/haystack_integrations/components/example/example_component.py new file mode 100644 index 0000000..6b286e1 --- /dev/null +++ b/src/haystack_integrations/components/example/example_component.py @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: 2026-present AUTHOR +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack import component + + +# TODO: Rename this class and update the output types and run method to match your use case. +@component +class ExampleComponent: + """ + A custom Haystack component. 
+ + Usage: + ```python + from haystack_integrations.components.example import ExampleComponent + + component = ExampleComponent() + result = component.run(input_text="Hello, world!") + ``` + """ + + def __init__(self, param: str = "default") -> None: + """ + Create the component and store `param` on the instance. + + :param param: An example parameter; it is only stored as `self.param`, `run` does not read it. + """ + self.param = param + + @component.output_types(output=str) + def run(self, input_text: str) -> dict[str, str]: + """ + Return the input text unchanged (placeholder behaviour). + + :param input_text: The text to process. + :returns: A dictionary with the following keys: + - `output`: The same string that was passed as `input_text`. + """ + # TODO: Implement your component logic here; until then the input is passed through as-is. + result = input_text + return {"output": result} + + # NOTE: Custom `to_dict` and `from_dict` methods are only needed if the default serialization doesn't work + # for your component (e.g. it has non-serializable attributes). For details, see: + # https://docs.haystack.deepset.ai/docs/serialization#default-serialization-behavior diff --git a/tests/test_example.py b/tests/test_example.py new file mode 100644 index 0000000..a4dff1a --- /dev/null +++ b/tests/test_example.py @@ -0,0 +1,40 @@ +# SPDX-FileCopyrightText: 2026-present AUTHOR +# +# SPDX-License-Identifier: Apache-2.0 + +# TODO: Replace these example tests with tests for your own component(s). 
+ +from haystack.core.serialization import component_from_dict, component_to_dict + +from haystack_integrations.components.example import ExampleComponent + + +class TestExampleComponent: # example tests for the template component: init, run and (de)serialization + def test_init_default(self) -> None: + component = ExampleComponent() + assert component.param == "default" + + def test_init_custom_param(self) -> None: + component = ExampleComponent(param="custom") + assert component.param == "custom" + + def test_run(self) -> None: # the template component returns its input under the "output" key + component = ExampleComponent() + result = component.run(input_text="Hello, world!") + assert result == {"output": "Hello, world!"} + + def test_to_dict(self) -> None: # expected "type" is the module path followed by the class name + component = ExampleComponent(param="custom") + data = component_to_dict(component, "ExampleComponent") + assert data == { + "type": "haystack_integrations.components.example.example_component.ExampleComponent", + "init_parameters": {"param": "custom"}, + } + + def test_from_dict(self) -> None: # same payload shape as asserted in test_to_dict + data = { + "type": "haystack_integrations.components.example.example_component.ExampleComponent", + "init_parameters": {"param": "custom"}, + } + deserialized = component_from_dict(ExampleComponent, data, "ExampleComponent") + assert deserialized.param == "custom"