diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4e1e5cd..8a56b83 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,6 +34,8 @@ jobs: - name: Install Project Dependencies run: | make install + - name: Install spaCy English model + run: uv pip install "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl" - name: Run Tests run: | make test.coverage diff --git a/.zenodo.json b/.zenodo.json index 8ce5a32..2d2f78c 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,6 +1,6 @@ { "access_right": "open", - "version": "0.6.0", + "version": "0.6.1", "creators": [ { "orcid": "0000-0003-0665-098X", diff --git a/CITATION.cff b/CITATION.cff index bdae432..6583218 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -5,6 +5,6 @@ authors: given-names: Eidan J. orcid: https://orcid.org/0000-0003-0665-098X title: "pii-codex: a Python library for PII detection, categorization, and severity assessment" -version: 0.6.0 +version: 0.6.1 doi: 10.5281/zenodo.7212576 -date-released: 2026-02-13 +date-released: 2026-02-15 diff --git a/README.md b/README.md index efbf1ce..17aa508 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ This project uses `uv` for dependency management. Install [uv](https://docs.astr make install ``` -This runs `uv sync --extra dev --extra detections` so you get the base package, dev tools (pytest, black, pylint, etc.), and detection extras (spaCy, Presidio Analyzer/Anonymizer). The spaCy model `en_core_web_lg` is included in the `detections` extra and is installed automatically; you do not need to run `spacy download` yourself. If for some reason the model is missing at runtime, the code will attempt to install it (via `spacy download` or, in uv-managed venvs without pip, via `uv pip install` and a known wheel URL). +This runs `uv sync --extra dev --extra detections` so you get the base package, dev tools (pytest, black, pylint, etc.), and detection extras (spaCy, Presidio Analyzer/Anonymizer). Install the spaCy English model for detection: `python -m spacy download en_core_web_lg` (or `uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl`). If the model is missing at runtime, the code may attempt to install it when possible. For more detail, see [docs/LOCAL_SETUP.md](docs/LOCAL_SETUP.md). This project has been tested on Ubuntu and macOS with Python 3.11 and 3.12. @@ -62,7 +62,7 @@ uv add pii-codex uv add "pii-codex[detections]" ``` -The `[detections]` extra installs spaCy, Microsoft Presidio Analyzer and Anonymizer, and the `en_core_web_lg` model (via a direct wheel URL), so detection works out of the box. If you install without the extra and later use detection features, the code will try to install the model on first use when possible. +The `[detections]` extra installs spaCy and Microsoft Presidio Analyzer/Anonymizer. Install the spaCy English model for detection: `python -m spacy download en_core_web_lg`. If you install without the extra and later use detection features, the code may try to install the model on first use when possible. For those using Google Collab, check out the example notebook: diff --git a/pii_codex/__init__.py b/pii_codex/__init__.py index 906d362..43c4ab0 100644 --- a/pii_codex/__init__.py +++ b/pii_codex/__init__.py @@ -1 +1 @@ -__version__ = "0.6.0" +__version__ = "0.6.1" diff --git a/pyproject.toml b/pyproject.toml index 14cde51..2cb7064 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pii-codex" -version = "0.6.0" +version = "0.6.1" description = "PII Detection, Categorization, and Severity Assessment" authors = [ {name = "Eidan J. Rosado"} @@ -31,7 +31,6 @@ detections = [ "thinc>=8.3.10", # 8.3.10+ has Python 3.13 wheels; older builds fail on 3.13 C API "presidio-analyzer>=2.2.361", "presidio-anonymizer>=2.2.361", - "en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl", ] dev = [ "pytest>=7.4.0,<8.0.0", @@ -60,7 +59,7 @@ Homepage = "https://github.com/EdyVision/pii-codex" Repository = "https://github.com/EdyVision/pii-codex" [bumpver] -current_version = "0.6.0" +current_version = "0.6.1" version_pattern = "MAJOR.MINOR.PATCH" files = [ "pyproject.toml", diff --git a/uv.lock b/uv.lock index c570c5f..fe7fe09 100644 --- a/uv.lock +++ b/uv.lock @@ -930,14 +930,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] -[[package]] -name = "en-core-web-lg" -version = "3.8.0" -source = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl" } -wheels = [ - { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl", hash = "sha256:293e9547a655b25499198ab15a525b05b9407a75f10255e405e8c3854329ab63" }, -] - [[package]] name = "exceptiongroup" version = "1.3.0" @@ -2437,7 +2429,6 @@ dependencies = [ [package.optional-dependencies] detections = [ - { name = "en-core-web-lg" }, { name = "presidio-analyzer" }, { name = "presidio-anonymizer" }, { name = "spacy" }, @@ -2476,7 +2467,6 @@ requires-dist = [ { name = "black", marker = "extra == 'dev'", specifier = ">=23.0.0,<24.0.0" }, { name = "coverage", marker = "extra == 'dev'", specifier = ">=7.2.0,<8.0.0" }, { name = "dataclasses-json", specifier = ">=0.5.7,<0.6.0" }, - { name = "en-core-web-lg", marker = "extra == 'detections'", url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl" }, { name = "faker", marker = "extra == 'dev'", specifier = ">=19.0.0,<20.0.0" }, { name = "importlib-resources", marker = "extra == 'dev'", specifier = ">=6.0.0,<7.0.0" }, { name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6.25.0,<7.0.0" },