From 666ee0852d11055837c18066e5ac3eec010166ae Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 06:30:24 +0000 Subject: [PATCH 1/3] v3.1.0: agent-friendly API, test coverage, and tooling refresh. Implements the v3.1.0 epic (tbd strif-9nyk). The Windows/macOS CI matrix bead (strif-dzxb) is intentionally deferred to a separate effort. API additions (agent/IDE ergonomics): - atomic_write_text() / atomic_write_bytes(): one-call wrappers around atomic_output_file() for the common whole-value write case. - __version__ exposed on the package via importlib.metadata. - Insertion / Replacement are now NamedTuples with named fields (offset/text and start/end/text). Runtime-compatible with positional tuples; see note below on static typing. - HashAlgorithm Literal type on hash_string()/hash_file() params for autocomplete; default stays "sha1" (no behavior change). - Removed the unreachable abbreviate_str/abbreviate_list aliases (they were never exported from the public package). Tests (43 total, up from 26): - New tests/test_atomic_var.py: AtomicVar set/swap/update, copy vs deepcopy independence, updates() context manager + immutable guard, truthiness, value_is_immutable, and a real concurrency test (10 threads x 1000 increments) that would fail without the lock. - hash_file: binary content + >8KB chunked-read path vs hashlib; unsupported-algorithm error. - New tests/test_ids.py: new_uid charset/length-bits math; timestamped uid sorts by creation time. - atomic_output_file {timestamp} backups don't clobber each other. - atomic_write_text/bytes round-trip, make_parents, backup. - Tightened test_string_replace.py: named-field construction, dropped redundant assert messages (per testing guidelines). Tooling: - Bump softprops/action-gh-release v2.6.2 -> v3.0.0 (Node 24 runtime; clears the Node 20 deprecation warning). SHA-pinned. - Bump astral-sh/setup-uv v7.6.0 -> v8.1.0 in CI and publish. SHA-pinned. 
README: - Document atomic_write_text/bytes; add "Using strif with LLM Agents" subsection; note Insertion/Replacement named fields; version banner. BACKWARD COMPATIBILITY NOTE: The NamedTuple change is fully runtime-compatible (NamedTuple subclasses tuple, so positional construction, unpacking, and indexing all still work). However, callers who statically type a value as list[Insertion]/list[Replacement] and assign bare tuple literals will now see a type error and should switch to the named constructors. Removing the abbreviate_* aliases is a breaking change only for code importing them from the private strif.strif submodule. --- .github/workflows/ci.yml | 2 +- .github/workflows/publish.yml | 4 +- README.md | 45 +++++++++++++++-- src/strif/__init__.py | 20 ++++++-- src/strif/strif.py | 53 ++++++++++++++++---- src/strif/string_replace.py | 14 ++++-- tests/test_atomic_var.py | 91 +++++++++++++++++++++++++++++++++++ tests/test_files.py | 33 +++++++++++++ tests/test_hash.py | 21 ++++++++ tests/test_ids.py | 26 ++++++++++ tests/test_string_replace.py | 81 ++++++++++++++----------------- 11 files changed, 323 insertions(+), 67 deletions(-) create mode 100644 tests/test_atomic_var.py create mode 100644 tests/test_ids.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 43bee95..c8e0dc8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,7 +37,7 @@ jobs: fetch-depth: 0 - name: Install uv (official Astral action) - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: # Update this as needed: version: "0.10.2" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 02d4951..55ce67f 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -24,7 +24,7 @@ jobs: fetch-depth: 0 - name: Install uv (official Astral action) - uses: 
astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: version: "0.10.2" enable-cache: true @@ -52,7 +52,7 @@ jobs: # Pinned to commit SHA because this is a single-maintainer third-party action # that runs alongside a publish step with write permissions. Update the SHA # together with the version comment when bumping. - uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2.6.2 + uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0 with: generate_release_notes: true prerelease: ${{ contains(github.ref_name, '-') }} diff --git a/README.md b/README.md index 61544ed..3883c3f 100644 --- a/README.md +++ b/README.md @@ -7,14 +7,16 @@ It is simply a few functions and tricks that have repeatedly shown value in vari projects. The goal is not to give a comprehensive suite of utilities but simply to complement the standard libraries and fill in a few gaps. -✨ **NEW:** **Version 3.0** is out and has additions and updates for Python 3.10-3.13! ✨ +✨ **NEW:** **Version 3.1** adds `atomic_write_text()`/`atomic_write_bytes()`, exposes +`__version__`, and supports Python 3.10-3.14. ✨ ## Key Features - **Atomic file operations** with handling of parent directories and backups. This is essential for thread safety and good hygiene so partial or corrupt outputs are never present in final file locations, even in case a program crashes. - See `atomic_output_file()`, `copyfile_atomic()`. + See `atomic_output_file()`, `atomic_write_text()`, `atomic_write_bytes()`, + `copyfile_atomic()`. - **Abbreviate and quote strings**, which is useful for logging a clean way. See `abbrev_str()`, `single_line()`, `quote_if_needed()`. @@ -43,6 +45,27 @@ The libs are all small so see pydoc strings or code for full docs. > that has some extra functions for pretty, human-readable outputs for objects, sizes, > times and dates, etc. 
+## Using strif with LLM Agents + +Strif is handy for code that generates files, which is increasingly often AI agent code. + +- **Atomic writes for streamed or generated output.** If a generation is interrupted or + crashes mid-write, you never leave a truncated or corrupt file in its final location. + `atomic_write_text("out.md", content)` is a one-liner for the common case. + +- **Content hashing for caching and dedup.** Use `hash_file()` or `hash_string()` to key + a cache on file contents, or `file_mtime_hash()` for a fast (content-free) cache key. + +- **Sortable, readable run ids.** `new_timestamped_uid()` gives ids that sort by creation + time, which is convenient for logs and scratch directories. + +```python +from strif import atomic_write_text + +# Safe even if the process dies partway through writing: +atomic_write_text("some-dir/output.md", generated_text, make_parents=True) +``` + ## Installation ```sh @@ -178,6 +201,13 @@ pip install strif Moves a file to a new location, automatically creating parent directories and optionally keeping a backup of the destination if it already exists. +- **`atomic_write_text(dest_path, text, make_parents=False, backup_suffix=None, + encoding='utf-8')`** and **`atomic_write_bytes(dest_path, data, make_parents=False, + backup_suffix=None)`** + + Convenience wrappers around `atomic_output_file()` for the common case of writing a + whole string or bytes value atomically in a single call. 
+ For example, it is generally a good idea to wrap an `open()` call with `atomic_output_file()`: @@ -187,6 +217,12 @@ with atomic_output_file("some-dir/my-final-output.txt") as temp_target: f.write("some contents") ``` +Or, for the common whole-value case, just: + +```python +atomic_write_text("some-dir/my-final-output.txt", "some contents") +``` + And this can (and in most cases should) be used in place of `shutil.copyfile`: ```python @@ -310,6 +346,9 @@ Examples: ## Multiple String Replacements +`Insertion` and `Replacement` are `NamedTuple`s, so you can use named fields +(`Insertion(offset, text)`, `Replacement(start, end, text)`) or plain positional tuples. + - **`insert_multiple(text: str, insertions: list[Insertion]) -> str`** Insert multiple strings into `text` at the given offsets, at once. @@ -317,7 +356,7 @@ Examples: - **`replace_multiple(text: str, replacements: list[Replacement]) -> str`** Replace multiple substrings in `text` with new strings, simultaneously. - The replacements are a list of tuples (start_offset, end_offset, new_string). + Each `Replacement` is `(start_offset, end_offset, new_string)`. 
## FAQ diff --git a/src/strif/__init__.py b/src/strif/__init__.py index 90e0a41..cd2ec05 100644 --- a/src/strif/__init__.py +++ b/src/strif/__init__.py @@ -1,4 +1,7 @@ +from importlib.metadata import PackageNotFoundError, version + __all__ = ( # noqa: F405 + "__version__", # atomic_var.py "AtomicVar", # strif.py @@ -11,6 +14,7 @@ "clean_alphanum_hash", "file_mtime_hash", "base36_encode", + "HashAlgorithm", "Hash", "hash_string", "hash_file", @@ -25,6 +29,8 @@ "move_file", "make_parent_dirs", "atomic_output_file", + "atomic_write_text", + "atomic_write_bytes", "temp_output_file", "temp_output_dir", "copyfile_atomic", @@ -41,7 +47,13 @@ "StringTemplate", ) -from .atomic_var import * # noqa: F403 -from .strif import * # noqa: F403 -from .string_replace import * # noqa: F403 -from .string_template import * # noqa: F403 +try: + __version__ = version("strif") +except PackageNotFoundError: + # Running from a source tree that isn't installed. + __version__ = "0.0.0.dev0" + +from .atomic_var import * # noqa: F403, E402 +from .strif import * # noqa: F403, E402 +from .string_replace import * # noqa: F403, E402 +from .string_template import * # noqa: F403, E402 diff --git a/src/strif/strif.py b/src/strif/strif.py index c374298..8bde1c9 100644 --- a/src/strif/strif.py +++ b/src/strif/strif.py @@ -18,7 +18,7 @@ from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Any +from typing import Any, Literal __all__ = ( "DEV_NULL", @@ -30,6 +30,7 @@ "clean_alphanum_hash", "file_mtime_hash", "base36_encode", + "HashAlgorithm", "Hash", "hash_string", "hash_file", @@ -44,6 +45,8 @@ "move_file", "make_parent_dirs", "atomic_output_file", + "atomic_write_text", + "atomic_write_bytes", "temp_output_file", "temp_output_dir", "copyfile_atomic", @@ -183,6 +186,10 @@ def base36_encode(n: int) -> str: return encoded +HashAlgorithm = Literal["sha1", "sha256", "sha384", "sha512", "md5", "blake2b", "blake2s"] +"""Common hash algorithms, 
for autocompletion. Any name `hashlib` accepts also works.""" + + @dataclass(frozen=True) class Hash: """ @@ -222,7 +229,7 @@ def with_prefix(self) -> str: return f"{self.algorithm}:{self.hex}" -def hash_string(string: str, algorithm: str = "sha1") -> Hash: +def hash_string(string: str, algorithm: HashAlgorithm | str = "sha1") -> Hash: """ Flexible hash of a string. """ @@ -231,7 +238,7 @@ def hash_string(string: str, algorithm: str = "sha1") -> Hash: return Hash(algorithm, hasher.digest()) -def hash_file(file_path: str | Path, algorithm: str = "sha1") -> Hash: +def hash_file(file_path: str | Path, algorithm: HashAlgorithm | str = "sha1") -> Hash: """ Hash the content of a file. """ @@ -286,13 +293,6 @@ def abbrev_list( return joiner.join(shortened) -abbreviate_str = abbrev_str -"""Deprecated. Use `abbrev_str()` instead.""" - -abbreviate_list = abbrev_list -"""Deprecated. Use `abbrev_list()` instead.""" - - def single_line(text: str) -> str: """ Convert newlines and other whitespace to spaces. @@ -569,6 +569,39 @@ def atomic_output_file( tmp_path.replace(dest_path) +def atomic_write_text( + dest_path: str | Path, + text: str, + make_parents: bool = False, + backup_suffix: str | None = None, + encoding: str = "utf-8", +) -> None: + """ + Atomically write a string to a file, so a partial or corrupt file never appears + at `dest_path`. Convenience wrapper around `atomic_output_file()`. + """ + with atomic_output_file( + dest_path, make_parents=make_parents, backup_suffix=backup_suffix + ) as tmp_path: + tmp_path.write_text(text, encoding=encoding) + + +def atomic_write_bytes( + dest_path: str | Path, + data: bytes, + make_parents: bool = False, + backup_suffix: str | None = None, +) -> None: + """ + Atomically write bytes to a file, so a partial or corrupt file never appears + at `dest_path`. Convenience wrapper around `atomic_output_file()`. 
+ """ + with atomic_output_file( + dest_path, make_parents=make_parents, backup_suffix=backup_suffix + ) as tmp_path: + tmp_path.write_bytes(data) + + @contextmanager def temp_output_file( prefix: str = "tmp", diff --git a/src/strif/string_replace.py b/src/strif/string_replace.py index 51a7b5d..f7ae81a 100644 --- a/src/strif/string_replace.py +++ b/src/strif/string_replace.py @@ -1,8 +1,13 @@ -from typing import TypeAlias +from __future__ import annotations + +from typing import NamedTuple __all__ = ["Insertion", "insert_multiple", "Replacement", "replace_multiple"] -Insertion = tuple[int, str] + +class Insertion(NamedTuple): + offset: int + text: str def insert_multiple(text: str, insertions: list[Insertion]) -> str: @@ -19,7 +24,10 @@ def insert_multiple(text: str, insertions: list[Insertion]) -> str: return "".join(chunks) -Replacement: TypeAlias = tuple[int, int, str] +class Replacement(NamedTuple): + start: int + end: int + text: str def replace_multiple(text: str, replacements: list[Replacement]) -> str: diff --git a/tests/test_atomic_var.py b/tests/test_atomic_var.py new file mode 100644 index 0000000..c6a7e1a --- /dev/null +++ b/tests/test_atomic_var.py @@ -0,0 +1,91 @@ +import threading +from dataclasses import dataclass + +from strif import AtomicVar +from strif.atomic_var import value_is_immutable + + +def test_atomic_var_set_swap_update(): + var = AtomicVar(0) + var.set(5) + assert var.value == 5 + assert var.swap(10) == 5 + assert var.value == 10 + # update() with a returning function. + assert var.update(lambda x: x + 1) == 11 + # update() with an in-place mutation returns None -> value unchanged reference. + lst = AtomicVar([1, 2]) + assert lst.update(lambda x: x.append(3)) == [1, 2, 3] + + +def test_atomic_var_copy_independence(): + var = AtomicVar([[1], [2]]) + shallow = var.copy() + deep = var.deepcopy() + var.value[0].append(99) + # Shallow copy shares inner lists; deep copy does not. 
+ assert shallow[0] == [1, 99] + assert deep[0] == [1] + + +def test_atomic_var_updates_context_manager(): + var = AtomicVar([1, 2, 3]) + with var.updates() as value: + value.append(4) + assert var.value == [1, 2, 3, 4] + + +def test_atomic_var_updates_rejects_immutable(): + var = AtomicVar(0) + try: + with var.updates(): + pass + raise AssertionError("updates() should reject immutable values") + except ValueError: + pass + + +def test_atomic_var_truthiness(): + assert not AtomicVar(0) + assert AtomicVar(1) + assert not AtomicVar([]) + assert AtomicVar([1]) + + +def test_value_is_immutable(): + assert value_is_immutable(0) + assert value_is_immutable("x") + assert value_is_immutable((1, 2)) + assert not value_is_immutable([1, 2]) + assert not value_is_immutable({}) + + @dataclass(frozen=True) + class Frozen: + x: int + + @dataclass + class Mutable: + x: int + + assert value_is_immutable(Frozen(1)) + assert not value_is_immutable(Mutable(1)) + + +def test_atomic_var_concurrent_updates(): + # Without the lock serializing the read-modify-write, the final count would be + # less than the expected total due to lost updates. 
+ var = AtomicVar(0) + threads_count = 10 + increments = 1000 + + def worker(): + for _ in range(increments): + var.update(lambda x: x + 1) + + threads = [threading.Thread(target=worker) for _ in range(threads_count)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert var.value == threads_count * increments diff --git a/tests/test_files.py b/tests/test_files.py index 2120fcd..43ffb77 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -5,6 +5,8 @@ from strif import ( atomic_output_file, + atomic_write_bytes, + atomic_write_text, copy_to_backup, is_truthy, move_file, @@ -65,6 +67,37 @@ def test_atomic_output_file_force_replaces_dir(tmp_path: Path): assert target.read_text() == "now a file" +def test_atomic_output_file_timestamp_backups_do_not_clobber(tmp_path: Path): + out = tmp_path / "out.txt" + out.write_text("v1") + with atomic_output_file(out, backup_suffix="{timestamp}.bak") as tmp: + tmp.write_text("v2") + with atomic_output_file(out, backup_suffix="{timestamp}.bak") as tmp: + tmp.write_text("v3") + # Each write keeps its own uniquely-named backup, so both prior versions survive. 
+ backups = sorted(p.read_text() for p in tmp_path.glob("out.txt*bak")) + assert out.read_text() == "v3" + assert backups == ["v1", "v2"] + + +def test_atomic_write_text_and_bytes(tmp_path: Path): + text_path = tmp_path / "a.txt" + atomic_write_text(text_path, "hello") + assert text_path.read_text() == "hello" + + bytes_path = tmp_path / "b.bin" + atomic_write_bytes(bytes_path, b"\x00\x01\x02") + assert bytes_path.read_bytes() == b"\x00\x01\x02" + + +def test_atomic_write_text_make_parents_and_backup(tmp_path: Path): + nested = tmp_path / "sub" / "a.txt" + atomic_write_text(nested, "first", make_parents=True) + atomic_write_text(nested, "second", backup_suffix=".bak") + assert nested.read_text() == "second" + assert (tmp_path / "sub" / "a.txt.bak").read_text() == "first" + + @pytest.mark.skipif(not os.path.exists("/proc/self/fd"), reason="Linux-only fd accounting") def test_temp_output_file_no_fd_leak(): before = len(os.listdir("/proc/self/fd")) diff --git a/tests/test_hash.py b/tests/test_hash.py index 52f43c3..ce09061 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -1,4 +1,8 @@ +import hashlib import os +from pathlib import Path + +import pytest from strif import hash_file, hash_string @@ -16,3 +20,20 @@ def test_hash_file(): assert hash_string("Hello, World!").with_prefix == result_hash assert hash_string("Hello, World!").base64 == "CgqfKmdylCVXq1NV12r0Qvj2XgE=" + + +def test_hash_file_binary_and_chunked(tmp_path: Path): + # Non-UTF8 bytes plus a payload larger than the 8192-byte read chunk, to exercise + # the chunked read loop and confirm binary content hashes identically to hashlib. 
+ payload = bytes(range(256)) * 500 # 128 KB + file_path = tmp_path / "blob.bin" + file_path.write_bytes(payload) + + assert hash_file(file_path, "sha256").hex == hashlib.sha256(payload).hexdigest() + + +def test_hash_file_unsupported_algorithm(tmp_path: Path): + file_path = tmp_path / "f.txt" + file_path.write_text("x") + with pytest.raises(ValueError): + hash_file(file_path, "not-a-real-algo") diff --git a/tests/test_ids.py b/tests/test_ids.py new file mode 100644 index 0000000..c3d2d20 --- /dev/null +++ b/tests/test_ids.py @@ -0,0 +1,26 @@ +import re + +from strif import new_timestamped_uid, new_uid + +_BASE36 = re.compile(r"^[0-9a-z]+$") + + +def test_new_uid_charset_and_length(): + assert _BASE36.match(new_uid()) + # Length follows int(bits / 5.16) + 1 over the 36-char alphabet. + assert len(new_uid(32)) == int(32 / 5.16) + 1 + assert len(new_uid(64)) == int(64 / 5.16) + 1 + assert len(new_uid(128)) > len(new_uid(64)) + + +def test_new_uid_is_random(): + assert new_uid() != new_uid() + + +def test_new_timestamped_uid_sorts_by_time(): + first = new_timestamped_uid() + second = new_timestamped_uid() + # Timestamp prefix means lexical order tracks creation order. + assert first < second + # Starts with a UTC date like 20150912T... 
+ assert re.match(r"^\d{8}T\d{6}", first) diff --git a/tests/test_string_replace.py b/tests/test_string_replace.py index 2a0e816..a0fcc65 100644 --- a/tests/test_string_replace.py +++ b/tests/test_string_replace.py @@ -1,53 +1,46 @@ -from strif.string_replace import Insertion, Replacement, insert_multiple, replace_multiple +import pytest +from strif.string_replace import Insertion, Replacement, insert_multiple, replace_multiple -def test_insert_multiple(): - text = "hello world" - insertions: list[Insertion] = [(5, ",")] - expected = "hello, world" - assert insert_multiple(text, insertions) == expected, "Single insertion failed" - text = "hello world" - insertions = [(0, "Start "), (11, " End")] - expected = "Start hello world End" - assert insert_multiple(text, insertions) == expected, "Multiple insertions failed" +def test_named_tuple_fields_and_positional_compat(): + # NamedTuple gives named access while staying tuple-compatible, so both the named + # and positional/unpacking APIs must work. + ins = Insertion(offset=5, text=",") + assert ins.offset == 5 and ins.text == "," + assert ins == (5, ",") - text = "short" - insertions = [(10, " end")] - expected = "short end" - assert insert_multiple(text, insertions) == expected, "Out of bounds insertion failed" + start, end, text = Replacement(0, 3, "x") + assert (start, end, text) == (0, 3, "x") - text = "negative test" - insertions = [(-1, "ss")] - expected = "negative tessst" - assert insert_multiple(text, insertions) == expected, "Negative offset insertion failed" - text = "no change" - insertions = [] - expected = "no change" - assert insert_multiple(text, insertions) == expected, "Empty insertions failed" +def test_insert_multiple(): + assert insert_multiple("hello world", [Insertion(5, ",")]) == "hello, world" + assert ( + insert_multiple("hello world", [Insertion(0, "Start "), Insertion(11, " End")]) + == "Start hello world End" + ) + # Out-of-bounds offset clamps to the end. 
+ assert insert_multiple("short", [Insertion(10, " end")]) == "short end" + # Negative offset indexes from the end. + assert insert_multiple("negative test", [Insertion(-1, "ss")]) == "negative tessst" + assert insert_multiple("no change", []) == "no change" def test_replace_multiple(): - text = "The quick brown fox" - replacements: list[Replacement] = [(4, 9, "slow"), (16, 19, "dog")] - expected = "The slow brown dog" - assert replace_multiple(text, replacements) == expected, "Multiple replacements failed" - - text = "overlap test" - replacements = [(0, 6, "start"), (5, 10, "end")] - try: - replace_multiple(text, replacements) - raise AssertionError("Overlapping replacements did not raise ValueError") - except ValueError: - pass # Expected exception - - text = "short text" - replacements = [(5, 10, " longer text")] - expected = "short longer text" - assert replace_multiple(text, replacements) == expected, "Out of bounds replacement failed" - - text = "no change" - replacements = [] - expected = "no change" - assert replace_multiple(text, replacements) == expected, "Empty replacements failed" + assert ( + replace_multiple( + "The quick brown fox", [Replacement(4, 9, "slow"), Replacement(16, 19, "dog")] + ) + == "The slow brown dog" + ) + # Out-of-bounds end clamps to the end. + assert ( + replace_multiple("short text", [Replacement(5, 10, " longer text")]) == "short longer text" + ) + assert replace_multiple("no change", []) == "no change" + + +def test_replace_multiple_rejects_overlap(): + with pytest.raises(ValueError): + replace_multiple("overlap test", [Replacement(0, 6, "start"), Replacement(5, 10, "end")]) From b1b1fec9de66e74f5228c2e6052b150c8f9f207b Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 07:58:22 +0000 Subject: [PATCH 2/3] Convert test_hash_file to tmp_path fixture. Was writing to a relative tmp/ dir (gitignored, but pollutes cwd and is inconsistent with the other tmp_path-based tests in the same file). 
Same assertions, cleaner isolation. --- tests/test_hash.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/test_hash.py b/tests/test_hash.py index ce09061..c4ecdc0 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -1,5 +1,4 @@ import hashlib -import os from pathlib import Path import pytest @@ -7,12 +6,9 @@ from strif import hash_file, hash_string -def test_hash_file(): - os.makedirs("tmp", exist_ok=True) - file_path = "tmp/test_file.txt" - - with open(file_path, "w") as f: - f.write("Hello, World!") +def test_hash_file(tmp_path: Path): + file_path = tmp_path / "test_file.txt" + file_path.write_text("Hello, World!") result_hash = hash_file(file_path, "sha1").with_prefix assert result_hash == "sha1:0a0a9f2a6772942557ab5355d76af442f8f65e01" From e6fddc5c58b9d5ea7e44f1c3d0f1c8167a379747 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 19:48:37 +0000 Subject: [PATCH 3/3] Address PR review: hash_file spellings, sdist hygiene, test/doc fixes. From external review of PR #8: - hash_file(): remove the `algorithm not in hashlib.algorithms_available` precheck. It rejected valid hashlib spellings like "SHA1" and "sha-1" (which are NOT in algorithms_available but DO work via hashlib.new), and was inconsistent with hash_string(). hashlib.new() still raises ValueError for genuinely unknown algorithms, so the contract is unchanged for callers. - pyproject.toml: add [tool.hatch.build.targets.sdist] exclude so the published source distribution no longer ships .claude/.tbd/.github/ .copier-answers.yml agent+tooling state or the attic/ directory. - tests/test_ids.py: make the timestamped-uid test deterministic. Assert the sortable timestamp-prefix format and base36 suffix plus uniqueness, instead of comparing two rapidly-created ids (which could tie on timestamp precision and fall through to the random suffix). - README.md: fix sf.write -> f.write typo in the backup_suffix example.
Not changed: kept the abbreviate_str/abbreviate_list removal (maintainer confirmed); will be noted in the v3.1.0 release notes. --- README.md | 2 +- pyproject.toml | 4 ++++ src/strif/strif.py | 3 --- tests/test_ids.py | 15 ++++++++------- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 3883c3f..19d74d1 100644 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ There are also some handy additional options: with atomic_output_file("some-dir/my-final-output.txt", make_parents=True, backup_suffix=".old.{timestamp}") as temp_target: with open(temp_target, "w") as f: - sf.write("some contents") + f.write("some contents") ``` This creates parent folders as needed (a major convenience). diff --git a/pyproject.toml b/pyproject.toml index e4195ae..35975a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,10 @@ bump = true # The source location for the package. packages = ["src/strif"] +[tool.hatch.build.targets.sdist] +# Keep agent/tooling state out of the published source distribution. +exclude = [".claude", ".tbd", ".github", ".copier-answers.yml", "attic"] + # ---- Settings ---- diff --git a/src/strif/strif.py b/src/strif/strif.py index 8bde1c9..33d89eb 100644 --- a/src/strif/strif.py +++ b/src/strif/strif.py @@ -242,9 +242,6 @@ def hash_file(file_path: str | Path, algorithm: HashAlgorithm | str = "sha1") -> """ Hash the content of a file. 
""" - if algorithm not in hashlib.algorithms_available: - raise ValueError(f"Unsupported hash algorithm: {algorithm}") - hasher = hashlib.new(algorithm) file_path = Path(file_path) with file_path.open("rb") as file: diff --git a/tests/test_ids.py b/tests/test_ids.py index c3d2d20..1e7a8d1 100644 --- a/tests/test_ids.py +++ b/tests/test_ids.py @@ -17,10 +17,11 @@ def test_new_uid_is_random(): assert new_uid() != new_uid() -def test_new_timestamped_uid_sorts_by_time(): - first = new_timestamped_uid() - second = new_timestamped_uid() - # Timestamp prefix means lexical order tracks creation order. - assert first < second - # Starts with a UTC date like 20150912T... - assert re.match(r"^\d{8}T\d{6}", first) +def test_new_timestamped_uid_format(): + uid = new_timestamped_uid() + # The id starts with a fixed-width UTC timestamp (e.g. 20150912T084555Z-...), which + # is what makes these ids sort by creation time lexically. A random suffix follows. + prefix, _, suffix = uid.partition("-") + assert re.match(r"^\d{8}T\d{6}", prefix) + assert _BASE36.match(suffix.rsplit("-", 1)[-1]) + assert new_timestamped_uid() != new_timestamped_uid()