borgbackup · mr-raj12 · Jun 27, 2026 · ThomasWaldmann · Jun 27, 2026 · ThomasWaldmann
diff --git a/src/borg/archive.py b/src/borg/archive.py
@@ -1841,11 +1841,7 @@ def verify_data(self):
         pi.finish()
         if defect_chunks:
             if self.repair:
-                # We would remove the defect chunks here, but single-object delete is not implemented
-                # yet (Repository.delete is a no-op: a pack holds multiple objects, so dropping the
-                # whole pack to drop one chunk would take the good ones with it). So we recheck each
-                # chunk and only report the ones that keep failing; real removal will come via compact.
-                logger.warning("Found defect chunks. They can not be removed yet and are only reported.")
+                logger.warning("Found defect chunks, removing them from the repository.")
                 for defect_chunk in defect_chunks:
                     # remote repo (ssh): retry might help for strange network / NIC / RAM errors
                     # as the chunk will be retransmitted from remote server.
@@ -1857,18 +1853,16 @@ def verify_data(self):
                         # we must decompress, so it'll call assert_id() in there:
                         self.repo_objs.parse(defect_chunk, encrypted_data, decompress=True, ro_type=ROBJ_DONTCARE)
                     except IntegrityErrorBase:
-                        # failed twice -> we would like to get rid of this defect chunk. We must not
-                        # drop its whole pack here: the pack holds other, good chunks too.
-                        # Repository.delete is a no-op for now (it just logs); real single-object
-                        # removal will happen via compact.
-                        # TODO: actually remove the defect chunk once single-object delete exists.
-                        self.repository.delete(defect_chunk)
+                        # failed twice -> remove this defect chunk. delete rewrites its pack without it,
+                        # keeping the other chunks. persist=False: finish() rebuilds the index from the
+                        # rewritten packs anyway, so a per-chunk full index write would be wasted.
+                        self.repository.delete(defect_chunk, persist=False)
+                        # drop it from our own index too, so rebuild_archives reports the file it belongs to.
+                        del self.chunks[defect_chunk]
                     else:
                         logger.warning("chunk %s not deleted, did not consistently fail.", bin_to_hex(defect_chunk))
             else:
-                logger.warning(
-                    "Found defect chunks. Run with --repair to recheck them (removal is not implemented yet)."
-                )
+                logger.warning("Found defect chunks. Run with --repair to remove them.")
                 for defect_chunk in defect_chunks:
                     logger.debug("chunk %s is defect.", bin_to_hex(defect_chunk))
         log = logger.error if errors else logger.info

diff --git a/src/borg/repository.py b/src/borg/repository.py
@@ -798,48 +798,46 @@ def put(self, id, data):
         # PackWriter shares this repository's index, so add() triggers the lazy build itself.
         return self._pack_writer.add(id, data)
 
-    def delete(self, id):
+    def delete(self, id, *, persist=True):
         """Delete a single repo object by rewriting its pack without it.
 
-        A pack holds several objects, so we can not just drop the whole pack: that would take the
-        target's innocent neighbours with it. Route through compact_pack, which copies the survivors
-        into a new pack and repoints them in the chunk index. Rewriting a whole pack to remove one
-        object is slow, but delete is only used by special-purpose callers (tests, the debug command,
-        check --repair), never on a hot path, so the cost does not matter.
+        A pack holds several objects, so we rewrite it via compact_pack(complete=False) rather than
+        dropping the whole pack. With persist=True the full chunk index is written back so the next
+        borg process sees the deletion; bulk callers that rebuild the index themselves (check --repair)
+        pass persist=False to avoid a full index rewrite per deleted object.
         """
         self._lock_refresh()
-        from .cache import write_chunkindex_to_repo
-
         entry = self.chunks.get(id)
         if entry is None:
             raise self.ObjectNotFound(id, str(self._location))
         pack_id = entry.pack_id
-        # keep every other object in the same pack; compact_pack rewrites the pack without the target.
-        # work off the live index (each entry already carries its offset/size after flush): do not
-        # rebuild from the packs here, that could not tell the target apart from stale duplicate copies
-        # of the same id left in other packs by re-puts (build keeps an arbitrary one of them).
+        # build keep_ids from the live index (offsets are filled in after flush), not from the packs:
+        # a rebuild could not tell the target from stale duplicates that re-puts left in other packs.
         keep_ids = {cid for cid, e in self.chunks.iteritems() if e.pack_id == pack_id}
         keep_ids.discard(id)
-        self.compact_pack(pack_id, keep_ids=keep_ids, drop_ids={id})
-        # persist a full index so a following borg process sees the deletion; close() only writes new
-        # entries incrementally and would not record the removal.
-        write_chunkindex_to_repo(self, self.chunks, incremental=False, force_write=True, delete_other=True)
+        self.compact_pack(pack_id, keep_ids=keep_ids, drop_ids={id}, complete=False)
+        if persist:
+            # close() only persists new entries incrementally, so write the full index here to record
+            # the removal for the next borg process.
+            from .cache import write_chunkindex_to_repo
+
+            write_chunkindex_to_repo(self, self.chunks, incremental=False, force_write=True, delete_other=True)
 
-    def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set):
+    def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set, complete: bool = True):
         """Rewrite pack <pack_id>, keeping <keep_ids> and dropping <drop_ids>, then delete the old pack.
 
-        keep_ids and drop_ids are sets of chunk ids that must together be every object of the pack, as
-        recorded in the chunk index. The caller guarantees that completeness (both callers build the two
-        sets by iterating the index for this pack_id); here we only assert their ranges tile contiguously
-        from offset 0 with no gap or overlap, and that their intersection is empty. Kept objects are
-        copied into a new pack via store.defrag and repointed in the chunk index; dropped objects' index
-        entries are removed.
+        keep_ids and drop_ids come from the caller iterating the chunk index for this pack_id; they
+        must not overlap. With complete=True (compact) they are the pack's whole object set, so their
+        ranges must tile it from offset 0 with no gap or overlap. With complete=False (delete) the index
+        view may be partial (a re-put can repoint an earlier object to another pack while its bytes stay
+        here), so we copy only the known survivors at their real offsets and allow gaps. Kept objects are
+        copied into a new pack via store.defrag and repointed; dropped objects' index entries are removed.
 
         Returns the new pack_id, None if nothing is kept (pack dropped), or <pack_id> unchanged if the
         kept objects reproduce the old pack (same sha256 name, nothing to delete).
 
-        Updates the in-memory chunk index only. The caller holds the exclusive lock and owns index
-        durability: invalidate the cached index before calling, write it back after, as compact does.
+        Updates the in-memory chunk index only; the caller holds the exclusive lock and owns index
+        durability (invalidate before, write back after, as compact does).
         """
         self._lock_refresh()
         pack_key = "packs/" + bin_to_hex(pack_id)
@@ -855,13 +853,12 @@ def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set):
             located.append((entry.obj_offset, obj_id, entry.obj_size, keep))
         located.sort()
 
-        # keep + drop tile the pack contiguously from offset 0; collect the objects to keep in the same
-        # pass. we do not cross-check against the pack's on-disk size: the caller already guarantees the
-        # two sets are the pack's complete object set.
+        # walk objects in offset order, collecting the survivors.
         kept = []  # (obj_offset, obj_id, obj_size), offset-ordered
         covered = 0
         for offset, obj_id, size, keep in located:
-            assert offset == covered, f"gap or overlap in pack {bin_to_hex(pack_id)} at offset {covered}"
+            if complete:  # keep+drop are the whole pack: they must tile it with no gap or overlap
+                assert offset == covered, f"gap or overlap in pack {bin_to_hex(pack_id)} at offset {covered}"
             covered += size
             if keep:
                 kept.append((offset, obj_id, size))

diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py
@@ -374,7 +374,6 @@ def test_extra_chunks(archivers, request):
     cmd(archiver, "check", "-v", exit_code=0)  # check does not deal with orphans anymore
 
 
-@pytest.mark.skip(reason="TODO: test broken due to packs refactoring")
 @pytest.mark.parametrize("init_args", [["--encryption=aes256-ocb"], ["--encryption", "none"]])
 def test_verify_data(archivers, request, init_args):
     archiver = request.getfixturevalue(archivers)
@@ -390,10 +389,14 @@ def test_verify_data(archivers, request, init_args):
         for item in archive.iter_items():
             if item.path.endswith(src_file):
                 chunk = item.chunks[-1]
-                data = repository.get(chunk.id)
-                data = corrupt(data, 123)
+                data = corrupt(repository.get(chunk.id), 123)
+                # re-putting the same id would not repoint the index: close() persists new index
+                # entries incrementally, not updates to existing ones, so the next process would still
+                # read the old, good copy. delete first, then put the corrupted bytes as a fresh entry.
+                repository.delete(chunk.id)
                 repository.put(chunk.id, data)
                 break
+        repository.flush()  # make the corrupted put durable before close()
 
     # the normal archives check does not read file content data.
     cmd(archiver, "check", "--archives-only", exit_code=0)

diff --git a/src/borg/testsuite/repository_test.py b/src/borg/testsuite/repository_test.py
@@ -150,6 +150,22 @@ def test_consistency(repo_fixtures, request):
             repository.get(H(0))
 
 
+def test_delete_with_stale_earlier_object_in_pack(repo_fixtures, request):
+    # Re-putting H(0) leaves its old bytes ahead of H(1) in the first pack while its index entry moves
+    # to a new pack. delete(H(1)) then sees a partial pack view (H(1) at a non-zero offset) and must not
+    # trip compact_pack's contiguity assert.
+    with get_repository_from_fixture(repo_fixtures, request) as repository:
+        repository._pack_writer.max_count = 2  # H(0) and H(1) share the first pack
+        repository.put(H(0), fchunk(b"aaa"))
+        repository.put(H(1), fchunk(b"bbb"))  # fills the pack, flushing both objects
+        repository.put(H(0), fchunk(b"ccc"))  # re-put: H(0)'s index entry moves to a new pack
+        repository.flush()
+        repository.delete(H(1))
+        with pytest.raises(Repository.ObjectNotFound):
+            repository.get(H(1))
+        assert pdchunk(repository.get(H(0))) == b"ccc"  # H(0) still served from its new pack
+
+
 def test_multi_object_pack_roundtrip(repo_fixtures, request):
     # Two objects fill one pack and must both read back: the second from a non-zero offset, and
     # read_data=False returning only its header+meta. The test pins max_count=2 so it does not depend