Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 8 additions & 14 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1841,11 +1841,7 @@ def verify_data(self):
pi.finish()
if defect_chunks:
if self.repair:
# We would remove the defect chunks here, but single-object delete is not implemented
# yet (Repository.delete is a no-op: a pack holds multiple objects, so dropping the
# whole pack to drop one chunk would take the good ones with it). So we recheck each
# chunk and only report the ones that keep failing; real removal will come via compact.
logger.warning("Found defect chunks. They can not be removed yet and are only reported.")
logger.warning("Found defect chunks, removing them from the repository.")
for defect_chunk in defect_chunks:
# remote repo (ssh): retry might help for strange network / NIC / RAM errors
# as the chunk will be retransmitted from remote server.
Expand All @@ -1857,18 +1853,16 @@ def verify_data(self):
# we must decompress, so it'll call assert_id() in there:
self.repo_objs.parse(defect_chunk, encrypted_data, decompress=True, ro_type=ROBJ_DONTCARE)
except IntegrityErrorBase:
# failed twice -> we would like to get rid of this defect chunk. We must not
# drop its whole pack here: the pack holds other, good chunks too.
# Repository.delete is a no-op for now (it just logs); real single-object
# removal will happen via compact.
# TODO: actually remove the defect chunk once single-object delete exists.
self.repository.delete(defect_chunk)
# failed twice -> remove this defect chunk. delete rewrites its pack without it,
# keeping the other chunks. persist=False: finish() rebuilds the index from the
# rewritten packs anyway, so a per-chunk full index write would be wasted.
self.repository.delete(defect_chunk, persist=False)
# drop it from our own index too, so rebuild_archives reports the file it belongs to.
del self.chunks[defect_chunk]
else:
logger.warning("chunk %s not deleted, did not consistently fail.", bin_to_hex(defect_chunk))
else:
logger.warning(
"Found defect chunks. Run with --repair to recheck them (removal is not implemented yet)."
)
logger.warning("Found defect chunks. Run with --repair to remove them.")
for defect_chunk in defect_chunks:
logger.debug("chunk %s is defect.", bin_to_hex(defect_chunk))
log = logger.error if errors else logger.info
Expand Down
55 changes: 26 additions & 29 deletions src/borg/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,48 +798,46 @@ def put(self, id, data):
# PackWriter shares this repository's index, so add() triggers the lazy build itself.
return self._pack_writer.add(id, data)

def delete(self, id):
def delete(self, id, *, persist=True):

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

persist is a bad name, maybe update_index or so?

"""Delete a single repo object by rewriting its pack without it.

A pack holds several objects, so we can not just drop the whole pack: that would take the
target's innocent neighbours with it. Route through compact_pack, which copies the survivors
into a new pack and repoints them in the chunk index. Rewriting a whole pack to remove one
object is slow, but delete is only used by special-purpose callers (tests, the debug command,
check --repair), never on a hot path, so the cost does not matter.
A pack holds several objects, so we rewrite it via compact_pack(complete=False) rather than
dropping the whole pack. With persist=True the full chunk index is written back so the next
Comment on lines +804 to +805

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please read these AI-made comments and make sure they don't talk about the bad hacks of the past?

Dropping a whole pack instead of a defect chunk is nothing we need to discuss or even mention.

borg process sees the deletion; bulk callers that rebuild the index themselves (check --repair)
pass persist=False to avoid a full index rewrite per deleted object.
"""
self._lock_refresh()
from .cache import write_chunkindex_to_repo

entry = self.chunks.get(id)
if entry is None:
raise self.ObjectNotFound(id, str(self._location))
pack_id = entry.pack_id
# keep every other object in the same pack; compact_pack rewrites the pack without the target.
# work off the live index (each entry already carries its offset/size after flush): do not
# rebuild from the packs here, that could not tell the target apart from stale duplicate copies
# of the same id left in other packs by re-puts (build keeps an arbitrary one of them).
# build keep_ids from the live index (offsets are filled in after flush), not from the packs:
# a rebuild could not tell the target from stale duplicates that re-puts left in other packs.
keep_ids = {cid for cid, e in self.chunks.iteritems() if e.pack_id == pack_id}
keep_ids.discard(id)
self.compact_pack(pack_id, keep_ids=keep_ids, drop_ids={id})
# persist a full index so a following borg process sees the deletion; close() only writes new
# entries incrementally and would not record the removal.
write_chunkindex_to_repo(self, self.chunks, incremental=False, force_write=True, delete_other=True)
self.compact_pack(pack_id, keep_ids=keep_ids, drop_ids={id}, complete=False)
if persist:
# close() only persists new entries incrementally, so write the full index here to record
# the removal for the next borg process.
from .cache import write_chunkindex_to_repo

write_chunkindex_to_repo(self, self.chunks, incremental=False, force_write=True, delete_other=True)

def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set):
def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set, complete: bool = True):
"""Rewrite pack <pack_id>, keeping <keep_ids> and dropping <drop_ids>, then delete the old pack.

keep_ids and drop_ids are sets of chunk ids that must together be every object of the pack, as
recorded in the chunk index. The caller guarantees that completeness (both callers build the two
sets by iterating the index for this pack_id); here we only assert their ranges tile contiguously
from offset 0 with no gap or overlap, and that their intersection is empty. Kept objects are
copied into a new pack via store.defrag and repointed in the chunk index; dropped objects' index
entries are removed.
keep_ids and drop_ids come from the caller iterating the chunk index for this pack_id; they
must not overlap. With complete=True (compact) they are the pack's whole object set, so their
ranges must tile it from offset 0 with no gap or overlap. With complete=False (delete) the index
view may be partial (a re-put can repoint an earlier object to another pack while its bytes stay
here), so we copy only the known survivors at their real offsets and allow gaps. Kept objects are
copied into a new pack via store.defrag and repointed; dropped objects' index entries are removed.
Comment on lines +829 to +834

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rewrite this to be more clear.

also remove crap like "come from the caller iterating chunk index for this pack id". a description of parameters needs to tell what they are and what we do here with them, not where a call comes from or what the caller is doing.


Returns the new pack_id, None if nothing is kept (pack dropped), or <pack_id> unchanged if the
kept objects reproduce the old pack (same sha256 name, nothing to delete).

Updates the in-memory chunk index only. The caller holds the exclusive lock and owns index
durability: invalidate the cached index before calling, write it back after, as compact does.
Updates the in-memory chunk index only; the caller holds the exclusive lock and owns index
durability (invalidate before, write back after, as compact does).
"""
self._lock_refresh()
pack_key = "packs/" + bin_to_hex(pack_id)
Expand All @@ -855,13 +853,12 @@ def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set):
located.append((entry.obj_offset, obj_id, entry.obj_size, keep))
located.sort()

# keep + drop tile the pack contiguously from offset 0; collect the objects to keep in the same
# pass. we do not cross-check against the pack's on-disk size: the caller already guarantees the
# two sets are the pack's complete object set.
# walk objects in offset order, collecting the survivors.
kept = [] # (obj_offset, obj_id, obj_size), offset-ordered
covered = 0
for offset, obj_id, size, keep in located:
assert offset == covered, f"gap or overlap in pack {bin_to_hex(pack_id)} at offset {covered}"
if complete: # keep+drop are the whole pack: they must tile it with no gap or overlap
assert offset == covered, f"gap or overlap in pack {bin_to_hex(pack_id)} at offset {covered}"
covered += size
if keep:
kept.append((offset, obj_id, size))
Expand Down
9 changes: 6 additions & 3 deletions src/borg/testsuite/archiver/check_cmd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,6 @@ def test_extra_chunks(archivers, request):
cmd(archiver, "check", "-v", exit_code=0) # check does not deal with orphans anymore


@pytest.mark.skip(reason="TODO: test broken due to packs refactoring")
@pytest.mark.parametrize("init_args", [["--encryption=aes256-ocb"], ["--encryption", "none"]])
def test_verify_data(archivers, request, init_args):
archiver = request.getfixturevalue(archivers)
Expand All @@ -390,10 +389,14 @@ def test_verify_data(archivers, request, init_args):
for item in archive.iter_items():
if item.path.endswith(src_file):
chunk = item.chunks[-1]
data = repository.get(chunk.id)
data = corrupt(data, 123)
data = corrupt(repository.get(chunk.id), 123)
# re-putting the same id would not repoint the index: close() persists new index
# entries incrementally, not updates to existing ones, so the next process would still
# read the old, good copy. delete first, then put the corrupted bytes as a fresh entry.
repository.delete(chunk.id)
repository.put(chunk.id, data)
Comment on lines +392 to 397

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, that should work better.

even better than that would be to simulate reality of bit corruption more closely: use some low-level code that directly corrupts the file on disk without going through delete/put and without modifying the index.

break
repository.flush() # make the corrupted put durable before close()

# the normal archives check does not read file content data.
cmd(archiver, "check", "--archives-only", exit_code=0)
Expand Down
16 changes: 16 additions & 0 deletions src/borg/testsuite/repository_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,22 @@ def test_consistency(repo_fixtures, request):
repository.get(H(0))


def test_delete_with_stale_earlier_object_in_pack(repo_fixtures, request):
# Re-putting H(0) leaves its old bytes ahead of H(1) in the first pack while its index entry moves
# to a new pack. delete(H(1)) then sees a partial pack view (H(1) at a non-zero offset) and must not
# trip compact_pack's contiguity assert.
with get_repository_from_fixture(repo_fixtures, request) as repository:
repository._pack_writer.max_count = 2 # H(0) and H(1) share the first pack
repository.put(H(0), fchunk(b"aaa"))
repository.put(H(1), fchunk(b"bbb")) # fills the pack, flushing both objects
repository.put(H(0), fchunk(b"ccc")) # re-put: H(0)'s index entry moves to a new pack
repository.flush()
repository.delete(H(1))
with pytest.raises(Repository.ObjectNotFound):
repository.get(H(1))
assert pdchunk(repository.get(H(0))) == b"ccc" # H(0) still served from its new pack


def test_multi_object_pack_roundtrip(repo_fixtures, request):
# Two objects fill one pack and must both read back: the second from a non-zero offset, and
# read_data=False returning only its header+meta. The test pins max_count=2 so it does not depend
Expand Down
Loading