From 5f07e76ef71113993b559a83d9d1a3cc32d32644 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 19 Apr 2025 20:43:10 +0200 Subject: [PATCH 01/22] Adds int_or_interval format parser Accepts either int or interval, first tries parsing int then tries parsing as interval if that fails. Returns a timedelta for easy date math later. Now allows intervals of length 0 as a 0-length timedelta is perfectly fine to work with. --- src/borg/archiver/prune_cmd.py | 6 +- src/borg/helpers/__init__.py | 2 +- src/borg/helpers/parseformat.py | 44 +++++++++-- .../testsuite/helpers/parseformat_test.py | 77 +++++++++++++++---- 4 files changed, 105 insertions(+), 24 deletions(-) diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index dfa9793fc9..0d949d492a 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from datetime import UTC, datetime, timedelta +from datetime import UTC, datetime import logging from operator import attrgetter import os @@ -17,8 +17,8 @@ logger = create_logger() -def prune_within(archives, seconds, kept_because): - target = datetime.now(UTC) - timedelta(seconds=seconds) +def prune_within(archives, delta, kept_because): + target = datetime.now(UTC) - delta kept_counter = 0 result = [] for a in archives: diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py index 530952d96d..5f27f40390 100644 --- a/src/borg/helpers/__init__.py +++ b/src/borg/helpers/__init__.py @@ -27,7 +27,7 @@ from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper from .parseformat import octal_int, bin_to_hex, hex_to_bin, safe_encode, safe_decode from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd -from .parseformat import eval_escapes, decode_dict, interval +from .parseformat import eval_escapes, decode_dict, interval, int_or_interval from .parseformat import ( PathSpec, 
FilesystemPathSpec, diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 61252d802f..b8d9f89a9a 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -11,7 +11,7 @@ import uuid from pathlib import Path from typing import ClassVar, Any, TYPE_CHECKING, Literal -from datetime import UTC, datetime +from datetime import UTC, datetime, timedelta from functools import partial from hashlib import sha256 from string import Formatter @@ -128,9 +128,14 @@ def decode_dict(d, keys, encoding="utf-8", errors="surrogateescape"): def interval(s): - """Convert a string representing a valid interval to a number of seconds.""" - if isinstance(s, int): + """Parse an interval string (e.g. ``7d``, ``2w``, ``30M``) into a timedelta. + + Supported units: y (years, 365d), m (months, 31d), w (weeks), d (days), + H (hours), M (minutes), S (seconds). The value must be nonnegative. + """ + if isinstance(s, timedelta): return s + seconds_in_a_minute = 60 seconds_in_an_hour = 60 * seconds_in_a_minute seconds_in_a_day = 24 * seconds_in_an_hour @@ -158,10 +163,37 @@ def interval(s): except ValueError: seconds = -1 - if seconds <= 0: - raise ArgumentTypeError(f'Invalid number "{number}": expected positive integer') + if seconds < 0: + raise ArgumentTypeError(f'Invalid number "{number}": expected nonnegative integer') + + return timedelta(seconds=seconds) + + +def int_or_interval(s): + """Parse *s* as an :class:`int` or, failing that, as an interval string. + + Returns :class:`int` if *s* can be parsed as an integer (e.g. ``"7"``), + or :class:`datetime.timedelta` if *s* is a valid interval (e.g. ``"7d"``). - return seconds + Raises :class:`ArgumentTypeError` if *s* is neither an integer nor + a valid interval. + """ + if isinstance(s, (int, timedelta)): + return s + + # Explicitly check 'all' as a shortcut to 'infinite' sentinel value `-1`. 
+ if s == "all": + return -1 + + try: + return int(s) + except ValueError: + pass + + try: + return interval(s) + except ArgumentTypeError as e: + raise ArgumentTypeError(f"Value is neither an integer nor an interval: {e}") class CompressionSpec: diff --git a/src/borg/testsuite/helpers/parseformat_test.py b/src/borg/testsuite/helpers/parseformat_test.py index 6be33bc77c..75c52c684e 100644 --- a/src/borg/testsuite/helpers/parseformat_test.py +++ b/src/borg/testsuite/helpers/parseformat_test.py @@ -1,7 +1,8 @@ import base64 import os -from datetime import datetime, timezone +import re +from datetime import datetime, timedelta, timezone import pytest @@ -17,6 +18,7 @@ format_file_size, parse_file_size, interval, + int_or_interval, partial_format, clean_lines, format_line, @@ -409,13 +411,14 @@ def test_format_timedelta(): @pytest.mark.parametrize( "timeframe, num_secs", [ - ("5S", 5), - ("2M", 2 * 60), - ("1H", 60 * 60), - ("1d", 24 * 60 * 60), - ("1w", 7 * 24 * 60 * 60), - ("1m", 31 * 24 * 60 * 60), - ("1y", 365 * 24 * 60 * 60), + ("0S", timedelta(seconds=0)), + ("5S", timedelta(seconds=5)), + ("2M", timedelta(minutes=2)), + ("1H", timedelta(hours=1)), + ("1d", timedelta(days=1)), + ("1w", timedelta(days=7)), + ("1m", timedelta(days=31)), + ("1y", timedelta(days=365)), ], ) def test_interval(timeframe, num_secs): @@ -425,9 +428,9 @@ def test_interval(timeframe, num_secs): @pytest.mark.parametrize( "invalid_interval, error_tuple", [ - ("H", ('Invalid number "": expected positive integer',)), - ("-1d", ('Invalid number "-1": expected positive integer',)), - ("food", ('Invalid number "foo": expected positive integer',)), + ("H", ('Invalid number "": expected nonnegative integer',)), + ("-1d", ('Invalid number "-1": expected nonnegative integer',)), + ("food", ('Invalid number "foo": expected nonnegative integer',)), ], ) def test_interval_time_unit(invalid_interval, error_tuple): @@ -436,10 +439,56 @@ def test_interval_time_unit(invalid_interval, error_tuple): assert 
exc.value.args == error_tuple -def test_interval_number(): +@pytest.mark.parametrize( + "invalid_input, error_regex", + [ + ("x", r'^Unexpected time unit "x": choose from'), + ("-1t", r'^Unexpected time unit "t": choose from'), + ("fool", r'^Unexpected time unit "l": choose from'), + ("abc", r'^Unexpected time unit "c": choose from'), + (" abc ", r'^Unexpected time unit " ": choose from'), + ], +) +def test_interval_invalid_time_format(invalid_input, error_regex): + with pytest.raises(ArgumentTypeError) as exc: + interval(invalid_input) + assert re.search(error_regex, exc.value.args[0]) + + +@pytest.mark.parametrize( + "input, result", + [ + ("0", 0), + ("5", 5), + (" 999 ", 999), + ("-1", -1), + ("all", -1), + ("0S", timedelta(seconds=0)), + ("5S", timedelta(seconds=5)), + ("1m", timedelta(days=31)), + # already-converted values (jsonargparse idempotency) + (0, 0), + (5, 5), + (timedelta(seconds=5), timedelta(seconds=5)), + (timedelta(days=31), timedelta(days=31)), + ], +) +def test_int_or_interval(input, result): + assert int_or_interval(input) == result + + +@pytest.mark.parametrize( + "invalid_input, error_regex", + [ + ("H", r"Value is neither an integer nor an interval:"), + ("-1d", r"Value is neither an integer nor an interval:"), + ("food", r"Value is neither an integer nor an interval:"), + ], +) +def test_int_or_interval_time_unit(invalid_input, error_regex): with pytest.raises(ArgumentTypeError) as exc: - interval("5") - assert exc.value.args == ('Unexpected time unit "5": choose from y, m, w, d, H, M, S',) + int_or_interval(invalid_input) + assert re.search(error_regex, exc.value.args[0]) def test_parse_timestamp(): From 4bf60336b226b51a93d6d875be8259845d74d273 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 19 Apr 2025 21:11:05 +0200 Subject: [PATCH 02/22] Adds optional interval support for all prune retention flags Support is added for setting prune retention with either an int (keep n archives) or an interval (keep within). 
This works much like --keep-within currently does, but extends support to all retention filters. Additionally adds a generic --keep flag to take over (or live alongside) both --keep-last and --keep-within. --keep-last is no longer an alias of --keep-secondly, now keeps archives made on the same second. Comparisons against archive timestamp are made to use local timezone instead of UTC. Should be equal result in practice, but allows for easier testing with frozen local time. --- requirements.d/development.lock.txt | 1 + requirements.d/development.txt | 1 + src/borg/archiver/prune_cmd.py | 432 ++++++++------ src/borg/constants.py | 2 + src/borg/testsuite/archiver/prune_cmd_test.py | 557 ++++++++++++++---- 5 files changed, 716 insertions(+), 277 deletions(-) diff --git a/requirements.d/development.lock.txt b/requirements.d/development.lock.txt index f2cc2f4902..08dbde72a8 100644 --- a/requirements.d/development.lock.txt +++ b/requirements.d/development.lock.txt @@ -11,6 +11,7 @@ pytest-xdist==3.8.0 coverage[toml]==7.14.0 pytest-cov==7.1.0 pytest-benchmark==5.2.3 +freezegun==1.5.5 Cython==3.2.5 pre-commit==4.6.0 types-PyYAML==6.0.12.20260518 diff --git a/requirements.d/development.txt b/requirements.d/development.txt index d46a28e42d..522c3e637d 100644 --- a/requirements.d/development.txt +++ b/requirements.d/development.txt @@ -11,6 +11,7 @@ pytest-xdist coverage[toml] pytest-cov pytest-benchmark +freezegun Cython pre-commit bandit[toml] diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index 0d949d492a..134d43f3bd 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -1,117 +1,171 @@ -from collections import OrderedDict -from datetime import UTC, datetime +from typing import Callable, NamedTuple +from datetime import datetime, timedelta import logging -from operator import attrgetter +import math +from functools import wraps import os - +from itertools import count, combinations from ._common import 
with_repository, Highlander from ..constants import * # NOQA -from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error -from ..helpers import archivename_validator +from ..helpers import ArchiveFormatter, ProgressIndicatorPercent, CommandError, Error +from ..helpers import archivename_validator, interval, int_or_interval, sig_int from ..helpers import json_print, basic_json_data from ..helpers.argparsing import ArgumentParser -from ..manifest import Manifest +from ..manifest import ArchiveInfo, Manifest from ..logger import create_logger logger = create_logger() -def prune_within(archives, delta, kept_because): - target = datetime.now(UTC) - delta - kept_counter = 0 - result = [] - for a in archives: - if a.ts > target: - kept_counter += 1 - kept_because[a.id] = ("within", kept_counter) - result.append(a) - return result +class PruningRule(NamedTuple): + key: str + period_func: Callable[[ArchiveInfo | datetime], str] + + def __str__(self): + return self.key + + +class KeepResult(NamedTuple): + rule: PruningRule + idx: int + oldest: bool = False + + def __str__(self): + return f"Keep(rule={self.rule}, idx={self.idx}{', oldest=True' if self.oldest else ''})" + + +def archive_datetime_dispatch(func: Callable[[datetime], str]) -> Callable[[ArchiveInfo | datetime], str]: + """ + Wraps a datetime-taking function with a dispatcher that can call that + function by extracting the timestamp from an archive. + """ + + @wraps(func) + def wrapper(arg): + if isinstance(arg, datetime): + return func(arg) + if isinstance(arg, ArchiveInfo): + return func(arg.ts) + raise TypeError(f"{func.__name__}(): expected datetime or Archive, " f"got {type(arg).__name__}") + + return wrapper + + +# The *_period_func group of functions create period grouping keys to group +# together archives falling within a certain period. Among archives in each of +# these groups, only the latest (by creation timestamp) is kept. 
+ + +def unique_period_func(): + counter = count() + max_digits = math.ceil(math.log10(MAX_ARCHIVES)) + @archive_datetime_dispatch + def unique_values(_dt): + """Group archives by an incrementing counter, practically making each archive a group of 1""" + return str(next(counter)).zfill(max_digits) -def default_period_func(pattern): - def inner(a): + return unique_values + + +def pattern_period_func(pattern): + @archive_datetime_dispatch + def inner(dt): + """Group archives by extracting given strftime-pattern from their creation timestamp""" # compute in local timezone - return a.ts.astimezone().strftime(pattern) + return dt.astimezone().strftime(pattern) return inner -def quarterly_13weekly_period_func(a): - (year, week, _) = a.ts.astimezone().isocalendar() # local time - if week <= 13: - # Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7) - # days later. - return (year, 1) - elif 14 <= week <= 26: - # Weeks containing Apr 4th (leap year) or 5th to Jun 27th or 28th- 91 - # days later. - return (year, 2) - elif 27 <= week <= 39: - # Weeks containing Jul 4th (leap year) or 5th to Sep 26th or 27th- - # at least 91 days later. - return (year, 3) - else: - # Everything else, Oct 3rd (leap year) or 4th onward, will always - # include week of Dec 26th (leap year) or Dec 27th, may also include - # up to possibly Jan 3rd of next year. 
- return (year, 4) - - -def quarterly_3monthly_period_func(a): - lt = a.ts.astimezone() # local time - if lt.month <= 3: - # 1-1 to 3-31 - return (lt.year, 1) - elif 4 <= lt.month <= 6: - # 4-1 to 6-30 - return (lt.year, 2) - elif 7 <= lt.month <= 9: - # 7-1 to 9-30 - return (lt.year, 3) +@archive_datetime_dispatch +def quarterly_13weekly_period_func(dt): + """Group archives by extracting the ISO-8601 13-week quarter from their creation timestamp""" + (year, week) = dt.astimezone().isocalendar()[:2] # local time + return f"{year}-{min(max((week - 1) // 13, 0), 3):02}" + + +@archive_datetime_dispatch +def quarterly_3monthly_period_func(dt): + """Group archives by extracting the 3-month quarter from their creation timestamp""" + (year, month) = dt.astimezone().timetuple()[:2] # local time + return f"{year}-{(month - 1) // 3:02}" + + +# Each archive is considered for keeping +PRUNE_WITHIN = PruningRule("within", unique_period_func()) +PRUNE_LAST = PruningRule("last", unique_period_func()) +PRUNE_KEEP = PruningRule("keep", unique_period_func()) +# Last archive (by creation timestamp) within period group is considered for keeping +PRUNE_SECONDLY = PruningRule("secondly", pattern_period_func("%Y-%m-%d %H:%M:%S")) +PRUNE_MINUTELY = PruningRule("minutely", pattern_period_func("%Y-%m-%d %H:%M")) +PRUNE_HOURLY = PruningRule("hourly", pattern_period_func("%Y-%m-%d %H")) +PRUNE_DAILY = PruningRule("daily", pattern_period_func("%Y-%m-%d")) +PRUNE_WEEKLY = PruningRule("weekly", pattern_period_func("%G-%V")) +PRUNE_MONTHLY = PruningRule("monthly", pattern_period_func("%Y-%m")) +PRUNE_QUARTERLY_13WEEKLY = PruningRule("quarterly_13weekly", quarterly_13weekly_period_func) +PRUNE_QUARTERLY_3MONTHLY = PruningRule("quarterly_3monthly", quarterly_3monthly_period_func) +PRUNE_YEARLY = PruningRule("yearly", pattern_period_func("%Y")) + +PRUNING_RULES = [ + PRUNE_WITHIN, + PRUNE_LAST, + PRUNE_KEEP, + PRUNE_SECONDLY, + PRUNE_MINUTELY, + PRUNE_HOURLY, + PRUNE_DAILY, + PRUNE_WEEKLY, + 
PRUNE_MONTHLY, + PRUNE_QUARTERLY_13WEEKLY, + PRUNE_QUARTERLY_3MONTHLY, + PRUNE_YEARLY, +] + + +def prune( + archives: list[ArchiveInfo], + rule: PruningRule, + n_or_interval: int | timedelta, + base_timestamp: datetime | None, + keep_oldest: bool, + previously_kept: frozenset[ArchiveInfo] = frozenset(), +) -> dict[ArchiveInfo, KeepResult]: + if len(archives) == 0 or n_or_interval in (0, timedelta(0)): + return {} + + if isinstance(n_or_interval, int): + n, earliest_timestamp = n_or_interval, None else: - # 10-1 to 12-31 - return (lt.year, 4) - - -PRUNING_PATTERNS = OrderedDict( - [ - ("secondly", default_period_func("%Y-%m-%d %H:%M:%S")), - ("minutely", default_period_func("%Y-%m-%d %H:%M")), - ("hourly", default_period_func("%Y-%m-%d %H")), - ("daily", default_period_func("%Y-%m-%d")), - ("weekly", default_period_func("%G-%V")), - ("monthly", default_period_func("%Y-%m")), - ("quarterly_13weekly", quarterly_13weekly_period_func), - ("quarterly_3monthly", quarterly_3monthly_period_func), - ("yearly", default_period_func("%Y")), - ] -) - - -def prune_split(archives, rule, n, kept_because=None): - last = None - keep = [] - period_func = PRUNING_PATTERNS[rule] - if kept_because is None: - kept_because = {} - if n == 0: - return keep - - a = None - for a in sorted(archives, key=attrgetter("ts"), reverse=True): - period = period_func(a) - if period != last: - last = period - if a.id not in kept_because: - keep.append(a) - kept_because[a.id] = (rule, len(keep)) - if len(keep) == n: - break - # Keep oldest archive if we didn't reach the target retention count - if a is not None and len(keep) < n and a.id not in kept_because: - keep.append(a) - kept_because[a.id] = (rule + "[oldest]", len(keep)) + if base_timestamp is None: + raise ValueError("base_timestamp is required when using interval-based pruning") + n, earliest_timestamp = None, base_timestamp - n_or_interval + + keep: dict[ArchiveInfo, KeepResult] = {} + + def can_retain(a): + if n is not None: + return n == -1 or 
len(keep) < n + else: + return a.ts > earliest_timestamp + + prev_period = None + for archive in archives: + if not can_retain(archive): + break + period = rule.period_func(archive) + if period != prev_period: + prev_period = period + if archive not in keep and archive not in previously_kept: + keep[archive] = KeepResult(rule=rule, idx=len(keep)) + + if keep_oldest: + # Keep oldest archive if we didn't reach the target retention. + oldest_archive = archives[-1] + if oldest_archive not in keep and oldest_archive not in previously_kept and can_retain(oldest_archive): + keep[oldest_archive] = KeepResult(rule=rule, idx=len(keep), oldest=True) + return keep @@ -119,26 +173,8 @@ class PruneMixIn: @with_repository(compatibility=(Manifest.Operation.DELETE,)) def do_prune(self, args, repository, manifest): """Prune archives according to specified rules.""" - if not any( - ( - args.secondly, - args.minutely, - args.hourly, - args.daily, - args.weekly, - args.monthly, - args.quarterly_13weekly, - args.quarterly_3monthly, - args.yearly, - args.within, - ) - ): - raise CommandError( - 'At least one of the "keep-within", "keep-last", ' - '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' - '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", ' - 'or "keep-yearly" settings must be specified.' 
- ) + self._validate_prune_args(args) + if args.format is not None: format = args.format @@ -152,21 +188,20 @@ def do_prune(self, args, repository, manifest): archives = manifest.archives.list(match=match, sort_by=["ts"], reverse=True) archives = [ai for ai in archives if "@PROT" not in ai.tags] - keep = [] - # collect the rule responsible for the keeping of each archive in this dict - # keys are archive ids, values are a tuple - # (, ) - kept_because = {} - - # find archives which need to be kept because of the keep-within rule - if args.within: - keep += prune_within(archives, args.within, kept_because) - - # find archives which need to be kept because of the various time period rules - for rule in PRUNING_PATTERNS.keys(): - num = getattr(args, rule, None) - if num is not None: - keep += prune_split(archives, rule, num, kept_because) + # Archives to keep along with the rule that ensured them being kept + keep = {} + + base_timestamp = datetime.now().astimezone() + active_rules = {rule: getattr(args, rule.key) for rule in PRUNING_RULES if getattr(args, rule.key) is not None} + for i, (rule, n_or_interval) in enumerate(active_rules.items(), 1): + keep |= prune( + archives=archives, + rule=rule, + n_or_interval=n_or_interval, + base_timestamp=base_timestamp, + keep_oldest=i == len(active_rules), # Activate keep_oldest rule only for the largest active interval + previously_kept=frozenset(keep), + ) to_delete = set(archives) - set(keep) if not args.json: @@ -202,12 +237,14 @@ def do_prune(self, args, repository, manifest): archive_data["kept"] = False archive_data["deleted_archive_number"] = archives_deleted else: - rule, num = kept_because[archive_info.id] - log_message = "Keeping archive (rule: {rule} #{num}):".format(rule=rule, num=num) + result = keep[archive_info] + result_message = f"{result.rule.key}{'[oldest]' if result.oldest else ''} #{result.idx + 1}" + log_message = f"Keeping archive (rule: {result_message}):" if args.json: archive_data["kept"] = True - 
archive_data["keep_rule"] = rule - archive_data["kept_archive_number"] = num + archive_data["keep_rule"] = result.rule.key + archive_data["kept_oldest"] = result.oldest + archive_data["kept_archive_number"] = result.idx + 1 if args.json: if ( args.output_list @@ -232,6 +269,59 @@ def do_prune(self, args, repository, manifest): if sig_int: raise Error("Got Ctrl-C / SIGINT.") + def _validate_prune_args(self, args): + keep_args = {rule.key: getattr(args, rule.key) for rule in PRUNING_RULES if getattr(args, rule.key) is not None} + + if len(keep_args) == 0: + raise CommandError( + 'At least one of the "keep", "keep-within", "keep-last", ' + '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' + '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", ' + 'or "keep-yearly" settings must be specified.' + ) + + if PRUNE_KEEP.key in keep_args and PRUNE_LAST.key in keep_args: + raise CommandError('Only one of the "keep" and "last" settings may be specified.') + + if PRUNE_KEEP.key in keep_args and PRUNE_WITHIN.key in keep_args: + raise CommandError('Only one of the "keep" and "within" settings may be specified.') + + if all(not bool(val) for val in keep_args.values()): + raise CommandError( + 'None of the "keep", "keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", "keep-weekly", ' + '"keep-monthly", "keep-13weekly", "keep-3monthly", or "keep-yearly" settings have a positive value. ' + "At least one must be non-zero." + ) + + def lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val): + return ( + f"The combination of \"{lo_arg}='{lo_val}'\" and \"{hi_arg}='{hi_val}'\" is invalid. It is effectively " + f"useless since every archive matched by {hi_arg} would have already been matched by {lo_arg}." 
+ ) + + prune_keys = {rule.key for rule in PRUNING_RULES if rule != PRUNE_LAST} + interval_args = [ + (arg, val) + for arg, val in keep_args.items() + if arg in prune_keys and (isinstance(val, timedelta) or val == -1) + ] + for (lo_arg, lo_val), (hi_arg, hi_val) in combinations(interval_args, 2): + if hi_val == -1: + # 'Infinity' is always bigger + continue + + if lo_val == -1 or lo_val >= hi_val: + raise CommandError(lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val)) + + int_args = [ + (arg, val) + for arg, val in keep_args.items() + if any((arg == r.key for r in PRUNING_RULES)) and isinstance(val, int) + ] + for (lo_arg, lo_val), (hi_arg, hi_val) in combinations(int_args, 2): + if lo_val == -1: + raise CommandError(lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val)) + def build_parser_prune(self, subparsers, common_parser, mid_common_parser): from ._common import process_epilog from ._common import define_archive_filters_group @@ -332,87 +422,87 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): subparser.add_argument( "--keep-within", metavar="INTERVAL", - dest="within", + dest=PRUNE_WITHIN.key, type=interval, action=Highlander, help="keep all archives within this time interval", ) subparser.add_argument( - "--keep-last", + "--keep-last", dest=PRUNE_LAST.key, type=int, action=Highlander, help="number of archives to keep" + ) + subparser.add_argument( + "--keep", + dest=PRUNE_KEEP.key, + type=int_or_interval, + action=Highlander, + help="number or time interval of archives to keep", + ) + subparser.add_argument( "--keep-secondly", - dest="secondly", - type=int, - default=0, + dest=PRUNE_SECONDLY.key, + type=int_or_interval, action=Highlander, - help="number of secondly archives to keep", + help="number or time interval of secondly archives to keep", ) subparser.add_argument( "--keep-minutely", - dest="minutely", - type=int, - default=0, + dest=PRUNE_MINUTELY.key, + type=int_or_interval, action=Highlander, - help="number of 
minutely archives to keep", + help="number or time interval of minutely archives to keep", ) subparser.add_argument( "-H", "--keep-hourly", - dest="hourly", - type=int, - default=0, + dest=PRUNE_HOURLY.key, + type=int_or_interval, action=Highlander, - help="number of hourly archives to keep", + help="number or time interval of hourly archives to keep", ) subparser.add_argument( "-d", "--keep-daily", - dest="daily", - type=int, - default=0, + dest=PRUNE_DAILY.key, + type=int_or_interval, action=Highlander, - help="number of daily archives to keep", + help="number or time interval of daily archives to keep", ) subparser.add_argument( "-w", "--keep-weekly", - dest="weekly", - type=int, - default=0, + dest=PRUNE_WEEKLY.key, + type=int_or_interval, action=Highlander, - help="number of weekly archives to keep", + help="number or time interval of weekly archives to keep", ) subparser.add_argument( "-m", "--keep-monthly", - dest="monthly", - type=int, - default=0, + dest=PRUNE_MONTHLY.key, + type=int_or_interval, action=Highlander, - help="number of monthly archives to keep", + help="number or time interval of monthly archives to keep", ) quarterly_group = subparser.add_mutually_exclusive_group() quarterly_group.add_argument( "--keep-13weekly", - dest="quarterly_13weekly", - type=int, - default=0, - help="number of quarterly archives to keep (13 week strategy)", + dest=PRUNE_QUARTERLY_13WEEKLY.key, + type=int_or_interval, + help="number or time interval of quarterly archives to keep (13 week strategy)", ) quarterly_group.add_argument( "--keep-3monthly", - dest="quarterly_3monthly", - type=int, - default=0, - help="number of quarterly archives to keep (3 month strategy)", + dest=PRUNE_QUARTERLY_3MONTHLY.key, + type=int_or_interval, + help="number or time interval of quarterly archives to keep (3 month strategy)", ) subparser.add_argument( "-y", "--keep-yearly", - dest="yearly", - type=int, - default=0, + dest=PRUNE_YEARLY.key, + type=int_or_interval, action=Highlander, - 
help="number of yearly archives to keep", + help="number or time interval of yearly archives to keep", ) define_archive_filters_group(subparser, sort_by=False, first_last=False) subparser.add_argument( diff --git a/src/borg/constants.py b/src/borg/constants.py index 5c88b6b89e..34c791622c 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -150,7 +150,9 @@ EXIT_SIGNAL_BASE = 128 # terminated due to signal, rc = 128 + sig_no ISO_FORMAT_NO_USECS = "%Y-%m-%dT%H:%M:%S" +ISO_FORMAT_NO_USECS_ZONE = ISO_FORMAT_NO_USECS + "%z" ISO_FORMAT = ISO_FORMAT_NO_USECS + ".%f" +ISO_FORMAT_ZONE = ISO_FORMAT + "%z" DASHES = "-" * 78 diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 14420a7a5d..c1de83773a 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -1,23 +1,37 @@ +from itertools import product import json +import pytest import re +from operator import attrgetter from datetime import datetime, timezone, timedelta - -import pytest +from freezegun import freeze_time from ...constants import * # NOQA -from ...archiver.prune_cmd import prune_split, prune_within +from ...archiver.prune_cmd import ( + PRUNING_RULES, + prune, + PRUNE_DAILY, + PRUNE_HOURLY, + PRUNE_MINUTELY, + PRUNE_MONTHLY, + PRUNE_SECONDLY, + PRUNE_WEEKLY, + PRUNE_WITHIN, + PRUNE_YEARLY, +) +from ...helpers import CommandError, interval +from ...manifest import ArchiveInfo from . 
import cmd, RK_ENCRYPTION, generate_archiver_tests -from ...helpers import interval pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA -def _create_archive_ts(archiver, backup_files, name, y, m, d, H=0, M=0, S=0): +def _create_archive_ts(archiver, backup_files, name, y, m, d, H=0, M=0, S=0, us=0, tzinfo=None): cmd( archiver, "create", "--timestamp", - datetime(y, m, d, H, M, S, 0).strftime(ISO_FORMAT_NO_USECS), # naive == local time / local tz + datetime(y, m, d, H, M, S, us, tzinfo=tzinfo).strftime(ISO_FORMAT_ZONE), name, backup_files, ) @@ -150,6 +164,7 @@ def mk_name(tup): # Use 99 instead of -1 to test that oldest backup is kept. output = cmd(archiver, "prune", "--list", "--dry-run", f"--keep-{strat}=99") + print(output) for a in map(mk_name, to_prune): assert re.search(rf"Would prune:\s+{a}", output) @@ -257,150 +272,133 @@ def test_prune_ignore_protected(archivers, request): cmd(archiver, "create", "archive3", archiver.input_path) output = cmd(archiver, "prune", "--list", "--keep-last=1", "--match-archives=sh:archive*") assert "archive1" not in output # @PROT archives are completely ignored. - assert re.search(r"Keeping archive \(rule: secondly #1\):\s+archive3", output) + assert re.search(r"Keeping archive \(rule: last #1\):\s+archive3", output) assert re.search(r"Pruning archive \(.*?\):\s+archive2", output) output = cmd(archiver, "repo-list") assert "archive1" in output # @PROT protected archive1 from deletion assert "archive3" in output # last one -class MockArchive: - def __init__(self, ts, id): - self.ts = ts - self.id = id - - def __repr__(self): - return f"{self.id}: {self.ts.isoformat()}" +mock_id = 0 -# This is the local timezone of the system running the tests. -# We need this e.g. to construct archive timestamps for the prune tests, -# because borg prune operates in the local timezone (it first converts the -# archive timestamp to the local timezone). 
So, if we want the y/m/d/h/m/s -# values which prune uses to be exactly the ones we give [and NOT shift them -# by tzoffset], we need to give the timestamps in the same local timezone. -# Please note that the timestamps in a real borg archive or manifest are -# stored in UTC timezone. -local_tz = datetime.now(tz=timezone.utc).astimezone(tz=None).tzinfo +def mock_archive(ts, id=None): + """Create an ArchiveInfo with mocked/default values.""" + global mock_id + if id is None: + id = mock_id + mock_id += 1 + return ArchiveInfo(name="", id=id, ts=ts.replace(tzinfo=timezone.utc), tags=(), host="", user="") def test_prune_within(): - def subset(lst, indices): - return {lst[i] for i in indices} - - def dotest(test_archives, within, indices): - for ta in test_archives, reversed(test_archives): - kept_because = {} - keep = prune_within(ta, interval(within), kept_because) - assert set(keep) == subset(test_archives, indices) - assert all("within" == kept_because[a.id][0] for a in keep) - - # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours - test_offsets = [60, 90 * 60, 150 * 60, 210 * 60, 25 * 60 * 60, 49 * 60 * 60] + test_deltas = [ + timedelta(minutes=1), + timedelta(hours=1.5), + timedelta(hours=2.5), + timedelta(hours=3.5), + timedelta(hours=25), + timedelta(hours=49), + ] now = datetime.now(timezone.utc) - test_dates = [now - timedelta(seconds=s) for s in test_offsets] - test_archives = [MockArchive(date, i) for i, date in enumerate(test_dates)] - - dotest(test_archives, "15S", []) - dotest(test_archives, "2M", [0]) - dotest(test_archives, "1H", [0]) - dotest(test_archives, "2H", [0, 1]) - dotest(test_archives, "3H", [0, 1, 2]) - dotest(test_archives, "24H", [0, 1, 2, 3]) - dotest(test_archives, "26H", [0, 1, 2, 3, 4]) - dotest(test_archives, "2d", [0, 1, 2, 3, 4]) - dotest(test_archives, "50H", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "3d", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1w", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1m", [0, 1, 2, 3, 
4, 5]) - dotest(test_archives, "1y", [0, 1, 2, 3, 4, 5]) + test_dates = [now - d for d in test_deltas] + test_archives = [mock_archive(date) for date in test_dates] + + def dotest(within, indices): + keep = prune(test_archives, PRUNE_WITHIN, interval(within), now, False) + assert set(keep) == {test_archives[i] for i in indices} + assert all(keep[a].rule.key == "within" for a in keep) + + dotest("15S", []) + dotest("2M", [0]) + dotest("1H", [0]) + dotest("2H", [0, 1]) + dotest("3H", [0, 1, 2]) + dotest("24H", [0, 1, 2, 3]) + dotest("26H", [0, 1, 2, 3, 4]) + dotest("2d", [0, 1, 2, 3, 4]) + dotest("50H", [0, 1, 2, 3, 4, 5]) + dotest("3d", [0, 1, 2, 3, 4, 5]) + dotest("1w", [0, 1, 2, 3, 4, 5]) + dotest("1m", [0, 1, 2, 3, 4, 5]) + dotest("1y", [0, 1, 2, 3, 4, 5]) @pytest.mark.parametrize( - "rule,num_to_keep,expected_ids", + "rule,num_to_keep,expected_indices", [ - ("yearly", 3, (13, 2, 1)), - ("monthly", 3, (13, 8, 4)), - ("weekly", 2, (13, 8)), - ("daily", 3, (13, 8, 7)), - ("hourly", 3, (13, 10, 8)), - ("minutely", 3, (13, 10, 9)), - ("secondly", 4, (13, 12, 11, 10)), - ("daily", 0, []), + (PRUNE_YEARLY, 3, (12, 1, 0)), + (PRUNE_MONTHLY, 3, (12, 7, 3)), + (PRUNE_WEEKLY, 2, (12, 7)), + (PRUNE_DAILY, 3, (12, 7, 6)), + (PRUNE_HOURLY, 3, (12, 9, 7)), + (PRUNE_MINUTELY, 3, (12, 9, 8)), + (PRUNE_SECONDLY, 4, (12, 11, 10, 9)), + (PRUNE_DAILY, 0, []), + (PRUNE_DAILY, -1, (12, 7, 6, 5, 4, 3, 2, 1, 0)), ], ) -def test_prune_split(rule, num_to_keep, expected_ids): - def subset(lst, ids): - return {i for i in lst if i.id in ids} - +def test_prune(rule, num_to_keep, expected_indices): archives = [ # years apart - MockArchive(datetime(2015, 1, 1, 10, 0, 0, tzinfo=local_tz), 1), - MockArchive(datetime(2016, 1, 1, 10, 0, 0, tzinfo=local_tz), 2), - MockArchive(datetime(2017, 1, 1, 10, 0, 0, tzinfo=local_tz), 3), + mock_archive(datetime(2015, 1, 1, 10, 0, 0)), + mock_archive(datetime(2016, 1, 1, 10, 0, 0)), + mock_archive(datetime(2017, 1, 1, 10, 0, 0)), # months apart - 
MockArchive(datetime(2017, 2, 1, 10, 0, 0, tzinfo=local_tz), 4), - MockArchive(datetime(2017, 3, 1, 10, 0, 0, tzinfo=local_tz), 5), + mock_archive(datetime(2017, 2, 1, 10, 0, 0)), + mock_archive(datetime(2017, 3, 1, 10, 0, 0)), # days apart - MockArchive(datetime(2017, 3, 2, 10, 0, 0, tzinfo=local_tz), 6), - MockArchive(datetime(2017, 3, 3, 10, 0, 0, tzinfo=local_tz), 7), - MockArchive(datetime(2017, 3, 4, 10, 0, 0, tzinfo=local_tz), 8), + mock_archive(datetime(2017, 3, 2, 10, 0, 0)), + mock_archive(datetime(2017, 3, 3, 10, 0, 0)), + mock_archive(datetime(2017, 3, 4, 10, 0, 0)), # minutes apart - MockArchive(datetime(2017, 10, 1, 9, 45, 0, tzinfo=local_tz), 9), - MockArchive(datetime(2017, 10, 1, 9, 55, 0, tzinfo=local_tz), 10), + mock_archive(datetime(2017, 10, 1, 9, 45, 0)), + mock_archive(datetime(2017, 10, 1, 9, 55, 0)), # seconds apart - MockArchive(datetime(2017, 10, 1, 10, 0, 1, tzinfo=local_tz), 11), - MockArchive(datetime(2017, 10, 1, 10, 0, 3, tzinfo=local_tz), 12), - MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=local_tz), 13), + mock_archive(datetime(2017, 10, 1, 10, 0, 1)), + mock_archive(datetime(2017, 10, 1, 10, 0, 3)), + mock_archive(datetime(2017, 10, 1, 10, 0, 5)), ] - kept_because = {} - keep = prune_split(archives, rule, num_to_keep, kept_because) - - assert set(keep) == subset(archives, expected_ids) - for item in keep: - assert kept_because[item.id][0] == rule + keep = prune(sorted(archives, key=attrgetter("ts"), reverse=True), rule, num_to_keep, None, False) + assert set(keep) == {archives[i] for i in expected_indices} + assert all(result.rule == rule for _, result in keep.items()) -def test_prune_split_keep_oldest(): - def subset(lst, ids): - return {i for i in lst if i.id in ids} +def test_prune_keep_oldest(): archives = [ # oldest backup, but not last in its year - MockArchive(datetime(2018, 1, 1, 10, 0, 0, tzinfo=local_tz), 1), + mock_archive(datetime(2018, 1, 1, 10, 0, 0)), # an interim backup - MockArchive(datetime(2018, 12, 30, 
10, 0, 0, tzinfo=local_tz), 2), + mock_archive(datetime(2018, 12, 30, 10, 0, 0)), # year-end backups - MockArchive(datetime(2018, 12, 31, 10, 0, 0, tzinfo=local_tz), 3), - MockArchive(datetime(2019, 12, 31, 10, 0, 0, tzinfo=local_tz), 4), + mock_archive(datetime(2018, 12, 31, 10, 0, 0)), + mock_archive(datetime(2019, 12, 31, 10, 0, 0)), ] + sorted_archives = sorted(archives, key=attrgetter("ts"), reverse=True) # Keep oldest when retention target can't otherwise be met - kept_because = {} - keep = prune_split(archives, "yearly", 3, kept_because) + keep = prune(sorted_archives, PRUNE_YEARLY, 3, None, True) - assert set(keep) == subset(archives, [1, 3, 4]) - assert kept_because[1][0] == "yearly[oldest]" - assert kept_because[3][0] == "yearly" - assert kept_because[4][0] == "yearly" + assert keep[archives[0]].rule.key == "yearly" and keep[archives[0]].oldest is True + assert keep[archives[2]].rule.key == "yearly" and keep[archives[2]].oldest is False + assert keep[archives[3]].rule.key == "yearly" and keep[archives[3]].oldest is False + assert len(keep) == 3 # Otherwise, prune it - kept_because = {} - keep = prune_split(archives, "yearly", 2, kept_because) + keep = prune(sorted_archives, PRUNE_YEARLY, 2, None, True) - assert set(keep) == subset(archives, [3, 4]) - assert kept_because[3][0] == "yearly" - assert kept_because[4][0] == "yearly" + assert keep[archives[2]].rule.key == "yearly" and keep[archives[2]].oldest is False + assert keep[archives[3]].rule.key == "yearly" and keep[archives[3]].oldest is False + assert len(keep) == 2 -def test_prune_split_no_archives(): +def test_prune_no_archives(): archives = [] - kept_because = {} - keep = prune_split(archives, "yearly", 3, kept_because) + keep = prune(archives, PRUNE_YEARLY, 3, None, False) - assert keep == [] - assert kept_because == {} + assert keep == {} def test_prune_list_with_metadata_format(archivers, request, backup_files): @@ -437,6 +435,7 @@ def test_prune_json(archivers, request, backup_files): assert 
kept[0]["name"] == "test2" assert kept[0]["keep_rule"] == "daily" assert kept[0]["kept_archive_number"] == 1 + assert not kept[0]["kept_oldest"] assert "deleted_archive_number" not in kept[0] assert pruned[0]["name"] == "test1" assert pruned[0]["deleted_archive_number"] == 1 @@ -460,3 +459,349 @@ def test_prune_json_list_pruned(archivers, request, backup_files): assert archives[0]["name"] == "test1" assert archives[0]["kept"] is False assert archives[0]["deleted_archive_number"] == 1 + + +def test_prune_keep_last_same_second(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + cmd(archiver, "create", "test1", backup_files) + cmd(archiver, "create", "test2", backup_files) + output = cmd(archiver, "prune", "--list", "--dry-run", "--keep-last=2") + # Both archives are kept even though they have the same timestamp to the second. Would previously have failed with + # old behavior of --keep-last. Archives sorted on seconds, order is undefined. 
+ assert re.search(r"Keeping archive \(rule: last #\d\):\s+test1", output) + assert re.search(r"Keeping archive \(rule: last #\d\):\s+test2", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) # Non-leap year ending on a Sunday +def test_prune_keep_int_or_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 31, 23, 59, 59) + _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 31, 23, 59, 59) + _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 31, 23, 59, 58) + for keep_arg in ["--keep=2", "--keep=1S"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-daily=-1", "--keep-daily=all"]) +def test_prune_keep_all(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 30, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 29, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 28, 23, 59, 59, tzinfo=timezone.utc) + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) + assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-3", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) +def test_prune_keep_secondly_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = 
request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 31, 23, 59, 58) + _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 31, 23, 59, 57, 1) + _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 31, 23, 59, 57) + _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 31, 23, 59, 56, 999999) + for keep_arg in ["--keep-secondly=2", "--keep-secondly=2S"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() + assert re.search(r"Keeping archive \(rule: secondly #1\):\s+test-1", output.pop(0)) + assert re.search(r"Keeping archive \(rule: secondly #2\):\s+test-2", output.pop(0)) + assert re.search(r"Would prune:\s+test-3", output.pop(0)) + assert re.search(r"Would prune:\s+test-4", output.pop(0)) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 0, tzinfo=None)) +def test_prune_keep_minutely_int_or_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 31, 23, 58) + _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 31, 23, 57, 1) + _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 31, 23, 57) + _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 31, 23, 56, 0, 1) # Last possible microsecond + _create_archive_ts(archiver, backup_files, "test-5", 2023, 12, 31, 23, 56) + for keep_arg in ["--keep-minutely=3", "--keep-minutely=3M"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() + assert re.search(r"Keeping archive \(rule: minutely #1\):\s+test-1", output.pop(0)) + assert re.search(r"Keeping archive \(rule: minutely #2\):\s+test-2", output.pop(0)) + assert re.search(r"Would prune:\s+test-3", output.pop(0)) + assert re.search(r"Keeping archive \(rule: minutely #3\):\s+test-4", output.pop(0)) + assert 
re.search(r"Would prune:\s+test-5", output.pop(0)) + + +@freeze_time(datetime(2023, 12, 31, 23, 0, 0, tzinfo=None)) +def test_prune_keep_hourly_int_or_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 31, 22) + _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 31, 21, us=1) + _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 31, 21) + _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 31, 20, us=1) # Last possible microsecond + _create_archive_ts(archiver, backup_files, "test-5", 2023, 12, 31, 20) + for keep_arg in ["--keep-hourly=3", "--keep-hourly=3H"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() + assert re.search(r"Keeping archive \(rule: hourly #1\):\s+test-1", output.pop(0)) + assert re.search(r"Keeping archive \(rule: hourly #2\):\s+test-2", output.pop(0)) + assert re.search(r"Would prune:\s+test-3", output.pop(0)) + assert re.search(r"Keeping archive \(rule: hourly #3\):\s+test-4", output.pop(0)) + assert re.search(r"Would prune:\s+test-5", output.pop(0)) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_daily_int_or_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 30) + _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 29, S=1) + _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 29) + _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 28, us=1) # Last possible microsecond + _create_archive_ts(archiver, backup_files, "test-5", 2023, 12, 28) + for keep_arg in ["--keep-daily=3", "--keep-daily=3d"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() + assert re.search(r"Keeping archive 
\(rule: daily #1\):\s+test-1", output.pop(0)) + assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output.pop(0)) + assert re.search(r"Would prune:\s+test-3", output.pop(0)) + assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-4", output.pop(0)) + assert re.search(r"Would prune:\s+test-5", output.pop(0)) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_weekly_int_or_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 24) + _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 17, us=1) + _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 17) + _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 10, us=1) # Last possible microsecond + _create_archive_ts(archiver, backup_files, "test-5", 2023, 12, 10) + for keep_arg in ["--keep-weekly=3", "--keep-weekly=3w"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() + assert re.search(r"Keeping archive \(rule: weekly #1\):\s+test-1", output.pop(0)) + assert re.search(r"Keeping archive \(rule: weekly #2\):\s+test-2", output.pop(0)) + assert re.search(r"Would prune:\s+test-3", output.pop(0)) + assert re.search(r"Keeping archive \(rule: weekly #3\):\s+test-4", output.pop(0)) + assert re.search(r"Would prune:\s+test-5", output.pop(0)) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_monthly_int_or_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 11, 30) + _create_archive_ts( + archiver, backup_files, "test-2", 2023, 10, 30, us=1 + ) # Month defined as 31 days, so not Oct 31st + _create_archive_ts(archiver, backup_files, "test-3", 2023, 10, 30) + _create_archive_ts(archiver, 
backup_files, "test-4", 2023, 9, 29, us=1) # Last possible microsecond + _create_archive_ts(archiver, backup_files, "test-5", 2023, 9, 29) + for keep_arg in ["--keep-monthly=3", "--keep-monthly=3m"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() + assert re.search(r"Keeping archive \(rule: monthly #1\):\s+test-1", output.pop(0)) + assert re.search(r"Keeping archive \(rule: monthly #2\):\s+test-2", output.pop(0)) + assert re.search(r"Would prune:\s+test-3", output.pop(0)) + assert re.search(r"Keeping archive \(rule: monthly #3\):\s+test-4", output.pop(0)) + assert re.search(r"Would prune:\s+test-5", output.pop(0)) + + +# 2023-12-31 is Sunday, week 52. Makes these week calculations a little easier. +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_13weekly_int_or_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 10, 1) + _create_archive_ts(archiver, backup_files, "test-2", 2023, 7, 2, us=1) + _create_archive_ts(archiver, backup_files, "test-3", 2023, 7, 2) + _create_archive_ts(archiver, backup_files, "test-4", 2023, 4, 2, us=1) # Last possible microsecond + _create_archive_ts(archiver, backup_files, "test-5", 2023, 4, 2) + for keep_arg in ["--keep-13weekly=3", "--keep-13weekly=39w"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #1\):\s+test-1", output.pop(0)) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #2\):\s+test-2", output.pop(0)) + assert re.search(r"Would prune:\s+test-3", output.pop(0)) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #3\):\s+test-4", output.pop(0)) + assert re.search(r"Would prune:\s+test-5", output.pop(0)) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def 
test_prune_keep_3monthly_int_or_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2023, 9, 30) + _create_archive_ts(archiver, backup_files, "test-2", 2023, 6, 30, us=1) + _create_archive_ts(archiver, backup_files, "test-3", 2023, 6, 30) + _create_archive_ts(archiver, backup_files, "test-4", 2023, 3, 31, us=1) # Last possible microsecond + _create_archive_ts(archiver, backup_files, "test-5", 2023, 3, 31) + # 275d is the interval from now to 2023-03-31 + for keep_arg in ["--keep-3monthly=3", "--keep-3monthly=275d"]: + output = cmd(archiver, "prune", "--list", "--short", "--dry-run", keep_arg).splitlines() + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #1\):\s+test-1", output.pop(0)) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #2\):\s+test-2", output.pop(0)) + assert re.search(r"Would prune:\s+test-3", output.pop(0)) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #3\):\s+test-4", output.pop(0)) + assert re.search(r"Would prune:\s+test-5", output.pop(0)) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_yearly_int_or_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, backup_files, "test-1", 2022, 12, 31) + _create_archive_ts(archiver, backup_files, "test-2", 2021, 12, 31, us=1) + _create_archive_ts(archiver, backup_files, "test-3", 2021, 12, 31) + _create_archive_ts(archiver, backup_files, "test-4", 2020, 12, 31, us=1) # Last possible microsecond + _create_archive_ts(archiver, backup_files, "test-5", 2020, 12, 31) + for keep_arg in ["--keep-yearly=3", "--keep-yearly=3y"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() + assert re.search(r"Keeping archive \(rule: yearly #1\):\s+test-1", 
output.pop(0)) + assert re.search(r"Keeping archive \(rule: yearly #2\):\s+test-2", output.pop(0)) + assert re.search(r"Would prune:\s+test-3", output.pop(0)) + assert re.search(r"Keeping archive \(rule: yearly #3\):\s+test-4", output.pop(0)) + assert re.search(r"Would prune:\s+test-5", output.pop(0)) + + +def test_prune_no_args(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + with pytest.raises(CommandError) as error: + cmd(archiver, "prune") + output = str(error.value) + assert re.search(r"At least one of the .* settings must be specified.", output) + assert re.search(r"keep(?!-)", output) + flags = [ + "last", + "within", + "secondly", + "minutely", + "hourly", + "daily", + "weekly", + "monthly", + "yearly", + "13weekly", + "3monthly", + ] + for flag in flags: + assert f"keep-{flag}" in output + + +def test_prune_errors_on_keep_and_last(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + with pytest.raises(CommandError) as error: + cmd(archiver, "prune", "--dry-run", "--keep-last=5", "--keep=3") + assert 'Only one of the "keep" and "last" settings may be specified.' in str(error.value) + + +def test_prune_errors_on_keep_and_within(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + with pytest.raises(CommandError) as error: + cmd(archiver, "prune", "--dry-run", "--keep-within=7d", "--keep=3") + assert 'Only one of the "keep" and "within" settings may be specified.' 
in str(error.value) + + +@pytest.mark.parametrize("keep_arg,value", product([rule.key for rule in PRUNING_RULES], ["0", "0S"])) +def test_prune_all_zero_args_one(archivers, request, keep_arg, value): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + arg_with_prefix = "--keep" if keep_arg == "keep" else f"--keep-{keep_arg.replace('quarterly_', '')}" + output = _cmd_prune_error(archiver, f"{arg_with_prefix}={value}") + assert re.search(r"None of the .* settings have a positive value. At least one must be non-zero.", output) + + +def test_prune_all_zero_multiple_multiple(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + output = _cmd_prune_error(archiver, "--keep-secondly=0S", "--keep-daily=0") + assert re.search(r"None of the .* settings have a positive value. At least one must be non-zero.", output) + + +@pytest.mark.parametrize( + "lo_val,hi_val", + [("14d", "7d"), ("-1", "7d"), ("-1", "1"), ("-1", "-1"), ("all", "7d"), ("all", "1"), ("all", "-1")], +) +def test_prune_warns_on_redundant_interval_flags(archivers, request, lo_val, hi_val): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + with pytest.raises(CommandError) as error: + cmd(archiver, "prune", "--dry-run", f"--keep-hourly={lo_val}", f"--keep-daily={hi_val}") + assert "hourly=" in str(error.value) + assert "daily=" in str(error.value) + assert "effectively useless" in str(error.value) + + +@pytest.mark.parametrize("lo_val,hi_val", [("7d", "14d"), ("7d", "-1"), ("1", "-1"), ("7d", "all"), ("1", "all")]) +def test_prune_does_not_warn_on_normal_interval_flags(archivers, request, lo_val, hi_val): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + output = cmd(archiver, "prune", "--dry-run", f"--keep-hourly={lo_val}", f"--keep-daily={hi_val}") + assert "effectively useless" not in output + + +def 
test_prune_int_rolling_schedule_oldest_retention(): + daily_n = 6 + monthly_n = 3 + start_date = datetime(2024, 1, 1, tzinfo=timezone.utc) + + previous_archives = [] + archives = [] + + for day_offset in range(97): + backup_ts = start_date + timedelta(days=day_offset) + previous_archives = archives + archives = [mock_archive(backup_ts, day_offset), *archives] + + keep = {} + keep |= prune(archives, PRUNE_DAILY, daily_n, None, False, keep) + keep |= prune(archives, PRUNE_MONTHLY, monthly_n, None, True, keep) + + archives = sorted(keep.keys(), key=lambda a: a.ts, reverse=True) + + # It is now 2024-04-06. The last run should have just pruned the jan-01 + # archive since the monthly retention count is now satisfied at jan-31. It + # was kept until now to satisfy the oldest-rule. + + assert previous_archives[-1].ts.strftime("%m-%d") == "01-01" + assert archives[-1].ts.strftime("%m-%d") == "01-31" + + +def test_prune_interval_rolling_schedule_oldest_retention(): + daily_interval = timedelta(days=6) + monthly_interval = timedelta(days=31 * 3) # Matching --keep-monthly=3m after argument parsing + start_date = datetime(2024, 1, 1, tzinfo=timezone.utc) + + previous_archives = [] + archives = [] + + for day_offset in range(94): + backup_ts = start_date + timedelta(days=day_offset) + previous_archives = archives + archives = [mock_archive(backup_ts, day_offset), *archives] + + keep = {} + keep |= prune(archives, PRUNE_DAILY, daily_interval, backup_ts, False, keep) + keep |= prune(archives, PRUNE_MONTHLY, monthly_interval, backup_ts, True, keep) + + print( + f"For backup+prune at {backup_ts.strftime('%m-%d')} ({day_offset})" + f" the following {len(archives)} archives are kept:" + ) + for a, result in keep.items(): + print(f" {a.id}: {a.ts.strftime('%Y-%m-%d')} {result}") + + archives = sorted(keep.keys(), key=lambda a: a.ts, reverse=True) + + # It is now 2024-04-03. 
The last run should have just pruned the jan-01 + # archive since it now falls outside the retention range (_exactly_ 93 days + # or 3 months ago, timestamp compared exclusively). It was kept until now + # to satisfy the oldest-rule. + + assert previous_archives[-1].ts.strftime("%m-%d") == "01-01" + assert archives[-1].ts.strftime("%m-%d") == "01-31" From eca719aeec5f8b8ad51923580fd89b52def25cc6 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Mon, 25 May 2026 21:46:36 +0200 Subject: [PATCH 03/22] Assert output format without dry run in basic pruning test --- src/borg/testsuite/archiver/prune_cmd_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index c1de83773a..a1b0dc05d6 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -49,7 +49,8 @@ def test_prune_repository(archivers, request, backup_files): output = cmd(archiver, "repo-list") assert "test1" in output assert "test2" in output - cmd(archiver, "prune", "--keep-daily=1") + output = cmd(archiver, "prune", "--list", "--keep-daily=1") + assert re.search(r"Pruning archive \(1/1\):\s+test1", output) output = cmd(archiver, "repo-list") assert "test1" not in output # The latest archive must still be there: From 72e72dc43106662ee8b8029a139ddd3d7a229df1 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Mon, 25 May 2026 22:11:18 +0200 Subject: [PATCH 04/22] Adds 'prune --since', base timestamp to prune from A flag to explicitly "save" all archives older than a certain timestamp, and to function as a base timestamp from which to base interval timedelta calculations. Allows for precise time interval manipulation for superusers and as a bonus simplifies time-based testing and alleviates the need for an external dependency to freeze time in test. 
Includes some refactoring of do_prune for logical flow & naming that came up while iterating on these changes. --- requirements.d/development.lock.txt | 1 - requirements.d/development.txt | 1 - src/borg/archiver/prune_cmd.py | 111 ++++-- src/borg/testsuite/archiver/prune_cmd_test.py | 346 +++++++++--------- 4 files changed, 254 insertions(+), 205 deletions(-) diff --git a/requirements.d/development.lock.txt b/requirements.d/development.lock.txt index 08dbde72a8..f2cc2f4902 100644 --- a/requirements.d/development.lock.txt +++ b/requirements.d/development.lock.txt @@ -11,7 +11,6 @@ pytest-xdist==3.8.0 coverage[toml]==7.14.0 pytest-cov==7.1.0 pytest-benchmark==5.2.3 -freezegun==1.5.5 Cython==3.2.5 pre-commit==4.6.0 types-PyYAML==6.0.12.20260518 diff --git a/requirements.d/development.txt b/requirements.d/development.txt index 522c3e637d..d46a28e42d 100644 --- a/requirements.d/development.txt +++ b/requirements.d/development.txt @@ -11,7 +11,6 @@ pytest-xdist coverage[toml] pytest-cov pytest-benchmark -freezegun Cython pre-commit bandit[toml] diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index 134d43f3bd..70654f0784 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -8,7 +8,7 @@ from ._common import with_repository, Highlander from ..constants import * # NOQA from ..helpers import ArchiveFormatter, ProgressIndicatorPercent, CommandError, Error -from ..helpers import archivename_validator, interval, int_or_interval, sig_int +from ..helpers import archivename_validator, interval, int_or_interval, sig_int, timestamp from ..helpers import json_print, basic_json_data from ..helpers.argparsing import ArgumentParser from ..manifest import ArchiveInfo, Manifest @@ -108,6 +108,9 @@ def quarterly_3monthly_period_func(dt): PRUNE_QUARTERLY_3MONTHLY = PruningRule("quarterly_3monthly", quarterly_3monthly_period_func) PRUNE_YEARLY = PruningRule("yearly", pattern_period_func("%Y")) +# Fake rule used to indicate archives 
skipped by --since +PRUNE_SINCE = PruningRule("skip", unique_period_func()) + PRUNING_RULES = [ PRUNE_WITHIN, PRUNE_LAST, @@ -128,7 +131,7 @@ def prune( archives: list[ArchiveInfo], rule: PruningRule, n_or_interval: int | timedelta, - base_timestamp: datetime | None, + since_timestamp: datetime | None, keep_oldest: bool, previously_kept: frozenset[ArchiveInfo] = frozenset(), ) -> dict[ArchiveInfo, KeepResult]: @@ -138,9 +141,9 @@ def prune( if isinstance(n_or_interval, int): n, earliest_timestamp = n_or_interval, None else: - if base_timestamp is None: - raise ValueError("base_timestamp is required when using interval-based pruning") - n, earliest_timestamp = None, base_timestamp - n_or_interval + if since_timestamp is None: + raise ValueError("since_timestamp is required when using interval-based pruning") + n, earliest_timestamp = None, since_timestamp - n_or_interval keep: dict[ArchiveInfo, KeepResult] = {} @@ -175,15 +178,6 @@ def do_prune(self, args, repository, manifest): """Prune archives according to specified rules.""" self._validate_prune_args(args) - - if args.format is not None: - format = args.format - elif args.short: - format = "{archive}" - else: - format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]") - formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec) - match = [args.name] if args.name else args.match_archives archives = manifest.archives.list(match=match, sort_by=["ts"], reverse=True) archives = [ai for ai in archives if "@PROT" not in ai.tags] @@ -191,30 +185,57 @@ def do_prune(self, args, repository, manifest): # Archives to keep along with the rule that ensured them being kept keep = {} - base_timestamp = datetime.now().astimezone() - active_rules = {rule: getattr(args, rule.key) for rule in PRUNING_RULES if getattr(args, rule.key) is not None} - for i, (rule, n_or_interval) in enumerate(active_rules.items(), 1): + since = getattr(args, PRUNE_SINCE.key) + candidate_archives = archives + 
+ if since is not None: + # `--since` is a prefilter: Archives from after this time are kept by default. They are not considered for + # pruning at all. They won't falsely occupy an active retention period. + for archive in archives: + if archive.ts <= since: + break + keep[archive] = KeepResult(rule=PRUNE_SINCE, idx=len(keep)) + candidate_archives = archives[len(keep) :] + + # Apply each retention rule to all candidate archives. The + # `previously_kept` parameter prevents later (coarser-grained) rules + # from double-counting archives already retained by earlier rules. + active_rules = [ + (rule, getattr(args, rule.key)) for rule in PRUNING_RULES if getattr(args, rule.key) is not None + ] + for rule, n_or_interval in active_rules: keep |= prune( - archives=archives, + archives=candidate_archives, rule=rule, n_or_interval=n_or_interval, - base_timestamp=base_timestamp, - keep_oldest=i == len(active_rules), # Activate keep_oldest rule only for the largest active interval + since_timestamp=(since if since is not None else datetime.now().astimezone()), + keep_oldest=( + rule == active_rules[-1][0] + ), # Activate keep_oldest rule only for the largest active interval previously_kept=frozenset(keep), ) - to_delete = set(archives) - set(keep) - if not args.json: - logger.info("Repository contains %d archives.", manifest.archives.count()) - logger.info("Applying rules to the matching %d archives...", len(archives)) - logger.info("Keeping %d archives, pruning %d archives.", len(keep), len(to_delete)) + archives_to_prune = set(archives) - set(keep) + + if args.format is not None: + format = args.format + elif args.short: + format = "{archive}" + else: + format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]") + formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec) + if args.json: output_data = [] + else: + logger.info("Repository contains %d archives.", manifest.archives.count()) + logger.info("Applying rules to the 
matching %d archives...", len(archives)) + logger.info("Keeping %d archives, pruning %d archives.", len(keep), len(archives_to_prune)) + list_logger = logging.getLogger("borg.output.list") # set up counters for the progress display - to_delete_len = len(to_delete) - archives_deleted = 0 - pi = ProgressIndicatorPercent(total=len(to_delete), msg="Pruning archives %3.0f%%", msgid="prune") + num_archives_deleted = 0 + pi = ProgressIndicatorPercent(total=len(archives_to_prune), msg="Pruning archives %3.0f%%", msgid="prune") for archive_info in archives: if sig_int and sig_int.action_done(): break @@ -224,18 +245,18 @@ def do_prune(self, args, repository, manifest): archive_data = formatter.get_item_data(archive_info, jsonline=True) else: archive_formatted = formatter.format_item(archive_info, jsonline=False) - if archive_info in to_delete: + if archive_info in archives_to_prune: if not args.json: pi.show() - archives_deleted += 1 + num_archives_deleted += 1 if args.dry_run: log_message = "Would prune:" else: - log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len) + log_message = f"Pruning archive ({num_archives_deleted}/{len(archives_to_prune)}):" manifest.archives.delete_by_id(archive_info.id) if args.json: archive_data["kept"] = False - archive_data["deleted_archive_number"] = archives_deleted + archive_data["deleted_archive_number"] = num_archives_deleted else: result = keep[archive_info] result_message = f"{result.rule.key}{'[oldest]' if result.oldest else ''} #{result.idx + 1}" @@ -249,21 +270,21 @@ def do_prune(self, args, repository, manifest): if ( args.output_list or not (args.list_pruned or args.list_kept) - or (args.list_pruned and archive_info in to_delete) - or (args.list_kept and archive_info not in to_delete) + or (args.list_pruned and archive_info in archives_to_prune) + or (args.list_kept and archive_info not in archives_to_prune) ): output_data.append(archive_data) elif ( args.output_list - or (args.list_pruned and archive_info 
in to_delete) - or (args.list_kept and archive_info not in to_delete) + or (args.list_pruned and archive_info in archives_to_prune) + or (args.list_kept and archive_info not in archives_to_prune) ): list_logger.info(f"{log_message:<44} {archive_formatted}") if not args.json: pi.finish() if args.json: json_print(basic_json_data(manifest, extra={"archives": output_data})) - if archives_deleted > 0 and not args.dry_run: + if num_archives_deleted > 0 and not args.dry_run: manifest.write() self.print_warning('Done. Run "borg compact" to free space.', wc=None) if sig_int: @@ -371,6 +392,14 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): timezone of the system where borg prune runs, and weeks go from Monday to Sunday. Specifying a negative number of archives to keep means that there is no limit. + The ``--since`` option restricts pruning to archives older than the given + TIMESTAMP. Archives newer than this timestamp are kept unconditionally + as a pre-filter. When ``--since`` is used together with interval-based + ``--keep-*`` options (e.g. ``--keep-daily 7d``), the interval is + measured backwards from the given timestamp rather than from the + current time. Count-based retention does not count the unconditionally + kept archives. + Borg will retain the oldest archive if any of the secondly, minutely, hourly, daily, weekly, monthly, quarterly, or yearly rules was not otherwise able to meet its retention target. This enables the first chronological archive to @@ -419,6 +448,14 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): "but keys used in it are added to the JSON output. " "Some keys are always present. 
Note: JSON can only represent text.", ) + subparser.add_argument( + "--since", + metavar="TIMESTAMP", + dest=PRUNE_SINCE.key, + type=timestamp, + action=Highlander, + help="only consider archives older than this for pruning", + ) subparser.add_argument( "--keep-within", metavar="INTERVAL", diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index a1b0dc05d6..94cdbf023d 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -4,8 +4,6 @@ import re from operator import attrgetter from datetime import datetime, timezone, timedelta -from freezegun import freeze_time - from ...constants import * # NOQA from ...archiver.prune_cmd import ( PRUNING_RULES, @@ -26,15 +24,14 @@ pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA -def _create_archive_ts(archiver, backup_files, name, y, m, d, H=0, M=0, S=0, us=0, tzinfo=None): - cmd( - archiver, - "create", - "--timestamp", - datetime(y, m, d, H, M, S, us, tzinfo=tzinfo).strftime(ISO_FORMAT_ZONE), - name, - backup_files, - ) +def _create_archive_dt(archiver, backup_files, name, dt, tzinfo=timezone.utc): + if dt.tzinfo is None: + dt = dt.replace(tzinfo=tzinfo) + cmd(archiver, "create", "--timestamp", dt.strftime(ISO_FORMAT_ZONE), name, backup_files) + + +def _create_archive_ts(archiver, backup_files, name, y, m, d, H=0, M=0, S=0, us=0, tzinfo=timezone.utc): + _create_archive_dt(archiver, backup_files, name, datetime(y, m, d, H, M, S, us, tzinfo=tzinfo)) def test_prune_repository(archivers, request, backup_files): @@ -165,7 +162,6 @@ def mk_name(tup): # Use 99 instead of -1 to test that oldest backup is kept. 
output = cmd(archiver, "prune", "--list", "--dry-run", f"--keep-{strat}=99") - print(output) for a in map(mk_name, to_prune): assert re.search(rf"Would prune:\s+{a}", output) @@ -474,195 +470,194 @@ def test_prune_keep_last_same_second(archivers, request, backup_files): assert re.search(r"Keeping archive \(rule: last #\d\):\s+test2", output) -@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) # Non-leap year ending on a Sunday -def test_prune_keep_int_or_interval(archivers, request, backup_files): +@pytest.mark.parametrize("keep_arg", ["--keep=2", "--keep=1S"]) +def test_prune_keep_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 31, 23, 59, 59) - _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 31, 23, 59, 59) - _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 31, 23, 59, 58) - for keep_arg in ["--keep=2", "--keep=1S"]: - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) - assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-1", output) - assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-2", output) - assert re.search(r"Would prune:\s+test-3", output) + dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt) + _create_archive_dt(archiver, backup_files, "test-2", dt) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(seconds=1)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) @pytest.mark.parametrize("keep_arg", ["--keep-daily=-1", "--keep-daily=all"]) def test_prune_keep_all(archivers, request, 
backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 30, 23, 59, 59, tzinfo=timezone.utc) - _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 29, 23, 59, 59, tzinfo=timezone.utc) - _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 28, 23, 59, 59, tzinfo=timezone.utc) - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=1)) + _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(days=2)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=3)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-3", output) -@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) +@pytest.mark.parametrize("keep_arg", ["--keep-secondly=2", "--keep-secondly=2S"]) def test_prune_keep_secondly_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 31, 23, 59, 58) - _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 31, 23, 59, 57, 1) - _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 31, 23, 59, 57) - _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 31, 23, 59, 56, 999999) - for keep_arg in ["--keep-secondly=2", "--keep-secondly=2S"]: - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() - assert re.search(r"Keeping archive \(rule: secondly #1\):\s+test-1", output.pop(0)) - 
assert re.search(r"Keeping archive \(rule: secondly #2\):\s+test-2", output.pop(0)) - assert re.search(r"Would prune:\s+test-3", output.pop(0)) - assert re.search(r"Would prune:\s+test-4", output.pop(0)) - - -@freeze_time(datetime(2023, 12, 31, 23, 59, 0, tzinfo=None)) -def test_prune_keep_minutely_int_or_interval(archivers, request, backup_files): + dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(seconds=1)) + _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(seconds=1, microseconds=999999)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(seconds=2)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(seconds=2, microseconds=1)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: secondly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: secondly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Would prune:\s+test-4", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-minutely=3", "--keep-minutely=3M"]) +def test_prune_keep_minutely_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 31, 23, 58) - _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 31, 23, 57, 1) - _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 31, 23, 57) - _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 31, 23, 56, 0, 1) # Last possible microsecond - _create_archive_ts(archiver, backup_files, "test-5", 2023, 12, 31, 23, 56) - for keep_arg in ["--keep-minutely=3", "--keep-minutely=3M"]: - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() - assert re.search(r"Keeping 
archive \(rule: minutely #1\):\s+test-1", output.pop(0)) - assert re.search(r"Keeping archive \(rule: minutely #2\):\s+test-2", output.pop(0)) - assert re.search(r"Would prune:\s+test-3", output.pop(0)) - assert re.search(r"Keeping archive \(rule: minutely #3\):\s+test-4", output.pop(0)) - assert re.search(r"Would prune:\s+test-5", output.pop(0)) - - -@freeze_time(datetime(2023, 12, 31, 23, 0, 0, tzinfo=None)) -def test_prune_keep_hourly_int_or_interval(archivers, request, backup_files): + dt = datetime(2023, 12, 31, 23, 59, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(minutes=1)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(minutes=2)).replace(second=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(minutes=2)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(minutes=3)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(minutes=3)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: minutely #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: minutely #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: minutely #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-hourly=3", "--keep-hourly=3H"]) +def test_prune_keep_hourly_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 31, 22) - _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 31, 21, us=1) - _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 31, 21) - _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 
31, 20, us=1) # Last possible microsecond - _create_archive_ts(archiver, backup_files, "test-5", 2023, 12, 31, 20) - for keep_arg in ["--keep-hourly=3", "--keep-hourly=3H"]: - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() - assert re.search(r"Keeping archive \(rule: hourly #1\):\s+test-1", output.pop(0)) - assert re.search(r"Keeping archive \(rule: hourly #2\):\s+test-2", output.pop(0)) - assert re.search(r"Would prune:\s+test-3", output.pop(0)) - assert re.search(r"Keeping archive \(rule: hourly #3\):\s+test-4", output.pop(0)) - assert re.search(r"Would prune:\s+test-5", output.pop(0)) - - -@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) -def test_prune_keep_daily_int_or_interval(archivers, request, backup_files): + dt = datetime(2023, 12, 31, 23, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(hours=1)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(hours=2)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(hours=2)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(hours=3)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(hours=3)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: hourly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: hourly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: hourly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-daily=3", "--keep-daily=3d"]) +def test_prune_keep_daily_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, 
backup_files, "test-1", 2023, 12, 30) - _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 29, S=1) - _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 29) - _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 28, us=1) # Last possible microsecond - _create_archive_ts(archiver, backup_files, "test-5", 2023, 12, 28) - for keep_arg in ["--keep-daily=3", "--keep-daily=3d"]: - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() - assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output.pop(0)) - assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output.pop(0)) - assert re.search(r"Would prune:\s+test-3", output.pop(0)) - assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-4", output.pop(0)) - assert re.search(r"Would prune:\s+test-5", output.pop(0)) - - -@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) -def test_prune_keep_weekly_int_or_interval(archivers, request, backup_files): + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=1)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=2)).replace(second=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=2)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=3)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=3)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-weekly=3", 
"--keep-weekly=3w"]) +def test_prune_keep_weekly_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2023, 12, 24) - _create_archive_ts(archiver, backup_files, "test-2", 2023, 12, 17, us=1) - _create_archive_ts(archiver, backup_files, "test-3", 2023, 12, 17) - _create_archive_ts(archiver, backup_files, "test-4", 2023, 12, 10, us=1) # Last possible microsecond - _create_archive_ts(archiver, backup_files, "test-5", 2023, 12, 10) - for keep_arg in ["--keep-weekly=3", "--keep-weekly=3w"]: - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() - assert re.search(r"Keeping archive \(rule: weekly #1\):\s+test-1", output.pop(0)) - assert re.search(r"Keeping archive \(rule: weekly #2\):\s+test-2", output.pop(0)) - assert re.search(r"Would prune:\s+test-3", output.pop(0)) - assert re.search(r"Keeping archive \(rule: weekly #3\):\s+test-4", output.pop(0)) - assert re.search(r"Would prune:\s+test-5", output.pop(0)) - - -@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) -def test_prune_keep_monthly_int_or_interval(archivers, request, backup_files): + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=7)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=14)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=14)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=21)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=21)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: weekly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: weekly #2\):\s+test-2", output) + 
assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: weekly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-monthly=3", "--keep-monthly=3m"]) +def test_prune_keep_monthly_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2023, 11, 30) - _create_archive_ts( - archiver, backup_files, "test-2", 2023, 10, 30, us=1 - ) # Month defined as 31 days, so not Oct 31st - _create_archive_ts(archiver, backup_files, "test-3", 2023, 10, 30) - _create_archive_ts(archiver, backup_files, "test-4", 2023, 9, 29, us=1) # Last possible microsecond - _create_archive_ts(archiver, backup_files, "test-5", 2023, 9, 29) - for keep_arg in ["--keep-monthly=3", "--keep-monthly=3m"]: - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() - assert re.search(r"Keeping archive \(rule: monthly #1\):\s+test-1", output.pop(0)) - assert re.search(r"Keeping archive \(rule: monthly #2\):\s+test-2", output.pop(0)) - assert re.search(r"Would prune:\s+test-3", output.pop(0)) - assert re.search(r"Keeping archive \(rule: monthly #3\):\s+test-4", output.pop(0)) - assert re.search(r"Would prune:\s+test-5", output.pop(0)) + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=31)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=62)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=62)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=93)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=93)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) 
+ assert re.search(r"Keeping archive \(rule: monthly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: monthly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: monthly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) # 2023-12-31 is Sunday, week 52. Makes these week calculations a little easier. -@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) -def test_prune_keep_13weekly_int_or_interval(archivers, request, backup_files): +@pytest.mark.parametrize("keep_arg", ["--keep-13weekly=3", "--keep-13weekly=39w"]) +def test_prune_keep_13weekly_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2023, 10, 1) - _create_archive_ts(archiver, backup_files, "test-2", 2023, 7, 2, us=1) - _create_archive_ts(archiver, backup_files, "test-3", 2023, 7, 2) - _create_archive_ts(archiver, backup_files, "test-4", 2023, 4, 2, us=1) # Last possible microsecond - _create_archive_ts(archiver, backup_files, "test-5", 2023, 4, 2) - for keep_arg in ["--keep-13weekly=3", "--keep-13weekly=39w"]: - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() - assert re.search(r"Keeping archive \(rule: quarterly_13weekly #1\):\s+test-1", output.pop(0)) - assert re.search(r"Keeping archive \(rule: quarterly_13weekly #2\):\s+test-2", output.pop(0)) - assert re.search(r"Would prune:\s+test-3", output.pop(0)) - assert re.search(r"Keeping archive \(rule: quarterly_13weekly #3\):\s+test-4", output.pop(0)) - assert re.search(r"Would prune:\s+test-5", output.pop(0)) - - -@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) -def test_prune_keep_3monthly_int_or_interval(archivers, request, backup_files): + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, 
backup_files, "test-1", dt - timedelta(days=91)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=182)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=182)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=273)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=273)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-3monthly=3", "--keep-3monthly=275d"]) +def test_prune_keep_3monthly_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2023, 9, 30) - _create_archive_ts(archiver, backup_files, "test-2", 2023, 6, 30, us=1) - _create_archive_ts(archiver, backup_files, "test-3", 2023, 6, 30) - _create_archive_ts(archiver, backup_files, "test-4", 2023, 3, 31, us=1) # Last possible microsecond - _create_archive_ts(archiver, backup_files, "test-5", 2023, 3, 31) - # 275d is the interval from now to 2023-03-31 - for keep_arg in ["--keep-3monthly=3", "--keep-3monthly=275d"]: - output = cmd(archiver, "prune", "--list", "--short", "--dry-run", keep_arg).splitlines() - assert re.search(r"Keeping archive \(rule: quarterly_3monthly #1\):\s+test-1", output.pop(0)) - assert re.search(r"Keeping archive \(rule: quarterly_3monthly #2\):\s+test-2", output.pop(0)) - assert re.search(r"Would prune:\s+test-3", output.pop(0)) - 
assert re.search(r"Keeping archive \(rule: quarterly_3monthly #3\):\s+test-4", output.pop(0)) - assert re.search(r"Would prune:\s+test-5", output.pop(0)) - - -@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) -def test_prune_keep_yearly_int_or_interval(archivers, request, backup_files): + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=92)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=184)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=184)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=275)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=275)) + # 275d is the interval from dt to the oldest kept monthly archive + output = cmd(archiver, "prune", "--list", "--short", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-yearly=3", "--keep-yearly=3y"]) +def test_prune_keep_yearly_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - _create_archive_ts(archiver, backup_files, "test-1", 2022, 12, 31) - _create_archive_ts(archiver, backup_files, "test-2", 2021, 12, 31, us=1) - _create_archive_ts(archiver, backup_files, "test-3", 2021, 12, 31) - _create_archive_ts(archiver, backup_files, "test-4", 2020, 12, 31, us=1) # Last possible microsecond - _create_archive_ts(archiver, backup_files, "test-5", 2020, 12, 
31) - for keep_arg in ["--keep-yearly=3", "--keep-yearly=3y"]: - output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg).splitlines() - assert re.search(r"Keeping archive \(rule: yearly #1\):\s+test-1", output.pop(0)) - assert re.search(r"Keeping archive \(rule: yearly #2\):\s+test-2", output.pop(0)) - assert re.search(r"Would prune:\s+test-3", output.pop(0)) - assert re.search(r"Keeping archive \(rule: yearly #3\):\s+test-4", output.pop(0)) - assert re.search(r"Would prune:\s+test-5", output.pop(0)) + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=365)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=730)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=730)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=1095)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=1095)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: yearly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: yearly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: yearly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) def test_prune_no_args(archivers, request): @@ -773,6 +768,25 @@ def test_prune_int_rolling_schedule_oldest_retention(): assert archives[-1].ts.strftime("%m-%d") == "01-31" +def test_prune_since_prefiltered_archives_ignored_in_pruning(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2024, 6, 6, 12, 0, 0, tzinfo=timezone.utc) + + _create_archive_dt(archiver, backup_files, "test-a", dt + timedelta(hours=1)) + _create_archive_dt(archiver, 
backup_files, "test-b", dt - timedelta(hours=1)) + _create_archive_dt(archiver, backup_files, "test-c", dt - timedelta(days=1)) + + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), "--keep-daily=1") + + # 'test-b' is kept, meaning 'test-a' was entirely skipped for pruning consideration. + # They would otherwise have occupied the same period. + assert re.search(r"Keeping archive \(rule: skip #1\):\s+test-a", output) + assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-b", output) + + assert re.search(r"Would prune:\s+test-c", output) + + def test_prune_interval_rolling_schedule_oldest_retention(): daily_interval = timedelta(days=6) monthly_interval = timedelta(days=31 * 3) # Matching --keep-monthly=3m after argument parsing From 369fd466f14c276b5a072c61a36cf479981cbc5b Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Tue, 26 May 2026 00:15:01 +0200 Subject: [PATCH 05/22] Fixes timestamp parsing error on Python 3.10 --- src/borg/constants.py | 2 -- src/borg/testsuite/archiver/prune_cmd_test.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/borg/constants.py b/src/borg/constants.py index 34c791622c..5c88b6b89e 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -150,9 +150,7 @@ EXIT_SIGNAL_BASE = 128 # terminated due to signal, rc = 128 + sig_no ISO_FORMAT_NO_USECS = "%Y-%m-%dT%H:%M:%S" -ISO_FORMAT_NO_USECS_ZONE = ISO_FORMAT_NO_USECS + "%z" ISO_FORMAT = ISO_FORMAT_NO_USECS + ".%f" -ISO_FORMAT_ZONE = ISO_FORMAT + "%z" DASHES = "-" * 78 diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 94cdbf023d..09c67bd908 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -27,7 +27,7 @@ def _create_archive_dt(archiver, backup_files, name, dt, tzinfo=timezone.utc): if dt.tzinfo is None: dt = dt.replace(tzinfo=tzinfo) - cmd(archiver, "create", "--timestamp", 
dt.strftime(ISO_FORMAT_ZONE), name, backup_files) + cmd(archiver, "create", "--timestamp", dt.isoformat(timespec="microseconds"), name, backup_files) def _create_archive_ts(archiver, backup_files, name, y, m, d, H=0, M=0, S=0, us=0, tzinfo=timezone.utc): From 318c53fcf94775f552a750c3f5b1f11bb516a2b6 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sun, 31 May 2026 22:36:06 +0200 Subject: [PATCH 06/22] Adds full-scale example using pruning intervals Also rewrites some of the older example to match terminology and wording from the newer example. --- docs/misc/prune-example-interval.txt | 91 +++++++++++++++++++ docs/misc/prune-example.txt | 11 ++- src/borg/testsuite/archiver/prune_cmd_test.py | 61 +++++++++++++ 3 files changed, 160 insertions(+), 3 deletions(-) create mode 100644 docs/misc/prune-example-interval.txt diff --git a/docs/misc/prune-example-interval.txt b/docs/misc/prune-example-interval.txt new file mode 100644 index 0000000000..53ce26cd51 --- /dev/null +++ b/docs/misc/prune-example-interval.txt @@ -0,0 +1,91 @@ +borg prune visualized (count and interval mixed) +================================================================================ + +Assume it is 2026-06-04 16:00. You have been creating backup archives at 16:00 +on most days going back to late 2025, with pruning running immediately after +each archival. Today's archive has just been made and the following prune +operation is about to start. + +This example shows what would be kept/pruned when running the following prune +command. Note the yearly rule keeping _any two_ yearly archives. + +borg prune \ + --since '2026-06-04 16:00' \ + --keep-daily 1w \ + --keep-monthly 5m \ + --keep-yearly 2 + +Archives kept by the `--keep-daily` rule are marked by a "d", +archives kept by the `--keep-monthly` rule are marked by an "m", and +archives kept by the `--keep-yearly` rule are marked by a "y" to the +right. + +The first archive was made on 2025-11-15.
You missed the backups on 2026-03-31 +and 2026-06-03. + + +Calendar view +------------- + 2025 + November December + 1 2 3 4 5 6 7 + 8 9 10 11 12 13 14 + 15y16 15 16 17 18 19 20 21 + 17 18 19 20 21 22 23 22 23 24 25 26 27 28 + 24 25 26 27 28 29 30 29 30 31y + + 2026 + January February March + 1 2 3 4 1 1 + 5 6 7 8 9 10 11 2 3 4 5 6 7 8 2 3 4 5 6 7 8 +12 13 14 15 16 17 18 9 10 11 12 13 14 15 9 10 11 12 13 14 15 +19 20 21 22 23 24 25 16 17 18 19 20 21 22 16 17 18 19 20 21 22 +26 27 28 29 30 31m 23 24 25 26 27 28m 23 24 25 26 27 28 29 + 30m31 + + April May June + 1 2 3 4 5 1 2 3 1d 2d 3 4d + 6 7 8 9 10 11 12 4 5 6 7 8 9 10 +13 14 15 16 17 18 19 11 12 13 14 15 16 17 +20 21 22 23 24 25 26 18 19 20 21 22 23 24 +27 28 29 30m 25 26 27 28 29d30d31d + + +List view +--------- + +--keep-daily 1w --keep-monthly 5m --keep-yearly 2 +-------------------------------------------------------------- +1. 2026-06-04 1. 2026-04-30 1. 2025-12-31 +2. 2026-06-02 2. 2026-03-30 2. 2025-11-15 (oldest) +3. 2026-06-01 3. 2026-02-28 +4. 2026-05-31 4. 2026-01-31 +5. 2026-05-30 +6. 2026-05-29 + + +Notes +----- + +2026-06-03 was skipped, so no archive on that day. No compensation is made for +this, so the "daily" rule simply keeps one fewer archive. 2026-05-28 16:00 is +exactly one week before `--since` and so would be excluded and pruned in this +prune run. + +2026-03-31 was skipped, so 2026-03-30 is the monthly candidate for that month. +2025-12-31 16:00 is exactly 5 months (5 * 31 days) from today and so that day's +archive is no longer kept by the "monthly" rule but instead is now kept as the +first true yearly candidate. + +Since interval rules define time windows rather than competing for a fixed +number of slots, their interplay is simpler than count-based rules. An archive +is kept by an interval rule as long as it falls within the specified window; +the next rule simply considers whatever remains. + +Intervals and counts can be mixed freely. 
Yearly retention in this example is +done by retention count instead of intervals. A count rule paired with interval +rules behaves just as it would if all preceding rules were also counts: +Archives already kept by earlier rules are excluded from consideration. In this +example there is only one "true" yearly candidate, so the oldest archive at +2025-11-15 is kept. This oldest archive will be kept until the rolling backup +scheme reaches "steady state" (when all retention rules are fully satisfied). diff --git a/docs/misc/prune-example.txt b/docs/misc/prune-example.txt index d77451a858..1e98c7b4e7 100644 --- a/docs/misc/prune-example.txt +++ b/docs/misc/prune-example.txt @@ -82,10 +82,15 @@ Notes 2015-12-31 is kept due to the --keep-daily 14 rule (because it is applied first), not due to the --keep-monthly or --keep-yearly rule. +Rules are applied in the order given: archives already kept by an earlier +rule are excluded from consideration by later rules. + The --keep-yearly 1 rule does not consider the December 31st backup because it -has already been kept due to the daily rule. There are no backups available -from previous years, so the --keep-yearly target of 1 backup is not satisfied. -Because of this, the 2015-01-01 archive (the oldest archive available) is kept. +has already been kept due to the daily rule. There are no backups from +previous years, so there are no "true" yearly candidates. The oldest archive +at 2015-01-01 fills the remaining slot and will be kept until the rolling +backup scheme reaches "steady state" (when all retention rules are fully +satisfied). The --keep-monthly 6 rule keeps Nov, Oct, Sep, Aug, Jul and Jun. 
December is not considered for this rule, because that backup was already kept because of diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 09c67bd908..cd49159bfe 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -117,6 +117,67 @@ def test_prune_repository_example(archivers, request, backup_files): assert "test%02d" % i not in output +# This test must match docs/misc/prune-example-interval.txt +def test_prune_repository_example_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + # All timestamps are at exactly 16:00 UTC. + # This models the example: backups on most days from 2025-11-15 to + # 2026-06-04, with skips on 2026-03-31 and 2026-06-03. Of these only + # 2026-05-28 should be pruned after today's pruning. + archive_dates = [ + (2025, 11, 15), + (2025, 12, 31), + (2026, 1, 31), + (2026, 2, 28), + (2026, 3, 30), + (2026, 4, 30), + (2026, 5, 28), + (2026, 5, 29), + (2026, 5, 30), + (2026, 5, 31), + (2026, 6, 1), + (2026, 6, 2), + (2026, 6, 4), + ] + + names = [f"backup_{y:04d}-{m:02d}-{d:02d}" for y, m, d in archive_dates] + for (y, m, d), name in zip(archive_dates, names): + _create_archive_ts(archiver, backup_files, name, y, m, d) + + output = cmd( + archiver, + "prune", + "--list", + "--dry-run", + "--since=2026-06-04T16:00:00+00:00", + "--keep-daily=1w", + "--keep-monthly=5m", + "--keep-yearly=2", + ) + + daily_kept = [ + "backup_2026-06-04", + "backup_2026-06-02", + "backup_2026-06-01", + "backup_2026-05-31", + "backup_2026-05-30", + "backup_2026-05-29", + ] + for i, name in enumerate(daily_kept, 1): + assert re.search(rf"Keeping archive \(rule: daily #{i}\):\s+{name}", output) + + monthly_kept = ["backup_2026-04-30", "backup_2026-03-30", "backup_2026-02-28", "backup_2026-01-31"] + for i, name in enumerate(monthly_kept, 1): + assert 
re.search(rf"Keeping archive \(rule: monthly #{i}\):\s+{name}", output) + + assert re.search(r"Keeping archive \(rule: yearly #1\):\s+backup_2025-12-31", output) + assert re.search(r"Keeping archive \(rule: yearly\[oldest\] #2\):\s+backup_2025-11-15", output) + + assert re.search(r"Would prune:\s+backup_2026-05-28", output) + + def test_prune_quarterly(archivers, request, backup_files): # Example worked through by hand when developing the quarterly # strategy, based on existing backups where the quarterly strategy From f7a848ed6f97064ac3d8e0ee8f3d40756a5942bb Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sun, 31 May 2026 22:39:17 +0200 Subject: [PATCH 07/22] Rewords original prune example with more precise terminology --- docs/misc/prune-example.txt | 54 ++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/docs/misc/prune-example.txt b/docs/misc/prune-example.txt index 1e98c7b4e7..bdbe0fc594 100644 --- a/docs/misc/prune-example.txt +++ b/docs/misc/prune-example.txt @@ -1,18 +1,17 @@ borg prune visualized ===================== -Assume it is 2016-01-01. Today's backup has not yet been made. You have -created at least one backup on each day in 2015 except on 2015-12-19 (no +Assume it is 2016-01-01. Today's archive has not yet been made. You have +created at least one archive on each day in 2015 except on 2015-12-19 (no backup was made on that day), and you started backing up with Borg on 2015-01-01. This is what borg prune --keep-daily 14 --keep-monthly 6 --keep-yearly 1 would keep. -Backups kept by the --keep-daily rule are marked by a "d" to the right, -backups kept by the --keep-monthly rule are marked by a "m" to the right, -and backups kept by the --keep-yearly rule are marked by a "y" to the -right. 
+Archives kept by the --keep-daily rule are marked by a "d" to the right, +archives kept by the --keep-monthly rule are marked by a "m" to the right, and +archives kept by the --keep-yearly rule are marked by a "y" to the right. Calendar view ------------- @@ -71,7 +70,7 @@ List view 10. 2015-12-22 11. 2015-12-21 12. 2015-12-20 - (no backup made on 2015-12-19) + (no archive made on 2015-12-19) 13. 2015-12-18 14. 2015-12-17 @@ -85,20 +84,19 @@ first), not due to the --keep-monthly or --keep-yearly rule. Rules are applied in the order given: archives already kept by an earlier rule are excluded from consideration by later rules. -The --keep-yearly 1 rule does not consider the December 31st backup because it -has already been kept due to the daily rule. There are no backups from -previous years, so there are no "true" yearly candidates. The oldest archive -at 2015-01-01 fills the remaining slot and will be kept until the rolling -backup scheme reaches "steady state" (when all retention rules are fully -satisfied). +The --keep-yearly 1 rule does not consider the December 31st archive because it +has already been kept due to the daily rule. There are no archives from +previous years, so there are no "true" yearly candidates. The oldest archive at +2015-01-01 fills the remaining slot and will be kept until the rolling backup +scheme reaches "steady state" (when all retention rules are fully satisfied). The --keep-monthly 6 rule keeps Nov, Oct, Sep, Aug, Jul and Jun. December is -not considered for this rule, because that backup was already kept because of +not considered for this rule, because that archive was already kept because of the daily rule. -2015-12-17 is kept to satisfy the --keep-daily 14 rule, because no backup was -made on 2015-12-19. If a backup had been made on that day, it would not keep -the one from 2015-12-17. +2015-12-17 is kept to satisfy the --keep-daily 14 rule, because no archive was +made on 2015-12-19. 
If an archive had been made on that day, it would not keep +the archive from 2015-12-17. We did not include weekly, hourly, minutely, or secondly rules to keep this example simple. They all work in basically the same way. @@ -106,26 +104,26 @@ example simple. They all work in basically the same way. The weekly rule is easy to understand roughly, but hard to understand in all details. If you are interested, read "ISO 8601:2000 standard week-based year". -The 13weekly and 3monthly rules are two different strategies for keeping one backup -every quarter of a year. There are `multiple ways` to define a quarter-year; -borg prune recognizes two: +The 13weekly and 3monthly rules are two different strategies for keeping one +archive every quarter of a year. There are `multiple ways` to define a +quarter-year; borg prune recognizes two: -* --keep-13weekly keeps one backup every 13 weeks using ISO 8601:2000's +* --keep-13weekly keeps one archive every 13 weeks using ISO 8601:2000's definition of the week-based year. January 4th is always included in the first week of a year, and January 1st to 3rd may be in week 52 or 53 of the previous year. Week 53 is also in the fourth quarter of the year. -* --keep-3monthly keeps one backup every 3 months. January 1st to - March 31, April 1st to June 30th, July 1st to September 30th, and October 1st - to December 31st form the quarters. +* --keep-3monthly keeps one archive every 3 months. January 1st to March 31, + April 1st to June 30th, July 1st to September 30th, and October 1st to + December 31st form the quarters. If the subtleties of the definition of a quarter-year don't matter to you, a short summary of behavior is: -* --keep-13weekly favors keeping backups at the beginning of Jan, Apr, Jul, +* --keep-13weekly favors keeping archives at the beginning of Jan, Apr, Jul, and Oct. -* --keep-3monthly favors keeping backups at the end of Dec, Mar, Jun, and Sep. -* Both strategies will have some overlap in which backups are kept. 
-* The differences are negligible unless backups considered for deletion were +* --keep-3monthly favors keeping archives at the end of Dec, Mar, Jun, and Sep. +* Both strategies will have some overlap in which archives are kept. +* The differences are negligible unless archives considered for deletion were created weekly or more frequently. .. _multiple ways: https://en.wikipedia.org/wiki/Calendar_year#Quarter_year From 794862544203b4d7ee30302667268578bbc8732a Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sun, 31 May 2026 23:04:39 +0200 Subject: [PATCH 08/22] Fixes prune error tests with binary borg --- src/borg/testsuite/archiver/prune_cmd_test.py | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index cd49159bfe..ca6a91a015 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -721,12 +721,19 @@ def test_prune_keep_yearly_int_or_interval(archivers, request, backup_files, kee assert re.search(r"Would prune:\s+test-5", output) +def _cmd_prune_error(archiver, *args): + """Run prune expecting a CommandError. 
Returns error string for assertions.""" + if archiver.FORK_DEFAULT: + return cmd(archiver, "prune", *args, exit_code=CommandError().exit_code) + with pytest.raises(CommandError) as error: + cmd(archiver, "prune", *args) + return str(error.value) + + def test_prune_no_args(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - with pytest.raises(CommandError) as error: - cmd(archiver, "prune") - output = str(error.value) + output = _cmd_prune_error(archiver) assert re.search(r"At least one of the .* settings must be specified.", output) assert re.search(r"keep(?!-)", output) flags = [ @@ -749,17 +756,15 @@ def test_prune_no_args(archivers, request): def test_prune_errors_on_keep_and_last(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - with pytest.raises(CommandError) as error: - cmd(archiver, "prune", "--dry-run", "--keep-last=5", "--keep=3") - assert 'Only one of the "keep" and "last" settings may be specified.' in str(error.value) + output = _cmd_prune_error(archiver, "--dry-run", "--keep-last=5", "--keep=3") + assert 'Only one of the "keep" and "last" settings may be specified.' in output def test_prune_errors_on_keep_and_within(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - with pytest.raises(CommandError) as error: - cmd(archiver, "prune", "--dry-run", "--keep-within=7d", "--keep=3") - assert 'Only one of the "keep" and "within" settings may be specified.' in str(error.value) + output = _cmd_prune_error(archiver, "--dry-run", "--keep-within=7d", "--keep=3") + assert 'Only one of the "keep" and "within" settings may be specified.' 
in output @pytest.mark.parametrize("keep_arg,value", product([rule.key for rule in PRUNING_RULES], ["0", "0S"])) @@ -787,11 +792,10 @@ def test_prune_all_zero_multiple_multiple(archivers, request): def test_prune_warns_on_redundant_interval_flags(archivers, request, lo_val, hi_val): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - with pytest.raises(CommandError) as error: - cmd(archiver, "prune", "--dry-run", f"--keep-hourly={lo_val}", f"--keep-daily={hi_val}") - assert "hourly=" in str(error.value) - assert "daily=" in str(error.value) - assert "effectively useless" in str(error.value) + output = _cmd_prune_error(archiver, "--dry-run", f"--keep-hourly={lo_val}", f"--keep-daily={hi_val}") + assert "hourly=" in output + assert "daily=" in output + assert "effectively useless" in output @pytest.mark.parametrize("lo_val,hi_val", [("7d", "14d"), ("7d", "-1"), ("1", "-1"), ("7d", "all"), ("1", "all")]) From 4382b3a8f28de5279f8bdcde6df3c97c968a6e4b Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sun, 31 May 2026 23:33:58 +0200 Subject: [PATCH 09/22] Updates usage docs for prune and new int/interval handling --- docs/usage/general/date-time.rst.inc | 6 ++++++ docs/usage/prune.rst | 25 ++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/docs/usage/general/date-time.rst.inc b/docs/usage/general/date-time.rst.inc index edd97c6a64..5738732e92 100644 --- a/docs/usage/general/date-time.rst.inc +++ b/docs/usage/general/date-time.rst.inc @@ -16,3 +16,9 @@ Some options accept a TIMESPAN parameter, which can be given as a number of years (e.g. ``2y``), months (e.g. ``12m``), weeks (e.g. ``2w``), days (e.g. ``7d``), hours (e.g. ``8H``), minutes (e.g. ``30M``), or seconds (e.g. ``150S``). + +The ``borg prune`` ``--keep-*`` retention options accept either a plain count +(e.g. ``--keep-daily 7``, keeping up to 7 daily archives) or a time interval +(e.g. 
``--keep-daily 7d``, keeping one daily archive per day within a 7-day window). +When using interval-based retention, ``--since`` may be specified to set the +reference timestamp for the interval (defaults to the current time). diff --git a/docs/usage/prune.rst b/docs/usage/prune.rst index 74aeed49eb..5572ef922e 100644 --- a/docs/usage/prune.rst +++ b/docs/usage/prune.rst @@ -23,6 +23,13 @@ first, so you will see what it would do without it actually doing anything. Do not forget to run ``borg compact -v`` after prune to actually free disk space. +The ``--keep-*`` options accept either a **count** (e.g. ``--keep-daily 7``) or +a **time interval** (e.g. ``--keep-daily 7d``). A count keeps up to *N* archives +per period (e.g. the last 7 daily archives), while an interval keeps one +archive per period within that time span (e.g. one daily archive per day in the +last 7-day window). When using intervals, you may also specify ``--since`` to +set the reference timestamp for interval calculation. + :: # Keep 7 end of day and 4 additional end of week archives. @@ -44,8 +51,24 @@ Do not forget to run ``borg compact -v`` after prune to actually free disk space # and an end of month archive for every month: $ borg prune -v --list --keep-within=10d --keep-weekly=4 --keep-monthly=-1 -There is also a visualized prune example in ``docs/misc/prune-example.txt``: + # Keep daily archives from the last 7 days: + $ borg prune -v --list --dry-run --keep-daily=7d + + # Same as above, but with a fixed reference timestamp: + $ borg prune -v --list --dry-run --since 2025-12-01T00:00:00+02:00 --keep-daily=7d + + # Keep the last 14 archives using `--keep` (same as `--keep-last 14`): + $ borg prune -v --list --dry-run --keep 14 + + # Keep all archives from the last 30 days using `--keep` (same as `--keep-within 30d`): + $ borg prune -v --list --dry-run --keep 30d + +There are also visualized prune examples in ``docs/misc/prune-example.txt`` and +``docs/misc/prune-example-interval.txt``: .. 
highlight:: none .. include:: ../misc/prune-example.txt :literal: + +.. include:: ../misc/prune-example-interval.txt + :literal: From d9f9f1dd45f9e83378444c31d965ba099f277203 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Mon, 1 Jun 2026 00:39:22 +0200 Subject: [PATCH 10/22] Updates prune parser epilog to match new int/interval behavior Along with reordering and improvements. --- docs/usage/general/date-time.rst.inc | 10 +-- src/borg/archiver/prune_cmd.py | 95 +++++++++++++++++++++------- 2 files changed, 77 insertions(+), 28 deletions(-) diff --git a/docs/usage/general/date-time.rst.inc b/docs/usage/general/date-time.rst.inc index 5738732e92..5496824a6e 100644 --- a/docs/usage/general/date-time.rst.inc +++ b/docs/usage/general/date-time.rst.inc @@ -10,12 +10,12 @@ Unless otherwise noted, we display local date and time. Internally, we store and process date and time as UTC. -.. rubric:: TIMESPAN +.. rubric:: TIMESPAN / INTERVAL -Some options accept a TIMESPAN parameter, which can be given as a number of -years (e.g. ``2y``), months (e.g. ``12m``), weeks (e.g. ``2w``), -days (e.g. ``7d``), hours (e.g. ``8H``), minutes (e.g. ``30M``), -or seconds (e.g. ``150S``). +Some options accept a TIMESPAN or an INTERVAL parameter, which can be given as +a number of years (e.g. ``2y``), months (e.g. ``12m``), weeks (e.g. ``2w``), +days (e.g. ``7d``), hours (e.g. ``8H``), minutes (e.g. ``30M``), or seconds +(e.g. ``150S``). The ``borg prune`` ``--keep-*`` retention options accept either a plain count (e.g. ``--keep-daily 7``, keeping up to 7 daily archives) or a time interval diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index 70654f0784..d83484deec 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -377,20 +377,33 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): from different machines) in one shared repository, use one prune call per series. 
- The ``--keep-within`` option takes an argument of the form "<int><char>", where char is "y", "m", "w", "d", "H", "M", or "S". For example, - ``--keep-within 2d`` means to keep all archives that were created within - the past 2 days. "1m" is taken to mean "31d". The archives kept with - this option do not count towards the totals specified by any other options. - - A good procedure is to thin out more and more the older your backups get. - As an example, ``--keep-daily 7`` means to keep the latest backup on each day, - up to 7 most recent days with backups (days without backups do not count). - The rules are applied from secondly to yearly, and backups selected by previous - rules do not count towards those of later rules. The time that each backup - starts is used for pruning purposes. Dates and times are interpreted in the local - timezone of the system where borg prune runs, and weeks go from Monday to Sunday. - Specifying a negative number of archives to keep means that there is no limit. + The ``--keep`` option is the simplest way to specify a basic retention + policy. It accepts a count or a time interval for retention (e.g. + ``10`` or ``7d``, ``4w``). With a count it keeps at most that many + recent archives; with an interval it keeps all archives created within + that time window. When ``--since`` is given together with an interval + retention, the interval is measured backwards from that timestamp + instead of from the current time. See ``Date and Time`` docs for exact + INTERVAL format. + + The ``--keep-last N`` and ``--keep-within INTERVAL`` options are + alternatives with equivalent functionality to ``--keep`` with a count + or interval respectively. ``--keep`` cannot be used together with + ``--keep-last`` or ``--keep-within``.
+ + The ``--keep-secondly``, ``--keep-minutely``, ``--keep-hourly``, + ``--keep-daily``, ``--keep-weekly``, ``--keep-monthly``, + ``--keep-13weekly``, ``--keep-3monthly``, and ``--keep-yearly`` options + specify time period retention policies. They accept either a count N for + retention or a time interval INTERVAL for retention, same as for ``--keep``. + With a retention count, they keep at most that many archives (one per + period, e.g. one per day or one per month until the retention count is + met). With a retention interval, they keep one archive per period + within that time span (e.g. at most one per day in a span of seven + days, even if some days had none) -- measured from ``--since`` if given, + otherwise from the current time. Specifying a count of ``-1`` (or the + word ``all``) means no limit. A zero count or zero-length interval + keeps nothing. The ``--since`` option restricts pruning to archives older than the given TIMESTAMP. Archives newer than this timestamp are kept unconditionally @@ -400,18 +413,54 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): current time. Count-based retention does not count the unconditionally kept archives. - Borg will retain the oldest archive if any of the secondly, minutely, hourly, - daily, weekly, monthly, quarterly, or yearly rules was not otherwise able to - meet its retention target. This enables the first chronological archive to - continue aging until it is replaced by a newer archive that meets the retention - criteria. - The ``--keep-13weekly`` and ``--keep-3monthly`` rules are two different strategies for keeping archives every quarter year. - The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will - keep the last N archives under the assumption that you do not create more than one - backup archive in the same second). 
+ The oldest archive is kept as long as the coarsest retention rule + covers it -- ``--keep-yearly=3`` will keep the oldest archive if it + couldn't otherwise find three candidates, ``--keep-yearly=5y`` will + keep the oldest archive as long as it is within the 5y interval. This + is useful for rolling tiered backup schemes, where the earliest backup + in a retention window should survive until the next tier's interval + naturally replaces it. + + When using interval-based pruning with multiple ``--keep-*`` options, + the intervals must be specified in increasing length matching the + periods chosen. For example, ``--keep-daily 7d --keep-weekly 4w`` is + valid, but ``--keep-daily 30d --keep-weekly 7d`` is not, because the + weekly interval is already covered by the daily one and so the weekly + interval is effectively useless. An error is emitted upon running + ``borg prune`` if such a combination of flags is given. The order of + flags on the command line is not significant. + + + A practical approach for recurring backups is to use rules + with increasing coarseness so that most of recent history is kept and + older history gradually thins out with time. For example, + ``--keep-daily 7d --keep-weekly 4w --keep-monthly 6`` keeps an + archive per day for the past week, per week for the past month, and + one per month for six months after that. Combine this with ``--since`` + to align time windows to calendar boundaries rather than the exact + moment you run prune for more predictable behavior of coarser rules: + ``--keep-daily 7d --keep-weekly 4w --since $(date +%F)``. + + Count-based retention keeps archives less bound to time. For instance, + ``--keep-yearly 3`` retains 3 yearly archives however far back they + span and ``--keep-daily 20`` keeps 20 archives no matter if you missed + a week in between. 
This can be useful for less regular archive + creation, or if your use case does not map well to specific time + intervals, or if you simply prefer to think of archive retention in + numbers rather than intervals. + + For count-based retention, backups selected by more granular rules do + not count towards those of coarser rules. ``--keep 3 --keep-monthly 2`` + will first keep the 3 latest archives and then keep 2 monthly archives, + skipping ones that were already kept by ``--keep 3``. + + The time that each archive creation started is used to match archives + to pruning periods. Dates and times are interpreted in the local + timezone of your system. Weeks go from Monday to Sunday. + You can influence how the ``--list`` output is formatted by using the ``--short`` option (less wide output) or by giving a custom format using ``--format`` (see From 304a2fa5b9a9c902d3a7cb0f4b6f36a6b43e2a8b Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 6 Jun 2026 20:16:47 +0200 Subject: [PATCH 11/22] Removes `--keep-last` and `--keep-within`, superseded by `--keep` --- docs/usage/prune.rst | 6 +- src/borg/archiver/prune_cmd.py | 37 ++------- src/borg/testsuite/archiver/prune_cmd_test.py | 78 +++---------------- 3 files changed, 17 insertions(+), 104 deletions(-) diff --git a/docs/usage/prune.rst b/docs/usage/prune.rst index 5572ef922e..cedcc92a62 100644 --- a/docs/usage/prune.rst +++ b/docs/usage/prune.rst @@ -49,7 +49,7 @@ set the reference timestamp for interval calculation. # Keep all backups in the last 10 days, 4 additional end of week archives, # and an end of month archive for every month: - $ borg prune -v --list --keep-within=10d --keep-weekly=4 --keep-monthly=-1 + $ borg prune -v --list --keep=10d --keep-weekly=4 --keep-monthly=-1 # Keep daily archives from the last 7 days: $ borg prune -v --list --dry-run --keep-daily=7d @@ -57,10 +57,10 @@ set the reference timestamp for interval calculation. 
# Same as above, but with a fixed reference timestamp: $ borg prune -v --list --dry-run --since 2025-12-01T00:00:00+02:00 --keep-daily=7d - # Keep the last 14 archives using `--keep` (same as `--keep-last 14`): + # Keep the last 14 archives using `--keep`: $ borg prune -v --list --dry-run --keep 14 - # Keep all archives from the last 30 days using `--keep` (same as `--keep-within 30d`): + # Keep all archives from the last 30 days using `--keep`: $ borg prune -v --list --dry-run --keep 30d There are also visualized prune examples in ``docs/misc/prune-example.txt`` and diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index d83484deec..1eaee38b4a 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -8,7 +8,7 @@ from ._common import with_repository, Highlander from ..constants import * # NOQA from ..helpers import ArchiveFormatter, ProgressIndicatorPercent, CommandError, Error -from ..helpers import archivename_validator, interval, int_or_interval, sig_int, timestamp +from ..helpers import archivename_validator, int_or_interval, sig_int, timestamp from ..helpers import json_print, basic_json_data from ..helpers.argparsing import ArgumentParser from ..manifest import ArchiveInfo, Manifest @@ -94,8 +94,6 @@ def quarterly_3monthly_period_func(dt): # Each archive is considered for keeping -PRUNE_WITHIN = PruningRule("within", unique_period_func()) -PRUNE_LAST = PruningRule("last", unique_period_func()) PRUNE_KEEP = PruningRule("keep", unique_period_func()) # Last archive (by creation timestamp) within period group is considered for keeping PRUNE_SECONDLY = PruningRule("secondly", pattern_period_func("%Y-%m-%d %H:%M:%S")) @@ -112,8 +110,6 @@ def quarterly_3monthly_period_func(dt): PRUNE_SINCE = PruningRule("skip", unique_period_func()) PRUNING_RULES = [ - PRUNE_WITHIN, - PRUNE_LAST, PRUNE_KEEP, PRUNE_SECONDLY, PRUNE_MINUTELY, @@ -295,18 +291,11 @@ def _validate_prune_args(self, args): if len(keep_args) == 0: raise 
CommandError( - 'At least one of the "keep", "keep-within", "keep-last", ' - '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' - '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", ' - 'or "keep-yearly" settings must be specified.' + 'At least one of the "keep", "keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' + '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", or "keep-yearly" settings must be ' + "specified." ) - if PRUNE_KEEP.key in keep_args and PRUNE_LAST.key in keep_args: - raise CommandError('Only one of the "keep" and "last" settings may be specified.') - - if PRUNE_KEEP.key in keep_args and PRUNE_WITHIN.key in keep_args: - raise CommandError('Only one of the "keep" and "within" settings may be specified.') - if all(not bool(val) for val in keep_args.values()): raise CommandError( 'None of the "keep", "keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", "keep-weekly", ' @@ -320,7 +309,7 @@ def lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val): f"useless since every archive matched by {hi_arg} would have already been matched by {lo_arg}." ) - prune_keys = {rule.key for rule in PRUNING_RULES if rule != PRUNE_LAST} + prune_keys = {rule.key for rule in PRUNING_RULES} interval_args = [ (arg, val) for arg, val in keep_args.items() @@ -386,11 +375,6 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): instead of from the current time. See ``Date and Time`` docs for exact INTERVAL format. - The ``--keep-last N`` and ``--keep-within INTERVAL`` options are - alternatives with equivalent functionality to ``--keep`` with a count - or interval respectively. ``--keep`` cannot be used together with - ``--keep-last`` or ``--keep-within``. 
- The ``--keep-secondly``, ``--keep-minutely``, ``--keep-hourly``, ``--keep-daily``, ``--keep-weekly``, ``--keep-monthly``, ``--keep-13weekly``, ``--keep-3monthly``, and ``--keep-yearly`` options @@ -505,17 +489,6 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): action=Highlander, help="only consider archives older than this for pruning", ) - subparser.add_argument( - "--keep-within", - metavar="INTERVAL", - dest=PRUNE_WITHIN.key, - type=interval, - action=Highlander, - help="keep all archives within this time interval", - ) - subparser.add_argument( - "--keep-last", dest=PRUNE_LAST.key, type=int, action=Highlander, help="number of archives to keep" - ) subparser.add_argument( "--keep", dest=PRUNE_KEEP.key, diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index ca6a91a015..bd39cd0586 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -14,10 +14,9 @@ PRUNE_MONTHLY, PRUNE_SECONDLY, PRUNE_WEEKLY, - PRUNE_WITHIN, PRUNE_YEARLY, ) -from ...helpers import CommandError, interval +from ...helpers import CommandError from ...manifest import ArchiveInfo from . import cmd, RK_ENCRYPTION, generate_archiver_tests @@ -328,9 +327,9 @@ def test_prune_ignore_protected(archivers, request): cmd(archiver, "tag", "--set=@PROT", "archive1") # do not delete archive1! cmd(archiver, "create", "archive2", archiver.input_path) cmd(archiver, "create", "archive3", archiver.input_path) - output = cmd(archiver, "prune", "--list", "--keep-last=1", "--match-archives=sh:archive*") + output = cmd(archiver, "prune", "--list", "--keep=1", "--match-archives=sh:archive*") assert "archive1" not in output # @PROT archives are completely ignored. 
- assert re.search(r"Keeping archive \(rule: last #1\):\s+archive3", output) + assert re.search(r"Keeping archive \(rule: keep #1\):\s+archive3", output) assert re.search(r"Pruning archive \(.*?\):\s+archive2", output) output = cmd(archiver, "repo-list") assert "archive1" in output # @PROT protected archive1 from deletion @@ -349,39 +348,6 @@ def mock_archive(ts, id=None): return ArchiveInfo(name="", id=id, ts=ts.replace(tzinfo=timezone.utc), tags=(), host="", user="") -def test_prune_within(): - test_deltas = [ - timedelta(minutes=1), - timedelta(hours=1.5), - timedelta(hours=2.5), - timedelta(hours=3.5), - timedelta(hours=25), - timedelta(hours=49), - ] - now = datetime.now(timezone.utc) - test_dates = [now - d for d in test_deltas] - test_archives = [mock_archive(date) for date in test_dates] - - def dotest(within, indices): - keep = prune(test_archives, PRUNE_WITHIN, interval(within), now, False) - assert set(keep) == {test_archives[i] for i in indices} - assert all(keep[a].rule.key == "within" for a in keep) - - dotest("15S", []) - dotest("2M", [0]) - dotest("1H", [0]) - dotest("2H", [0, 1]) - dotest("3H", [0, 1, 2]) - dotest("24H", [0, 1, 2, 3]) - dotest("26H", [0, 1, 2, 3, 4]) - dotest("2d", [0, 1, 2, 3, 4]) - dotest("50H", [0, 1, 2, 3, 4, 5]) - dotest("3d", [0, 1, 2, 3, 4, 5]) - dotest("1w", [0, 1, 2, 3, 4, 5]) - dotest("1m", [0, 1, 2, 3, 4, 5]) - dotest("1y", [0, 1, 2, 3, 4, 5]) - - @pytest.mark.parametrize( "rule,num_to_keep,expected_indices", [ @@ -519,16 +485,16 @@ def test_prune_json_list_pruned(archivers, request, backup_files): assert archives[0]["deleted_archive_number"] == 1 -def test_prune_keep_last_same_second(archivers, request, backup_files): +def test_prune_keep_same_second(archivers, request, backup_files): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) cmd(archiver, "create", "test1", backup_files) cmd(archiver, "create", "test2", backup_files) - output = cmd(archiver, "prune", "--list", 
"--dry-run", "--keep-last=2") + output = cmd(archiver, "prune", "--list", "--dry-run", "--keep=2") # Both archives are kept even though they have the same timestamp to the second. Would previously have failed with # old behavior of --keep-last. Archives sorted on seconds, order is undefined. - assert re.search(r"Keeping archive \(rule: last #\d\):\s+test1", output) - assert re.search(r"Keeping archive \(rule: last #\d\):\s+test2", output) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test1", output) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test2", output) @pytest.mark.parametrize("keep_arg", ["--keep=2", "--keep=1S"]) @@ -734,39 +700,14 @@ def test_prune_no_args(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) output = _cmd_prune_error(archiver) + assert re.search(r"At least one of the .* settings must be specified.", output) assert re.search(r"keep(?!-)", output) - flags = [ - "last", - "within", - "secondly", - "minutely", - "hourly", - "daily", - "weekly", - "monthly", - "yearly", - "13weekly", - "3monthly", - ] + flags = ["secondly", "minutely", "hourly", "daily", "weekly", "monthly", "yearly", "13weekly", "3monthly"] for flag in flags: assert f"keep-{flag}" in output -def test_prune_errors_on_keep_and_last(archivers, request): - archiver = request.getfixturevalue(archivers) - cmd(archiver, "repo-create", RK_ENCRYPTION) - output = _cmd_prune_error(archiver, "--dry-run", "--keep-last=5", "--keep=3") - assert 'Only one of the "keep" and "last" settings may be specified.' in output - - -def test_prune_errors_on_keep_and_within(archivers, request): - archiver = request.getfixturevalue(archivers) - cmd(archiver, "repo-create", RK_ENCRYPTION) - output = _cmd_prune_error(archiver, "--dry-run", "--keep-within=7d", "--keep=3") - assert 'Only one of the "keep" and "within" settings may be specified.' 
in output - - @pytest.mark.parametrize("keep_arg,value", product([rule.key for rule in PRUNING_RULES], ["0", "0S"])) def test_prune_all_zero_args_one(archivers, request, keep_arg, value): archiver = request.getfixturevalue(archivers) @@ -784,7 +725,6 @@ def test_prune_all_zero_multiple_multiple(archivers, request): output = _cmd_prune_error(archiver, "--keep-secondly=0S", "--keep-daily=0") assert re.search(r"None of the .* settings have a positive value. At least one must be non-zero.", output) - @pytest.mark.parametrize( "lo_val,hi_val", [("14d", "7d"), ("-1", "7d"), ("-1", "1"), ("-1", "-1"), ("all", "7d"), ("all", "1"), ("all", "-1")], From d9b35f2eeb2dd8c6c5bc8dffb66d039be6257207 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 6 Jun 2026 23:55:00 +0200 Subject: [PATCH 12/22] Simplifies retention granularity ordering check --- src/borg/archiver/prune_cmd.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index 1eaee38b4a..37a2cbcc8f 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -309,12 +309,7 @@ def lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val): f"useless since every archive matched by {hi_arg} would have already been matched by {lo_arg}." 
) - prune_keys = {rule.key for rule in PRUNING_RULES} - interval_args = [ - (arg, val) - for arg, val in keep_args.items() - if arg in prune_keys and (isinstance(val, timedelta) or val == -1) - ] + interval_args = [(arg, val) for arg, val in keep_args.items() if isinstance(val, timedelta) or val == -1] for (lo_arg, lo_val), (hi_arg, hi_val) in combinations(interval_args, 2): if hi_val == -1: # 'Infinity' is always bigger @@ -323,11 +318,7 @@ def lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val): if lo_val == -1 or lo_val >= hi_val: raise CommandError(lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val)) - int_args = [ - (arg, val) - for arg, val in keep_args.items() - if any((arg == r.key for r in PRUNING_RULES)) and isinstance(val, int) - ] + int_args = [(arg, val) for arg, val in keep_args.items() if isinstance(val, int)] for (lo_arg, lo_val), (hi_arg, hi_val) in combinations(int_args, 2): if lo_val == -1: raise CommandError(lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val)) From 1073da9eb1c452587921fb2587c9be0a82b60b7e Mon Sep 17 00:00:00 2001 From: Daniel Rudolf Date: Thu, 11 Jun 2026 01:15:09 +0200 Subject: [PATCH 13/22] Refactor the interval-based pruning example Co-Authored-By: Hugo Wallenburg --- docs/misc/prune-example-interval.txt | 132 +++++++++++++----- docs/usage/general/date-time.rst.inc | 5 + src/borg/testsuite/archiver/prune_cmd_test.py | 1 + 3 files changed, 101 insertions(+), 37 deletions(-) diff --git a/docs/misc/prune-example-interval.txt b/docs/misc/prune-example-interval.txt index 53ce26cd51..f318e4f423 100644 --- a/docs/misc/prune-example-interval.txt +++ b/docs/misc/prune-example-interval.txt @@ -1,27 +1,54 @@ borg prune visualized (count and interval mixed) ================================================================================ -Assume it is 2026-06-04 16:00. You have been creating backup archives at 16:00 -on most days going back to late 2025, with pruning running immediately after -each archival. 
Todays archive has just been made and the following prune -operation is about to start. +Scenario: You use borg to perform daily backups. As backups age, the day-to-day +changes become less important, so to save storage space you want older archives +to "thin out" over time while retaining most recent archives. Your backup +script runs `borg create`, immediately followed by `borg prune`. -This example shows what would be kept/pruned when running the following prune -command. Note the yearly rule keeping _any two_ yearly archives. +Assume today is 2026-06-04 and you always start your backups at 16:00. You have +been creating backup archives starting at 16:00 on most days going back to late +2025. Today, `borg create` took a little longer than usual. It's 16:12 now and +you run `borg prune`. +You want Borg to keep one archive per day for one week, four weekly archives, +one archive per month for five months, and two yearly backups. For that, you +use the following command: + +``` borg prune \ - --since '2026-06-04 16:00' \ --keep-daily 1w \ + --keep-weekly 4 \ --keep-monthly 5m \ - --keep-yearly 2 + --keep-yearly 2 \ + --since '2026-06-04 16:00' +``` + +The `--keep-*` options reflect the intended retention policy exactly. Note the +different wording in the retention policy for weekly and yearly archives: They +aren't *interval*-based, but *count*-based. + +Another important detail here is `--since`. Without it, intervals would be +calculated relative to the actual start time of `prune` - in this case 16:12. +Since your backups are always created at 16:00, this 12-minute shift would move +the cutoff point of intervals and could cause archives near the boundary to +unexpectedly fall outside the expected time window. + +By specifying `--since '2026-06-04 16:00'`, all intervals are anchored to the +intended reference time (16:00), not the moment `prune` happens to run. This +ensures stable and predictable retention behavior, independent of when `prune` +actually runs. 
-Archives kept by the `--keep-daily` rule are marked by a "d", -archives kept by the `--keep-monthly` rule are marked by an "m", and -archives kept by the `--keep-yearly` rule are marked by a "y" to the -right. +The first archive was made on 2025-11-15. You missed the backups on 2026-03-31, +2026-05-24, and 2026-06-03. -The first archive was made on 2025-11-15. You missed the backups on 2026-03-31 -and 2026-06-03. +Below you find an overview of what archives `prune` will keep. + +Archives kept by the `--keep-daily` rule are marked by a "d" to the right, +archives kept by the `--keep-weekly` rule are marked by a "w" to the right, +archives kept by the `--keep-monthly` rule are marked by a "m" to the right, +archives kept by the `--keep-yearly` rule are marked by a "y" to the right, and +archives kept by the `--since` rule are marked by a "x" to the right. Calendar view @@ -32,7 +59,7 @@ Calendar view 8 9 10 11 12 13 14 15y16 15 16 17 18 19 20 21 17 18 19 20 21 22 23 22 23 24 25 26 27 28 - 24 25 26 27 28 29 30 29 30 31y + 24 25 26 27 28 29 30 29 30 31m 2026 January February March @@ -44,38 +71,69 @@ Calendar view 30m31 April May June - 1 2 3 4 5 1 2 3 1d 2d 3 4d - 6 7 8 9 10 11 12 4 5 6 7 8 9 10 -13 14 15 16 17 18 19 11 12 13 14 15 16 17 -20 21 22 23 24 25 26 18 19 20 21 22 23 24 -27 28 29 30m 25 26 27 28 29d30d31d + 1 2 3 4 5 1 2 3w 1d 2d 3 4x + 6 7 8 9 10 11 12 4 5 6 7 8 9 10w +13 14 15 16 17 18 19 11 12 13 14 15 16 17w +20 21 22 23 24 25 26 18 19 20 21 22 23w24 +27 28 29 30m 25 26 27 28d29d30d31d List view --------- ---keep-daily 1w --keep-monthly 5m --keep-yearly 2 --------------------------------------------------------------- -1. 2026-06-04 1. 2026-04-30 1. 2025-12-31 -2. 2026-06-02 2. 2026-03-30 2. 2025-11-15 (oldest) -3. 2026-06-01 3. 2026-02-28 -4. 2026-05-31 4. 2026-01-31 -5. 2026-05-30 -6. 2026-05-29 +--keep-daily 1w --keep-weekly 4 --keep-monthly 5m --keep-yearly 2 +-------------------------------------------------------------------------------- + 1. 
2025-11-15 (oldest) +1. 2026-06-02 1. 2026-05-23 1. 2026-04-30 +2. 2026-06-01 2. 2026-05-17 2. 2026-03-30 +3. 2026-05-31 3. 2026-05-10 3. 2026-02-28 +4. 2026-05-30 4. 2026-05-03 4. 2026-01-31 +5. 2026-05-29 5. 2025-12-31 +6. 2026-05-28 + +2026-06-04 is additionally kept due to `--since`. Notes ----- -2026-06-03 was skipped, so no archive on that day. No compensation is made for -this, so the "daily" rule simply keeps one fewer archive. 2026-05-28 16:00 is -exactly one week before `--since` and so would be excluded and pruned in this -prune run. - -2026-03-31 was skipped, so 2026-03-30 is the monthly candidate for that month. -2025-12-31 16:00 is exactly 5 months (5 * 31 days) from today and so that day's -archive is no longer kept by the "monthly" rule but instead is now kept as the -first true yearly candidate. +The current day's archive is always kept, because `create` ran after the date +given with `--since`. For `prune`, it's as if this archive doesn't exist (yet). + +2026-06-03 was skipped, so no archive can be kept with `--keep-daily` for that +day. Unlike with a *count*-based policy, no compensation is made for an +*interval* like `--keep-daily 1w`, so the rule simply keeps one archive fewer. + +2026-05-28 16:00 is exactly one week before `--since`. Since `create` always +runs after 16:00, the archive created on 2026-05-28 is kept, too. Without +`--since`, Borg would cut off at 2026-05-28 16:12 instead, which would likely +mean that the archive created on 2026-05-28 would be pruned. `--since` ensures +that 2026-05-28 is consistently kept. If you want it consistently pruned, try a +later reference time, e.g. `--since '2026-06-04 23:59:59'`. + +2026-05-31 is considered not only by `--keep-daily`, but by `--keep-weekly` +and `--keep-monthly`, too. The archive is effectively kept by `--keep-daily`, +but how this affects other rules differs between *count*- and *interval*-based +policies.
For *interval*-based rules like `--keep-monthly 5m` it has no effect: +The rule simply keeps one archive fewer. + +For *count*-based rules like `--keep-weekly 4` it has an effect: The policy +tells Borg to keep 4 weekly archives, and if 2026-05-31 is kept by another +rule already, Borg compensates by keeping an older archive instead. +Consequently, Borg will also keep the 2026-05-03 archive. + +Since 2026-05-24 and 2026-03-31 were skipped, there are no perfect candidates +for that week and month. Borg chooses the next best candidate, so it keeps +2026-05-23 as the weekly and 2026-03-30 as the monthly candidate. + +The implementation of `--keep-monthly 5m` is somewhat special: Borg defines a +month as a fixed 31-day period, independent of the actual calendar dates +involved. As a result, `5m` corresponds to 5 × 31 = 155 days. The archive from +2025-12-31 16:00 is exactly 155 days older than the reference time and is +therefore retained by `--keep-monthly`. + +As a result, there are no true yearly candidates. In the absence of a better +candidate, `--keep-yearly 2` only matches the oldest archive, 2025-11-15. Since interval rules define time windows rather than competing for a fixed number of slots, their interplay is simpler than count-based rules. An archive diff --git a/docs/usage/general/date-time.rst.inc b/docs/usage/general/date-time.rst.inc index 5496824a6e..8a44ab6726 100644 --- a/docs/usage/general/date-time.rst.inc +++ b/docs/usage/general/date-time.rst.inc @@ -22,3 +22,8 @@ The ``borg prune`` ``--keep-*`` retention options accept either a plain count (e.g. ``--keep-daily 7d``, keeping one daily archive per day within a 7-day window). When using interval-based retention, ``--since`` may be specified to set the reference timestamp for the interval (defaults to the current time). + +Please note that Borg treats months (e.g. ``12m``) as fixed 31-day periods +rather than calendar months. As a result, ``12m`` corresponds to +12 × 31 = 372 days. 
Similarly, years (e.g. ``2y``) are treated as fixed +365-day periods and do not take leap years into account. diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index bd39cd0586..15c234eee9 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -725,6 +725,7 @@ def test_prune_all_zero_multiple_multiple(archivers, request): output = _cmd_prune_error(archiver, "--keep-secondly=0S", "--keep-daily=0") assert re.search(r"None of the .* settings have a positive value. At least one must be non-zero.", output) + @pytest.mark.parametrize( "lo_val,hi_val", [("14d", "7d"), ("-1", "7d"), ("-1", "1"), ("-1", "-1"), ("all", "7d"), ("all", "1"), ("all", "-1")], From 01aff2b7e2d759a543243f9d4eedd3a29661c160 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Tue, 16 Jun 2026 21:42:24 +0200 Subject: [PATCH 14/22] Sets base_timestamp for prune only once --- src/borg/archiver/prune_cmd.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index 37a2cbcc8f..9575991943 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -127,7 +127,7 @@ def prune( archives: list[ArchiveInfo], rule: PruningRule, n_or_interval: int | timedelta, - since_timestamp: datetime | None, + base_timestamp: datetime | None, keep_oldest: bool, previously_kept: frozenset[ArchiveInfo] = frozenset(), ) -> dict[ArchiveInfo, KeepResult]: @@ -137,9 +137,9 @@ def prune( if isinstance(n_or_interval, int): n, earliest_timestamp = n_or_interval, None else: - if since_timestamp is None: - raise ValueError("since_timestamp is required when using interval-based pruning") - n, earliest_timestamp = None, since_timestamp - n_or_interval + if base_timestamp is None: + raise ValueError("base_timestamp is required when using interval-based pruning") + n, earliest_timestamp = None, base_timestamp 
- n_or_interval keep: dict[ArchiveInfo, KeepResult] = {} @@ -185,6 +185,8 @@ def do_prune(self, args, repository, manifest): candidate_archives = archives if since is not None: + base_timestamp = since + # `--since` is a prefilter: Archives from after this time are kept by default. They are not considered for # pruning at all. They won't falsely occupy an active retention period. for archive in archives: @@ -192,6 +194,8 @@ def do_prune(self, args, repository, manifest): break keep[archive] = KeepResult(rule=PRUNE_SINCE, idx=len(keep)) candidate_archives = archives[len(keep) :] + else: + base_timestamp = datetime.now().astimezone() # Apply each retention rule to all candidate archives. The # `previously_kept` parameter prevents later (coarser-grained) rules @@ -204,7 +208,7 @@ def do_prune(self, args, repository, manifest): archives=candidate_archives, rule=rule, n_or_interval=n_or_interval, - since_timestamp=(since if since is not None else datetime.now().astimezone()), + base_timestamp=base_timestamp, keep_oldest=( rule == active_rules[-1][0] ), # Activate keep_oldest rule only for the largest active interval From 47d88682c158aff81eaeecdd54f26c3c19e458dc Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Tue, 16 Jun 2026 21:42:42 +0200 Subject: [PATCH 15/22] Fixes pytest warning for non-list in `parametrize` --- src/borg/testsuite/archiver/prune_cmd_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 15c234eee9..a5308bd621 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -708,7 +708,7 @@ def test_prune_no_args(archivers, request): assert f"keep-{flag}" in output -@pytest.mark.parametrize("keep_arg,value", product([rule.key for rule in PRUNING_RULES], ["0", "0S"])) +@pytest.mark.parametrize("keep_arg,value", list(product([rule.key for rule in PRUNING_RULES], ["0", "0S"]))) def 
test_prune_all_zero_args_one(archivers, request, keep_arg, value): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) From 79388320c2695585af50135e846d2d89f1dff551 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Tue, 16 Jun 2026 22:20:53 +0200 Subject: [PATCH 16/22] Makes archive interval timestamp check inclusive --- src/borg/archiver/prune_cmd.py | 30 +++---- src/borg/testsuite/archiver/prune_cmd_test.py | 87 +++++++++++-------- 2 files changed, 65 insertions(+), 52 deletions(-) diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index 9575991943..e81b38d785 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -147,7 +147,7 @@ def can_retain(a): if n is not None: return n == -1 or len(keep) < n else: - return a.ts > earliest_timestamp + return a.ts >= earliest_timestamp prev_period = None for archive in archives: @@ -187,10 +187,10 @@ def do_prune(self, args, repository, manifest): if since is not None: base_timestamp = since - # `--since` is a prefilter: Archives from after this time are kept by default. They are not considered for - # pruning at all. They won't falsely occupy an active retention period. + # `--since` is a prefilter: Archives made at or after this time are kept by default. They are not considered + # for pruning at all and thus won't falsely occupy an active retention period. for archive in archives: - if archive.ts <= since: + if archive.ts < since: break keep[archive] = KeepResult(rule=PRUNE_SINCE, idx=len(keep)) candidate_archives = archives[len(keep) :] @@ -385,23 +385,21 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): keeps nothing. The ``--since`` option restricts pruning to archives older than the given - TIMESTAMP. Archives newer than this timestamp are kept unconditionally + TIMESTAMP. Archives made at or after this timestamp are kept unconditionally as a pre-filter. 
When ``--since`` is used together with interval-based - ``--keep-*`` options (e.g. ``--keep-daily 7d``), the interval is - measured backwards from the given timestamp rather than from the - current time. Count-based retention does not count the unconditionally - kept archives. + ``--keep-*`` options (e.g. ``--keep-daily 7d``), the interval is measured + backwards from the given timestamp rather than from the current time. + Count-based retention does not count the unconditionally kept archives. The ``--keep-13weekly`` and ``--keep-3monthly`` rules are two different strategies for keeping archives every quarter year. - The oldest archive is kept as long as the coarsest retention rule - covers it -- ``--keep-yearly=3`` will keep the oldest archive if it - couldn't otherwise find three candidates, ``--keep-yearly=5y`` will - keep the oldest archive as long as it is within the 5y interval. This - is useful for rolling tiered backup schemes, where the earliest backup - in a retention window should survive until the next tier's interval - naturally replaces it. + The oldest archive is kept as long as the coarsest retention rule covers it -- + ``--keep-yearly=3`` will keep the oldest archive if it couldn't otherwise find + three candidates, ``--keep-yearly=5y`` will keep the oldest archive as long as + it is at or within the 5y interval. This is useful for rolling tiered backup + schemes, where the earliest backup in a retention window should survive until + the next tier's interval naturally replaces it. 
When using interval-based pruning with multiple ``--keep-*`` options, the intervals must be specified in increasing length matching the diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index a5308bd621..11251e94f5 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -121,10 +121,11 @@ def test_prune_repository_example_interval(archivers, request, backup_files): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) - # All timestamps are at exactly 16:00 UTC. - # This models the example: backups on most days from 2025-11-15 to - # 2026-06-04, with skips on 2026-03-31 and 2026-06-03. Of these only - # 2026-05-28 should be pruned after today's pruning. + # All timestamps are at exactly 16:00 UTC (matching the example). + # Backups on most days from 2025-11-15 to 2026-06-04, with skips on + # 2026-03-31 and 2026-06-03. At the inclusive interval boundaries, + # 2026-05-28 (1w before --since) and 2025-12-31 (155d before --since) + # are kept. archive_dates = [ (2025, 11, 15), (2025, 12, 31), @@ -143,7 +144,7 @@ def test_prune_repository_example_interval(archivers, request, backup_files): names = [f"backup_{y:04d}-{m:02d}-{d:02d}" for y, m, d in archive_dates] for (y, m, d), name in zip(archive_dates, names): - _create_archive_ts(archiver, backup_files, name, y, m, d) + _create_archive_ts(archiver, backup_files, name, y, m, d, H=16) output = cmd( archiver, @@ -156,25 +157,34 @@ def test_prune_repository_example_interval(archivers, request, backup_files): "--keep-yearly=2", ) + # 2026-06-04 is kept unconditionally by the --since prefilter. 
+ assert re.search(r"Keeping archive \(rule: skip #1\):\s+backup_2026-06-04", output) + daily_kept = [ - "backup_2026-06-04", "backup_2026-06-02", "backup_2026-06-01", "backup_2026-05-31", "backup_2026-05-30", "backup_2026-05-29", + # 2026-05-28 is at the inclusive boundary (exactly 1w before --since). + "backup_2026-05-28", ] for i, name in enumerate(daily_kept, 1): assert re.search(rf"Keeping archive \(rule: daily #{i}\):\s+{name}", output) - monthly_kept = ["backup_2026-04-30", "backup_2026-03-30", "backup_2026-02-28", "backup_2026-01-31"] + monthly_kept = [ + "backup_2026-04-30", + "backup_2026-03-30", + "backup_2026-02-28", + "backup_2026-01-31", + # 2025-12-31 is at the inclusive boundary (exactly 155d before --since). + "backup_2025-12-31", + ] for i, name in enumerate(monthly_kept, 1): assert re.search(rf"Keeping archive \(rule: monthly #{i}\):\s+{name}", output) - assert re.search(r"Keeping archive \(rule: yearly #1\):\s+backup_2025-12-31", output) - assert re.search(r"Keeping archive \(rule: yearly\[oldest\] #2\):\s+backup_2025-11-15", output) - - assert re.search(r"Would prune:\s+backup_2026-05-28", output) + # No true yearly candidates remain; only the oldest archive fills the slot. + assert re.search(r"Keeping archive \(rule: yearly\[oldest\] #1\):\s+backup_2025-11-15", output) def test_prune_quarterly(archivers, request, backup_files): @@ -502,13 +512,18 @@ def test_prune_keep_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + # Two archives at the --since boundary prove the inclusive timestamp check. _create_archive_dt(archiver, backup_files, "test-1", dt) - _create_archive_dt(archiver, backup_files, "test-2", dt) + _create_archive_dt( + archiver, backup_files, "test-2", dt - timedelta(microseconds=999999) + ) # Would be pruned if `secondly`-rule was active. 
_create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(seconds=1)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(seconds=1, microseconds=1)) output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) - assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-1", output) - assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-2", output) - assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: skip #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: keep #1\):\s+test-2", output) + assert re.search(r"Keeping archive \(rule: keep #2\):\s+test-3", output) + assert re.search(r"Would prune:\s+test-4", output) @pytest.mark.parametrize("keep_arg", ["--keep-daily=-1", "--keep-daily=all"]) @@ -549,8 +564,8 @@ def test_prune_keep_minutely_int_or_interval(archivers, request, backup_files, k _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(minutes=1)) _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(minutes=2)).replace(second=1)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(minutes=2)) - _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(minutes=3)).replace(microsecond=1)) - _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(minutes=3)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(minutes=3)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(minutes=3, microseconds=1)) output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: minutely #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: minutely #2\):\s+test-2", output) @@ -567,8 +582,8 @@ def test_prune_keep_hourly_int_or_interval(archivers, request, backup_files, kee _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(hours=1)) 
_create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(hours=2)).replace(microsecond=1)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(hours=2)) - _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(hours=3)).replace(microsecond=1)) - _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(hours=3)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(hours=3)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(hours=3, microseconds=1)) output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: hourly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: hourly #2\):\s+test-2", output) @@ -585,8 +600,8 @@ def test_prune_keep_daily_int_or_interval(archivers, request, backup_files, keep _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=1)) _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=2)).replace(second=1)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=2)) - _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=3)).replace(microsecond=1)) - _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=3)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=3)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=3, microseconds=1)) output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) @@ -603,8 +618,8 @@ def test_prune_keep_weekly_int_or_interval(archivers, request, backup_files, kee _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=7)) _create_archive_dt(archiver, backup_files, "test-2", (dt - 
timedelta(days=14)).replace(microsecond=1)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=14)) - _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=21)).replace(microsecond=1)) - _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=21)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=21)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=21, microseconds=1)) output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: weekly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: weekly #2\):\s+test-2", output) @@ -621,8 +636,8 @@ def test_prune_keep_monthly_int_or_interval(archivers, request, backup_files, ke _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=31)) _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=62)).replace(microsecond=1)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=62)) - _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=93)).replace(microsecond=1)) - _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=93)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=93)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=93, microseconds=1)) output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: monthly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: monthly #2\):\s+test-2", output) @@ -640,8 +655,8 @@ def test_prune_keep_13weekly_int_or_interval(archivers, request, backup_files, k _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=91)) _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=182)).replace(microsecond=1)) 
_create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=182)) - _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=273)).replace(microsecond=1)) - _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=273)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=273)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=273, microseconds=1)) output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: quarterly_13weekly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: quarterly_13weekly #2\):\s+test-2", output) @@ -658,8 +673,8 @@ def test_prune_keep_3monthly_int_or_interval(archivers, request, backup_files, k _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=92)) _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=184)).replace(microsecond=1)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=184)) - _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=275)).replace(microsecond=1)) - _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=275)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=275)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=275, microseconds=1)) # 275d is the interval from dt to the oldest kept monthly archive output = cmd(archiver, "prune", "--list", "--short", "--dry-run", "--since", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: quarterly_3monthly #1\):\s+test-1", output) @@ -677,8 +692,8 @@ def test_prune_keep_yearly_int_or_interval(archivers, request, backup_files, kee _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=365)) _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=730)).replace(microsecond=1)) 
_create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=730)) - _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=1095)).replace(microsecond=1)) - _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=1095)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=1095)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=1095, microseconds=1)) output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: yearly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: yearly #2\):\s+test-2", output) @@ -801,7 +816,7 @@ def test_prune_interval_rolling_schedule_oldest_retention(): previous_archives = [] archives = [] - for day_offset in range(94): + for day_offset in range(95): backup_ts = start_date + timedelta(days=day_offset) previous_archives = archives archives = [mock_archive(backup_ts, day_offset), *archives] @@ -819,10 +834,10 @@ def test_prune_interval_rolling_schedule_oldest_retention(): archives = sorted(keep.keys(), key=lambda a: a.ts, reverse=True) - # It is now 2024-04-03. The last run should have just pruned the jan-01 - # archive since it now falls outside the retention range (_exactly_ 93 days - # or 3 months ago, timestamp compared exclusively). It was kept until now - # to satisfy the oldest-rule. + # It is now 2024-04-04. On this final run jan-01 falls outside the + # 93d monthly interval and is pruned. It was kept until now by the + # oldest-retention rule — on 2024-04-03 it was exactly at the + # inclusive boundary and retained as oldest. 
assert previous_archives[-1].ts.strftime("%m-%d") == "01-01" assert archives[-1].ts.strftime("%m-%d") == "01-31" From 790871ee59f182fd7a593f95408474ead12d7288 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 20 Jun 2026 14:39:11 +0200 Subject: [PATCH 17/22] Comments on zfill While the exact stated reason doesn't currently apply in the code, I believe that any future work and debugging is aided by this ordering guarantee. --- src/borg/archiver/prune_cmd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index e81b38d785..1e371e04b2 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -64,6 +64,7 @@ def unique_period_func(): @archive_datetime_dispatch def unique_values(_dt): """Group archives by an incrementing counter, practically making each archive a group of 1""" + # zfill ensures lexicographic ordering matches number ordering in case of comparisons return str(next(counter)).zfill(max_digits) return unique_values From 378f7c27ccaa6791b0e44095905ddcede40b0140 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 20 Jun 2026 15:22:54 +0200 Subject: [PATCH 18/22] Moves keep-all test Moves it out of the nicely organized increasing retention rule tests. 
--- src/borg/testsuite/archiver/prune_cmd_test.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 11251e94f5..23c12029d8 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -526,20 +526,6 @@ def test_prune_keep_int_or_interval(archivers, request, backup_files, keep_arg): assert re.search(r"Would prune:\s+test-4", output) -@pytest.mark.parametrize("keep_arg", ["--keep-daily=-1", "--keep-daily=all"]) -def test_prune_keep_all(archivers, request, backup_files, keep_arg): - archiver = request.getfixturevalue(archivers) - cmd(archiver, "repo-create", RK_ENCRYPTION) - dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) - _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=1)) - _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(days=2)) - _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=3)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) - assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) - assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) - assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-3", output) - - @pytest.mark.parametrize("keep_arg", ["--keep-secondly=2", "--keep-secondly=2S"]) def test_prune_keep_secondly_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) @@ -702,6 +688,20 @@ def test_prune_keep_yearly_int_or_interval(archivers, request, backup_files, kee assert re.search(r"Would prune:\s+test-5", output) +@pytest.mark.parametrize("keep_arg", ["--keep-daily=-1", "--keep-daily=all"]) +def test_prune_keep_all(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", 
RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=1)) + _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(days=2)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=3)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) + assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-3", output) + + def _cmd_prune_error(archiver, *args): """Run prune expecting a CommandError. Returns error string for assertions.""" if archiver.FORK_DEFAULT: From 89c6a2ccccacfe98ae61d3f5afdf09783b2c453b Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 20 Jun 2026 18:03:55 +0200 Subject: [PATCH 19/22] Improves interval example Makes specific notes on week numbers, clears some ambiguity in the yearly candidate wording, and more. The matching test was previously missing assertions on the weekly archives, now fixed along with better explanations. --- docs/misc/prune-example-interval.txt | 32 +++++++++++-------- src/borg/testsuite/archiver/prune_cmd_test.py | 28 +++++++++++++--- 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/docs/misc/prune-example-interval.txt b/docs/misc/prune-example-interval.txt index f318e4f423..0106a69f29 100644 --- a/docs/misc/prune-example-interval.txt +++ b/docs/misc/prune-example-interval.txt @@ -7,9 +7,9 @@ to "thin out" over time while retaining most recent archives. Your backup script runs `borg create`, immediately followed by `borg prune`. Assume today is 2026-06-04 and you always start your backups at 16:00. You have -been creating backup archives starting at 16:00 on most days going back to late -2025. Today, `borg create` took a little longer than usual. 
It's 16:12 now and -you run `borg prune`. +been creating backup archives starting at 16:00, followed by pruning, on most +days going back to late 2025. Today, `borg create` took a little longer than +usual. It's 16:12 now and you run `borg prune`. You want Borg to keep one archive per day for one week, four weekly archives, one archive per month for five months, and two yearly backups. For that, you @@ -115,16 +115,22 @@ later reference time, e.g. `--since '2026-06-04 23:59:59'`. and `--keep-monthly`, too. The archive is effectively kept by `--keep-daily`, but how this affects other rules differs between *count*- and *interval*-based policies. For *interval*-based rules like `--keep-monthly 5m` it has no effect: -The rule simply keeps one archive fewer. +The rule simply keeps one archive fewer in that case. For *count*-based rules like `--keep-weekly 4` it has an effect: The policy -tells Borg to keep 4 weekly archives, and if 2026-05-31 is kept by another -rule already, Borg compensates by keeping an older archive instead. -Consequently, Borg will also keep the 2026-05-03 archive. +tells Borg to keep 4 weekly archives. Weekly slots are identified by ISO week +number; in this example the four kept archives cover weeks 18 through 21. If +2026-05-31 is kept by another rule already, Borg compensates by keeping an +older archive instead. Consequently, Borg will also keep the 2026-05-03 archive. -Since 2026-05-24 and 2026-03-31 were skipped, there are no perfect candidates -for that week and month. Borg chooses the next best candidate, so it keeps -2026-05-23 as the weekly and 2026-03-30 as the monthly candidate. +The week 22 slot is consumed by that same 2026-05-31 archive. Any other +archive from that week is not kept by weekly, even if it falls within the +daily window boundary. This is why 2026-05-27 is pruned: it has just fallen +out of the daily window, and no other rule covers it. 
+ +Since 2026-05-24 and 2026-03-31 were skipped, Borg substitutes the next best +candidate for each: 2026-05-23 fills the week 21 weekly slot, and 2026-03-30 +fills the March monthly slot. The implementation of `--keep-monthly 5m` is somewhat special: Borg defines a month as a fixed 31-day period, independent of the actual calendar dates @@ -144,6 +150,6 @@ Intervals and counts can be mixed freely. Yearly retention in this example is done by retention count instead of intervals. A count rule paired with interval rules behaves just as it would if all preceding rules were also counts: Archives already kept by earlier rules are excluded from consideration. In this -example there is only one "true" yearly candidate, so the oldest archive at -2025-11-15 is kept. This oldest archive will be kept until the rolling backup -scheme reaches "steady state" (when all retention rules are fully satisfied). +example, every yearly slot is consumed by earlier rules; no true yearly +candidate remains, so the oldest archive at 2025-11-15 is kept to preserve +rolling backup semantics. diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 23c12029d8..6b1617f731 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -123,9 +123,7 @@ def test_prune_repository_example_interval(archivers, request, backup_files): # All timestamps are at exactly 16:00 UTC (matching the example). # Backups on most days from 2025-11-15 to 2026-06-04, with skips on - # 2026-03-31 and 2026-06-03. At the inclusive interval boundaries, - # 2026-05-28 (1w before --since) and 2025-12-31 (155d before --since) - # are kept. + # 2026-03-31, 2026-05-24, and 2026-06-03. 
archive_dates = [ (2025, 11, 15), (2025, 12, 31), @@ -133,6 +131,11 @@ def test_prune_repository_example_interval(archivers, request, backup_files): (2026, 2, 28), (2026, 3, 30), (2026, 4, 30), + (2026, 5, 3), + (2026, 5, 10), + (2026, 5, 17), + (2026, 5, 23), + (2026, 5, 27), (2026, 5, 28), (2026, 5, 29), (2026, 5, 30), @@ -153,6 +156,7 @@ def test_prune_repository_example_interval(archivers, request, backup_files): "--dry-run", "--since=2026-06-04T16:00:00+00:00", "--keep-daily=1w", + "--keep-weekly=4", "--keep-monthly=5m", "--keep-yearly=2", ) @@ -166,18 +170,27 @@ def test_prune_repository_example_interval(archivers, request, backup_files): "backup_2026-05-31", "backup_2026-05-30", "backup_2026-05-29", - # 2026-05-28 is at the inclusive boundary (exactly 1w before --since). "backup_2026-05-28", ] for i, name in enumerate(daily_kept, 1): assert re.search(rf"Keeping archive \(rule: daily #{i}\):\s+{name}", output) + # Weekly W22 slot is consumed by 05-31 (already kept by daily), + # so weekly reaches back to W18 to fill all 4 slots. + weekly_kept = [ + "backup_2026-05-23", # W21 — no Sunday candidate (05-24 skipped) + "backup_2026-05-17", # W20 + "backup_2026-05-10", # W19 + "backup_2026-05-03", # W18 + ] + for i, name in enumerate(weekly_kept, 1): + assert re.search(rf"Keeping archive \(rule: weekly #{i}\):\s+{name}", output) + monthly_kept = [ "backup_2026-04-30", "backup_2026-03-30", "backup_2026-02-28", "backup_2026-01-31", - # 2025-12-31 is at the inclusive boundary (exactly 155d before --since). "backup_2025-12-31", ] for i, name in enumerate(monthly_kept, 1): @@ -186,6 +199,11 @@ def test_prune_repository_example_interval(archivers, request, backup_files): # No true yearly candidates remain; only the oldest archive fills the slot. assert re.search(r"Keeping archive \(rule: yearly\[oldest\] #1\):\s+backup_2025-11-15", output) + # 05-27 was kept by daily on the previous run (06-02; window started 05-26) but falls out + # today (window now starts at 05-28).
W22 slot is consumed by daily-held 05-31, + # so weekly doesn't save it either. + assert re.search(r"Would prune:\s+backup_2026-05-27", output) + def test_prune_quarterly(archivers, request, backup_files): # Example worked through by hand when developing the quarterly From fe0d2d117ddd8b640ec949c5400a58a05e5f4ed9 Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 20 Jun 2026 18:35:28 +0200 Subject: [PATCH 20/22] Adds a specific keep=all test --- src/borg/testsuite/archiver/prune_cmd_test.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 6b1617f731..4d14f3cd45 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -707,7 +707,7 @@ def test_prune_keep_yearly_int_or_interval(archivers, request, backup_files, kee @pytest.mark.parametrize("keep_arg", ["--keep-daily=-1", "--keep-daily=all"]) -def test_prune_keep_all(archivers, request, backup_files, keep_arg): +def test_prune_keep_daily_all(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) @@ -720,6 +720,23 @@ def test_prune_keep_all(archivers, request, backup_files, keep_arg): assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-3", output) +@pytest.mark.parametrize("keep_arg", ["--keep=-1", "--keep=all"]) +def test_prune_keep_flat_all(archivers, request, backup_files, keep_arg): + """--keep=all / --keep=-1 keeps every archive.""" + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(microseconds=1)) + _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(microseconds=2)) + 
_create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=3)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=33333)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: keep #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: keep #2\):\s+test-2", output) + assert re.search(r"Keeping archive \(rule: keep #3\):\s+test-3", output) + assert re.search(r"Keeping archive \(rule: keep #4\):\s+test-4", output) + + def _cmd_prune_error(archiver, *args): """Run prune expecting a CommandError. Returns error string for assertions.""" if archiver.FORK_DEFAULT: From c0a69611a85e30653e6c0022bd4b84460a3d80eb Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 20 Jun 2026 14:37:29 +0200 Subject: [PATCH 21/22] Renames --since to --from --- docs/misc/prune-example-interval.txt | 20 +++++------ docs/usage/general/date-time.rst.inc | 2 +- docs/usage/prune.rst | 4 +-- src/borg/archiver/prune_cmd.py | 32 ++++++++--------- src/borg/testsuite/archiver/prune_cmd_test.py | 34 +++++++++---------- 5 files changed, 46 insertions(+), 46 deletions(-) diff --git a/docs/misc/prune-example-interval.txt b/docs/misc/prune-example-interval.txt index 0106a69f29..e96e0a0baa 100644 --- a/docs/misc/prune-example-interval.txt +++ b/docs/misc/prune-example-interval.txt @@ -21,20 +21,20 @@ borg prune \ --keep-weekly 4 \ --keep-monthly 5m \ --keep-yearly 2 \ - --since '2026-06-04 16:00' + --from '2026-06-04 16:00' ``` The `--keep-*` options reflect the intended retention policy exactly. Note the different wording in the retention policy for weekly and yearly archives: They aren't *interval*-based, but *count*-based. -Another important detail here is `--since`. Without it, intervals would be +Another important detail here is `--from`. Without it, intervals would be calculated relative to the actual start time of `prune` - in this case 16:12. 
Since your backups are always created at 16:00, this 12-minute shift would move the cutoff point of intervals and could cause archives near the boundary to unexpectedly fall outside the expected time window. -By specifying `--since '2026-06-04 16:00'`, all intervals are anchored to the +By specifying `--from '2026-06-04 16:00'`, all intervals are anchored to the intended reference time (16:00), not the moment `prune` happens to run. This ensures stable and predictable retention behavior, independent of when `prune` actually runs. @@ -48,7 +48,7 @@ Archives kept by the `--keep-daily` rule are marked by a "d" to the right, archives kept by the `--keep-weekly` rule are marked by a "w" to the right, archives kept by the `--keep-monthly` rule are marked by a "m" to the right, archives kept by the `--keep-yearly` rule are marked by a "y" to the right, and -archives kept by the `--since` rule are marked by a "x" to the right. +archives kept by the `--from` rule are marked by a "x" to the right. Calendar view @@ -91,25 +91,25 @@ List view 5. 2026-05-29 5. 2025-12-31 6. 2026-05-28 -2026-06-04 is additionally kept due to `--since`. +2026-06-04 is additionally kept due to `--from`. Notes ----- The current day's archive is always kept, because `create` ran after the date -given with `--since`. For `prune`, it's as if this archive doesn't exist (yet). +given with `--from`. For `prune`, it's as if this archive doesn't exist (yet). 2026-06-03 was skipped, so no archive can be kept with `--keep-daily` for that day. Other than with a *count*-based policy, no compensation is made for an *interval* like `--keep-daily 1w`, so the rule simply keeps one archive fewer. -2026-05-28 16:00 is exactly one week before `--since`. Since `create` always +2026-05-28 16:00 is exactly one week before `--from`. Since `create` always runs after 16:00, the archive created on 2026-05-28 is kept, too. 
Without -`--since`, Borg would cut off at 2026-05-28 16:12 instead, which would likely -mean that the archive created on 2026-05-28 would be pruned. `--since` ensures +`--from`, Borg would cut off at 2026-05-28 16:12 instead, which would likely +mean that the archive created on 2026-05-28 would be pruned. `--from` ensures that 2026-05-28 is consistently kept. If you want it consistently pruned, try a -later reference time, e.g. `--since '2026-06-04 23:59:59'`. +later reference time, e.g. `--from '2026-06-04 23:59:59'`. 2026-05-31 is considered not only by `--keep-daily`, but by `--keep-weekly` and `--keep-monthly`, too. The archive is effectively kept by `--keep-daily`, diff --git a/docs/usage/general/date-time.rst.inc b/docs/usage/general/date-time.rst.inc index 8a44ab6726..db1fcca608 100644 --- a/docs/usage/general/date-time.rst.inc +++ b/docs/usage/general/date-time.rst.inc @@ -20,7 +20,7 @@ days (e.g. ``7d``), hours (e.g. ``8H``), minutes (e.g. ``30M``), or seconds The ``borg prune`` ``--keep-*`` retention options accept either a plain count (e.g. ``--keep-daily 7``, keeping up to 7 daily archives) or a time interval (e.g. ``--keep-daily 7d``, keeping one daily archive per day within a 7-day window). -When using interval-based retention, ``--since`` may be specified to set the +When using interval-based retention, ``--from`` may be specified to set the reference timestamp for the interval (defaults to the current time). Please note that Borg treats months (e.g. ``12m``) as fixed 31-day periods diff --git a/docs/usage/prune.rst b/docs/usage/prune.rst index cedcc92a62..9fecbef8d0 100644 --- a/docs/usage/prune.rst +++ b/docs/usage/prune.rst @@ -27,7 +27,7 @@ The ``--keep-*`` options accept either a **count** (e.g. ``--keep-daily 7``) or a **time interval** (e.g. ``--keep-daily 7d``). A count keeps up to *N* archives per period (e.g. the last 7 daily archives), while an interval keeps one archive per period within that time span (e.g. 
one daily archive per day in the -last 7-day window). When using intervals, you may also specify ``--since`` to +last 7-day window). When using intervals, you may also specify ``--from`` to set the reference timestamp for interval calculation. :: @@ -55,7 +55,7 @@ set the reference timestamp for interval calculation. $ borg prune -v --list --dry-run --keep-daily=7d # Same as above, but with a fixed reference timestamp: - $ borg prune -v --list --dry-run --since 2025-12-01T00:00:00+02:00 --keep-daily=7d + $ borg prune -v --list --dry-run --from 2025-12-01T00:00:00+02:00 --keep-daily=7d # Keep the last 14 archives using `--keep`: $ borg prune -v --list --dry-run --keep 14 diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index 1e371e04b2..1911b864a1 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -107,8 +107,8 @@ def quarterly_3monthly_period_func(dt): PRUNE_QUARTERLY_3MONTHLY = PruningRule("quarterly_3monthly", quarterly_3monthly_period_func) PRUNE_YEARLY = PruningRule("yearly", pattern_period_func("%Y")) -# Fake rule used to indicate archives skipped by --since -PRUNE_SINCE = PruningRule("skip", unique_period_func()) +# Fake rule used to indicate archives skipped by --from +PRUNE_FROM = PruningRule("skip", unique_period_func()) PRUNING_RULES = [ PRUNE_KEEP, @@ -182,18 +182,18 @@ def do_prune(self, args, repository, manifest): # Archives to keep along with the rule that ensured them being kept keep = {} - since = getattr(args, PRUNE_SINCE.key) + from_timestamp = getattr(args, PRUNE_FROM.key) candidate_archives = archives - if since is not None: - base_timestamp = since + if from_timestamp is not None: + base_timestamp = from_timestamp - # `--since` is a prefilter: Archives made at or after this time are kept by default. They are not considered + # `--from` is a prefilter: Archives made at or after this time are kept by default. 
They are not considered # for pruning at all and thus won't falsely occupy an active retention period. for archive in archives: - if archive.ts < since: + if archive.ts < from_timestamp: break - keep[archive] = KeepResult(rule=PRUNE_SINCE, idx=len(keep)) + keep[archive] = KeepResult(rule=PRUNE_FROM, idx=len(keep)) candidate_archives = archives[len(keep) :] else: base_timestamp = datetime.now().astimezone() @@ -366,7 +366,7 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): policy. It accepts a count or a time interval for retention (e.g. ``10`` or ``7d``, ``4w``). With a count it keeps at most that many recent archives; with an interval it keeps all archives created within - that time window. When ``--since`` is given together with an interval + that time window. When ``--from`` is given together with an interval retention, the interval is measured backwards from that timestamp instead of from the current time. See ``Date and Time`` docs for exact INTERVAL format. @@ -380,14 +380,14 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): period, e.g. one per day or one per month until the retention count is met). With a retention interval, they keep one archive per period within that time span (e.g. at most one per day in a span of seven - days, even if some days had none) -- measured from ``--since`` if given, + days, even if some days had none) -- measured from ``--from`` if given, otherwise from the current time. Specifying a count of ``-1`` (or the word ``all``) means no limit. A zero count or zero-length interval keeps nothing. - The ``--since`` option restricts pruning to archives older than the given + The ``--from`` option restricts pruning to archives older than the given TIMESTAMP. Archives made at or after this timestamp are kept unconditionally - as a pre-filter. When ``--since`` is used together with interval-based + as a pre-filter. 
When ``--from`` is used together with interval-based ``--keep-*`` options (e.g. ``--keep-daily 7d``), the interval is measured backwards from the given timestamp rather than from the current time. Count-based retention does not count the unconditionally kept archives. @@ -417,10 +417,10 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): older history gradually thins out with time. For example, ``--keep-daily 7d --keep-weekly 4w --keep-monthly 6`` keeps an archive per day for the past week, per week for the past month, and - one per month for six months after that. Combine this with ``--since`` + one per month for six months after that. Combine this with ``--from`` to align time windows to calendar boundaries rather than the exact moment you run prune for more predictable behavior of coarser rules: - ``--keep-daily 7d --keep-weekly 4w --since $(date +%F)``. + ``--keep-daily 7d --keep-weekly 4w --from $(date +%F)``. Count-based retention keeps archives less bound to time. For instance, ``--keep-yearly 3`` retains 3 yearly archives however far back they @@ -476,9 +476,9 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): "Some keys are always present. 
Note: JSON can only represent text.", ) subparser.add_argument( - "--since", + "--from", metavar="TIMESTAMP", - dest=PRUNE_SINCE.key, + dest=PRUNE_FROM.key, type=timestamp, action=Highlander, help="only consider archives older than this for pruning", diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 4d14f3cd45..396a7f342f 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -154,14 +154,14 @@ def test_prune_repository_example_interval(archivers, request, backup_files): "prune", "--list", "--dry-run", - "--since=2026-06-04T16:00:00+00:00", + "--from=2026-06-04T16:00:00+00:00", "--keep-daily=1w", "--keep-weekly=4", "--keep-monthly=5m", "--keep-yearly=2", ) - # 2026-06-04 is kept unconditionally by the --since prefilter. + # 2026-06-04 is kept unconditionally by the --from prefilter. assert re.search(r"Keeping archive \(rule: skip #1\):\s+backup_2026-06-04", output) daily_kept = [ @@ -530,14 +530,14 @@ def test_prune_keep_int_or_interval(archivers, request, backup_files, keep_arg): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) - # Two archives at the --since boundary prove the inclusive timestamp check. + # Two archives at the --from boundary prove the inclusive timestamp check. _create_archive_dt(archiver, backup_files, "test-1", dt) _create_archive_dt( archiver, backup_files, "test-2", dt - timedelta(microseconds=999999) ) # Would be pruned if `secondly`-rule was active.
_create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(seconds=1)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(seconds=1, microseconds=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: skip #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: keep #1\):\s+test-2", output) assert re.search(r"Keeping archive \(rule: keep #2\):\s+test-3", output) @@ -553,7 +553,7 @@ def test_prune_keep_secondly_int_or_interval(archivers, request, backup_files, k _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(seconds=1, microseconds=999999)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(seconds=2)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(seconds=2, microseconds=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: secondly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: secondly #2\):\s+test-2", output) assert re.search(r"Would prune:\s+test-3", output) @@ -570,7 +570,7 @@ def test_prune_keep_minutely_int_or_interval(archivers, request, backup_files, k _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(minutes=2)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(minutes=3)) _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(minutes=3, microseconds=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: minutely #1\):\s+test-1", output) assert re.search(r"Keeping 
archive \(rule: minutely #2\):\s+test-2", output) assert re.search(r"Would prune:\s+test-3", output) @@ -588,7 +588,7 @@ def test_prune_keep_hourly_int_or_interval(archivers, request, backup_files, kee _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(hours=2)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(hours=3)) _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(hours=3, microseconds=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: hourly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: hourly #2\):\s+test-2", output) assert re.search(r"Would prune:\s+test-3", output) @@ -606,7 +606,7 @@ def test_prune_keep_daily_int_or_interval(archivers, request, backup_files, keep _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=2)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=3)) _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=3, microseconds=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) assert re.search(r"Would prune:\s+test-3", output) @@ -624,7 +624,7 @@ def test_prune_keep_weekly_int_or_interval(archivers, request, backup_files, kee _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=14)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=21)) _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=21, microseconds=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", 
dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: weekly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: weekly #2\):\s+test-2", output) assert re.search(r"Would prune:\s+test-3", output) @@ -642,7 +642,7 @@ def test_prune_keep_monthly_int_or_interval(archivers, request, backup_files, ke _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=62)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=93)) _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=93, microseconds=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: monthly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: monthly #2\):\s+test-2", output) assert re.search(r"Would prune:\s+test-3", output) @@ -661,7 +661,7 @@ def test_prune_keep_13weekly_int_or_interval(archivers, request, backup_files, k _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=182)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=273)) _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=273, microseconds=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: quarterly_13weekly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: quarterly_13weekly #2\):\s+test-2", output) assert re.search(r"Would prune:\s+test-3", output) @@ -680,7 +680,7 @@ def test_prune_keep_3monthly_int_or_interval(archivers, request, backup_files, k _create_archive_dt(archiver, backup_files, "test-4", dt - 
timedelta(days=275)) _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=275, microseconds=1)) # 275d is the interval from dt to the oldest kept monthly archive - output = cmd(archiver, "prune", "--list", "--short", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--short", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: quarterly_3monthly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: quarterly_3monthly #2\):\s+test-2", output) assert re.search(r"Would prune:\s+test-3", output) @@ -698,7 +698,7 @@ def test_prune_keep_yearly_int_or_interval(archivers, request, backup_files, kee _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=730)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=1095)) _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=1095, microseconds=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: yearly #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: yearly #2\):\s+test-2", output) assert re.search(r"Would prune:\s+test-3", output) @@ -714,7 +714,7 @@ def test_prune_keep_daily_all(archivers, request, backup_files, keep_arg): _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=1)) _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(days=2)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=3)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: daily 
#2\):\s+test-2", output) assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-3", output) @@ -730,7 +730,7 @@ def test_prune_keep_flat_all(archivers, request, backup_files, keep_arg): _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(microseconds=2)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=3)) _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=33333)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: keep #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: keep #2\):\s+test-2", output) assert re.search(r"Keeping archive \(rule: keep #3\):\s+test-3", output) @@ -824,7 +824,7 @@ def test_prune_int_rolling_schedule_oldest_retention(): assert archives[-1].ts.strftime("%m-%d") == "01-31" -def test_prune_since_prefiltered_archives_ignored_in_pruning(archivers, request, backup_files): +def test_prune_from_prefiltered_archives_ignored_in_pruning(archivers, request, backup_files): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) dt = datetime(2024, 6, 6, 12, 0, 0, tzinfo=timezone.utc) @@ -833,7 +833,7 @@ def test_prune_since_prefiltered_archives_ignored_in_pruning(archivers, request, _create_archive_dt(archiver, backup_files, "test-b", dt - timedelta(hours=1)) _create_archive_dt(archiver, backup_files, "test-c", dt - timedelta(days=1)) - output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), "--keep-daily=1") + output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), "--keep-daily=1") # 'test-b' is kept, meaning 'test-a' was entirely skipped for pruning consideration. # They would otherwise have occupied the same period. 
From ab2d78b84a34a7cba42a28fb7f4db3b80e2dadcc Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sun, 21 Jun 2026 10:00:00 +0200 Subject: [PATCH 22/22] Tests keep-all with shorter archive span 33333 days in the past is before 1970 and may not be representable on some platforms. 3333 is just ~9 years. --- src/borg/testsuite/archiver/prune_cmd_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 396a7f342f..2ea57ef32f 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -729,7 +729,7 @@ def test_prune_keep_flat_all(archivers, request, backup_files, keep_arg): _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(microseconds=1)) _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(microseconds=2)) _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=3)) - _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=33333)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(days=3333)) output = cmd(archiver, "prune", "--list", "--dry-run", "--from", dt.isoformat(), keep_arg) assert re.search(r"Keeping archive \(rule: keep #1\):\s+test-1", output) assert re.search(r"Keeping archive \(rule: keep #2\):\s+test-2", output)