From 76826d97eb07ddcc2d05959126e4e1c7c8ebe977 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 19 May 2025 09:50:02 +0300 Subject: [PATCH 01/22] Add auto-repair options for broken AOF tail on startup --- src/aof.c | 53 ++++++++++++++++++++++++------- src/config.c | 3 ++ src/server.h | 3 ++ tests/integration/aof.tcl | 65 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 11 deletions(-) diff --git a/src/aof.c b/src/aof.c index 22c64d14b60..f6ee58d34d9 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1489,7 +1489,6 @@ int loadSingleAppendOnlyFile(char *filename) { off_t valid_before_multi = 0; /* Offset before MULTI command loaded. */ off_t last_progress_report_size = 0; int ret = AOF_OK; - sds aof_filepath = makePath(server.aof_dirname, filename); FILE *fp = fopen(aof_filepath, "r"); if (fp == NULL) { @@ -1658,7 +1657,7 @@ int loadSingleAppendOnlyFile(char *filename) { /* Clean up. Command code may have changed argv/argc so we use the * argv/argc of the client instead of the local variables. */ freeClientArgv(fakeClient); - if (server.aof_load_truncated) valid_up_to = ftello(fp); + if (server.aof_load_truncated || server.aof_load_broken ) valid_up_to = ftello(fp); if (server.key_load_delay) debugDelay(server.key_load_delay); } @@ -1690,23 +1689,23 @@ int loadSingleAppendOnlyFile(char *filename) { if (server.aof_load_truncated) { serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename); serverLog(LL_WARNING,"!!! Truncating the AOF %s at offset %llu !!!", - filename, (unsigned long long) valid_up_to); + filename, (unsigned long long) valid_up_to); if (valid_up_to == -1 || truncate(aof_filepath,valid_up_to) == -1) { if (valid_up_to == -1) { serverLog(LL_WARNING,"Last valid command offset is invalid"); } else { serverLog(LL_WARNING,"Error truncating the AOF file %s: %s", - filename, strerror(errno)); + filename, strerror(errno)); } } else { /* Make sure the AOF file descriptor points to the end of the - * file after the truncate call. */ + * file after the truncate call. */ if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) { serverLog(LL_WARNING,"Can't seek the end of the AOF file %s: %s", - filename, strerror(errno)); + filename, strerror(errno)); } else { serverLog(LL_WARNING, - "AOF %s loaded anyway because aof-load-truncated is enabled", filename); + "AOF %s loaded anyway because aof-load-truncated is enabled", filename); ret = AOF_TRUNCATED; goto loaded_ok; } @@ -1719,9 +1718,41 @@ int loadSingleAppendOnlyFile(char *filename) { goto cleanup; fmterr: /* Format error. */ - serverLog(LL_WARNING, "Bad file format reading the append only file %s: " - "make a backup of your AOF file, then use ./redis-check-aof --fix ", filename); + /* fmterr may be caused by accidentally machine shutdown, so if the broken tail is less than a specified size, + * try to recover it automatically */ + if (server.aof_load_broken) { + if (valid_up_to == -1) { + serverLog(LL_WARNING,"Last valid command offset is invalid"); + } else if (sb.st_size - valid_up_to < server.aof_load_broken_max_size) { + if (truncate(aof_filepath,valid_up_to) == -1) { + serverLog(LL_WARNING,"Error truncating the AOF file: %s", + strerror(errno)); + } else { + /* Make sure the AOF file descriptor points to the end of the + * file after the truncate call. */ + if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) { + serverLog(LL_WARNING,"Can't seek the end of the AOF file: %s", + strerror(errno)); + + } else { + serverLog(LL_WARNING, + "AOF loaded anyway because aof-load-broken is enabled and broken size '%ld' is less than aof-load-broken-max-size '%ld'", + sb.st_size - valid_up_to, server.aof_load_broken_max_size); + ret = AOF_BROKEN_RECOVERED; + goto loaded_ok; + } + } + }else { // The size of the corrupted portion exceeds the configured limit + serverLog(LL_WARNING, + "AOF did not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%ld' is bigger than aof-load-broken-max-size '%ld'", + sb.st_size - valid_up_to, server.aof_load_broken_max_size); + } + }else { + serverLog(LL_WARNING, "Bad file format reading the append only file %s: " + "make a backup of your AOF file, then use ./redis-check-aof --fix ", filename); + } ret = AOF_FAILED; + /* fall through to cleanup. */ cleanup: @@ -1794,13 +1825,13 @@ int loadAppendOnlyFiles(aofManifest *am) { last_file = ++aof_num == total_num; start = ustime(); ret = loadSingleAppendOnlyFile(aof_name); - if (ret == AOF_OK || (ret == AOF_TRUNCATED && last_file)) { + if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)){ serverLog(LL_NOTICE, "DB loaded from base file %s: %.3f seconds", aof_name, (float)(ustime()-start)/1000000); } /* If the truncated file is not the last file, we consider this to be a fatal error. */ - if (ret == AOF_TRUNCATED && !last_file) { + if ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && !last_file) { ret = AOF_FAILED; serverLog(LL_WARNING, "Fatal error: the truncated file is not the last file"); } diff --git a/src/config.c b/src/config.c index 673cf60c162..8eaaa410d55 100644 --- a/src/config.c +++ b/src/config.c @@ -3090,6 +3090,7 @@ standardConfig static_configs[] = { createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, server.cluster_require_full_coverage, 1, NULL, NULL), createBoolConfig("rdb-save-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.rdb_save_incremental_fsync, 1, NULL, NULL), createBoolConfig("aof-load-truncated", NULL, MODIFIABLE_CONFIG, server.aof_load_truncated, 1, NULL, NULL), + createBoolConfig("aof-load-broken", NULL, MODIFIABLE_CONFIG, server.aof_load_broken, 0, NULL, NULL), createBoolConfig("aof-use-rdb-preamble", NULL, MODIFIABLE_CONFIG, server.aof_use_rdb_preamble, 1, NULL, NULL), createBoolConfig("aof-timestamp-enabled", NULL, MODIFIABLE_CONFIG, server.aof_timestamp_enabled, 0, NULL, NULL), createBoolConfig("cluster-replica-no-failover", "cluster-slave-no-failover", MODIFIABLE_CONFIG, server.cluster_slave_no_failover, 0, NULL, updateClusterFlags), /* Failover by default. */ @@ -3249,6 +3250,8 @@ standardConfig static_configs[] = { createSizeTConfig("tracking-table-max-keys", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.tracking_table_max_keys, 1000000, INTEGER_CONFIG, NULL, NULL), /* Default: 1 million keys max. */ createSizeTConfig("client-query-buffer-limit", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.client_max_querybuf_len, 1024*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Default: 1GB max query buffer. */ createSSizeTConfig("maxmemory-clients", NULL, MODIFIABLE_CONFIG, -100, SSIZE_MAX, server.maxmemory_clients, 0, MEMORY_CONFIG | PERCENT_CONFIG, NULL, applyClientMaxMemoryUsage), + createSSizeTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, (4*1024), INTEGER_CONFIG, NULL, NULL), + /* Other configs */ createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ diff --git a/src/server.h b/src/server.h index 07b784d59e5..50be1b87219 100644 --- a/src/server.h +++ b/src/server.h @@ -345,6 +345,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define AOF_OPEN_ERR 3 #define AOF_FAILED 4 #define AOF_TRUNCATED 5 +#define AOF_BROKEN_RECOVERED 6 /* RDB return values for rdbLoad. */ #define RDB_OK 0 @@ -2006,6 +2007,8 @@ struct redisServer { int aof_last_write_status; /* C_OK or C_ERR */ int aof_last_write_errno; /* Valid if aof write/fsync status is ERR */ int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */ + int aof_load_broken; /* Don't stop on bad fmt. */ + off_t aof_load_broken_max_size; /* the max size of broken AOF tail than can be ignored. */ int aof_use_rdb_preamble; /* Specify base AOF to use RDB encoding on AOF rewrites. */ redisAtomic int aof_bio_fsync_status; /* Status of AOF fsync in bio job. */ redisAtomic int aof_bio_fsync_errno; /* Errno of AOF fsync in bio job. */ diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index d4a556e3bc5..e8d65e9171d 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -701,4 +701,69 @@ tags {"aof external:skip"} { assert_equal {1} [r get t] } } + + + # Check AOF load broken behavior + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Unfinished MULTI: Server should start if aof-load-broken is yes" { + assert_equal 1 [is_alive [srv pid]] + } + } + + # Should also start with broken AOF. + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo 1] + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof "corruption" + } + + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Short read: Server should start if aof-load-broken is yes" { + assert_equal 1 [is_alive [srv pid]] + } + + # The AOF file is expected to be correct because default value for aof-load-broken-max-size is 4096, + #so the AOF will reload without the corruption + test "Broken AOF loaded: we expect foo to be equal to 5" { + set client [redis [srv host] [srv port] 0 $::tls] + wait_done_loading $client + assert {[$client get foo] eq "5"} + } + + test "Append a new command after loading an incomplete AOF" { + $client incr foo + } + } + + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Short read + command: Server should start" { + assert_equal 1 [is_alive [srv pid]] + } + + test "Broken AOF loaded: we expect foo to be equal to 6 now" { + set client [redis [srv host] [srv port] 0 $::tls] + wait_done_loading $client + assert {[$client get foo] eq "6"} + } + } + + # Test that the server exits when the AOF contains a format error + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [string range [formatCommand incr foo] 0 end-3] + append_to_aof "corruption" + } + + # We set the maximum allowed corrupted size to 2 bytes, but the actual corrupted portion is larger, + # so the AOF file will not be reloaded. + start_server_aof_ex [list dir $server_path aof-load-broken yes aof-load-broken-max-size 2] [list wait_ready false] { + test "Bad format: Server should have logged an error" { + + wait_for_log_messages 0 {"*AOF did not loaded because the size*"} 0 10 1000 + } + } } + From f9f91f4715dfa3b9c99596db278d09a8754c8b8f Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 19 May 2025 10:21:33 +0300 Subject: [PATCH 02/22] change aof-load-broken-max-size configuration --- src/config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index 8eaaa410d55..fa47e5608aa 100644 --- a/src/config.c +++ b/src/config.c @@ -3256,7 +3256,8 @@ standardConfig static_configs[] = { /* Other configs */ createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL), - createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*512, INTEGER_CONFIG, NULL, NULL), + createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*1024*2, INTEGER_CONFIG, NULL, NULL), + createOffTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, (4*1024), INTEGER_CONFIG, NULL, NULL), createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.tls_port, 0, INTEGER_CONFIG, NULL, applyTLSPort), /* TCP port. */ createIntConfig("tls-session-cache-size", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tls_ctx_config.session_cache_size, 20*1024, INTEGER_CONFIG, NULL, applyTlsCfg), From f7854e1b8114600ed022c689cee7fa4c349f6cf0 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 19 May 2025 11:16:59 +0300 Subject: [PATCH 03/22] fix of-load-broken-max-size configuration --- src/config.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/config.c b/src/config.c index fa47e5608aa..238eb73a75c 100644 --- a/src/config.c +++ b/src/config.c @@ -3250,7 +3250,6 @@ standardConfig static_configs[] = { createSizeTConfig("tracking-table-max-keys", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.tracking_table_max_keys, 1000000, INTEGER_CONFIG, NULL, NULL), /* Default: 1 million keys max. */ createSizeTConfig("client-query-buffer-limit", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.client_max_querybuf_len, 1024*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Default: 1GB max query buffer. */ createSSizeTConfig("maxmemory-clients", NULL, MODIFIABLE_CONFIG, -100, SSIZE_MAX, server.maxmemory_clients, 0, MEMORY_CONFIG | PERCENT_CONFIG, NULL, applyClientMaxMemoryUsage), - createSSizeTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, (4*1024), INTEGER_CONFIG, NULL, NULL), /* Other configs */ From 948723791d1951f8a8f76593d0644850783f78f3 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 21 May 2025 12:05:15 +0300 Subject: [PATCH 04/22] fix unnecessary changes --- src/aof.c | 19 +++++++++---------- src/config.c | 1 - src/server.h | 2 +- tests/integration/aof.tcl | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/aof.c b/src/aof.c index f6ee58d34d9..62ad539659d 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1489,6 +1489,7 @@ int loadSingleAppendOnlyFile(char *filename) { off_t valid_before_multi = 0; /* Offset before MULTI command loaded. */ off_t last_progress_report_size = 0; int ret = AOF_OK; + sds aof_filepath = makePath(server.aof_dirname, filename); FILE *fp = fopen(aof_filepath, "r"); if (fp == NULL) { @@ -1684,28 +1685,27 @@ int loadSingleAppendOnlyFile(char *filename) { ret = AOF_FAILED; goto cleanup; } - uxeof: /* Unexpected AOF end of file. */ if (server.aof_load_truncated) { serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename); serverLog(LL_WARNING,"!!! Truncating the AOF %s at offset %llu !!!", - filename, (unsigned long long) valid_up_to); + filename, (unsigned long long) valid_up_to); if (valid_up_to == -1 || truncate(aof_filepath,valid_up_to) == -1) { if (valid_up_to == -1) { serverLog(LL_WARNING,"Last valid command offset is invalid"); } else { serverLog(LL_WARNING,"Error truncating the AOF file %s: %s", - filename, strerror(errno)); + filename, strerror(errno)); } } else { /* Make sure the AOF file descriptor points to the end of the - * file after the truncate call. */ + * file after the truncate call. */ if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) { serverLog(LL_WARNING,"Can't seek the end of the AOF file %s: %s", - filename, strerror(errno)); + filename, strerror(errno)); } else { serverLog(LL_WARNING, - "AOF %s loaded anyway because aof-load-truncated is enabled", filename); + "AOF %s loaded anyway because aof-load-truncated is enabled", filename); ret = AOF_TRUNCATED; goto loaded_ok; } @@ -1733,7 +1733,6 @@ int loadSingleAppendOnlyFile(char *filename) { if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) { serverLog(LL_WARNING,"Can't seek the end of the AOF file: %s", strerror(errno)); - } else { serverLog(LL_WARNING, "AOF loaded anyway because aof-load-broken is enabled and broken size '%ld' is less than aof-load-broken-max-size '%ld'", @@ -1742,12 +1741,12 @@ int loadSingleAppendOnlyFile(char *filename) { goto loaded_ok; } } - }else { // The size of the corrupted portion exceeds the configured limit + } else { /* The size of the corrupted portion exceeds the configured limit. */ serverLog(LL_WARNING, "AOF did not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%ld' is bigger than aof-load-broken-max-size '%ld'", sb.st_size - valid_up_to, server.aof_load_broken_max_size); } - }else { + } else { serverLog(LL_WARNING, "Bad file format reading the append only file %s: " "make a backup of your AOF file, then use ./redis-check-aof --fix ", filename); } @@ -1825,7 +1824,7 @@ int loadAppendOnlyFiles(aofManifest *am) { last_file = ++aof_num == total_num; start = ustime(); ret = loadSingleAppendOnlyFile(aof_name); - if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)){ + if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)) { serverLog(LL_NOTICE, "DB loaded from base file %s: %.3f seconds", aof_name, (float)(ustime()-start)/1000000); } diff --git a/src/config.c b/src/config.c index 238eb73a75c..fc26ba48119 100644 --- a/src/config.c +++ b/src/config.c @@ -3251,7 +3251,6 @@ standardConfig static_configs[] = { createSizeTConfig("client-query-buffer-limit", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.client_max_querybuf_len, 1024*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Default: 1GB max query buffer. */ createSSizeTConfig("maxmemory-clients", NULL, MODIFIABLE_CONFIG, -100, SSIZE_MAX, server.maxmemory_clients, 0, MEMORY_CONFIG | PERCENT_CONFIG, NULL, applyClientMaxMemoryUsage), - /* Other configs */ createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL), diff --git a/src/server.h b/src/server.h index 50be1b87219..425b874c2e4 100644 --- a/src/server.h +++ b/src/server.h @@ -2008,7 +2008,7 @@ struct redisServer { int aof_last_write_errno; /* Valid if aof write/fsync status is ERR */ int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */ int aof_load_broken; /* Don't stop on bad fmt. */ - off_t aof_load_broken_max_size; /* the max size of broken AOF tail than can be ignored. */ + off_t aof_load_broken_max_size; /* The max size of broken AOF tail than can be ignored. */ int aof_use_rdb_preamble; /* Specify base AOF to use RDB encoding on AOF rewrites. */ redisAtomic int aof_bio_fsync_status; /* Status of AOF fsync in bio job. */ redisAtomic int aof_bio_fsync_errno; /* Errno of AOF fsync in bio job. */ diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index e8d65e9171d..30e95ff2910 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -726,7 +726,7 @@ tags {"aof external:skip"} { } # The AOF file is expected to be correct because default value for aof-load-broken-max-size is 4096, - #so the AOF will reload without the corruption + # so the AOF will reload without the corruption test "Broken AOF loaded: we expect foo to be equal to 5" { set client [redis [srv host] [srv port] 0 $::tls] wait_done_loading $client From 11754c16acc4396ebcdc48591ab9a574ee034555 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Thu, 22 May 2025 13:07:49 +0300 Subject: [PATCH 05/22] remove unnecessary change --- src/aof.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/aof.c b/src/aof.c index 62ad539659d..65c8ae95664 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1751,7 +1751,6 @@ int loadSingleAppendOnlyFile(char *filename) { "make a backup of your AOF file, then use ./redis-check-aof --fix ", filename); } ret = AOF_FAILED; - /* fall through to cleanup. */ cleanup: From 023f121b16ab7bfe4840af1ef534735e8b31812d Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 26 May 2025 14:29:37 +0300 Subject: [PATCH 06/22] Fix format specifier in AOF error message --- src/aof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aof.c b/src/aof.c index 65c8ae95664..b6c9d833ac7 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1735,7 +1735,7 @@ int loadSingleAppendOnlyFile(char *filename) { strerror(errno)); } else { serverLog(LL_WARNING, - "AOF loaded anyway because aof-load-broken is enabled and broken size '%ld' is less than aof-load-broken-max-size '%ld'", + "AOF loaded anyway because aof-load-broken is enabled and broken size '%lld' is less than aof-load-broken-max-size '%lld'", sb.st_size - valid_up_to, server.aof_load_broken_max_size); ret = AOF_BROKEN_RECOVERED; goto loaded_ok; @@ -1743,7 +1743,7 @@ int loadSingleAppendOnlyFile(char *filename) { } } else { /* The size of the corrupted portion exceeds the configured limit. */ serverLog(LL_WARNING, - "AOF did not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%ld' is bigger than aof-load-broken-max-size '%ld'", + "AOF was not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%lld' is bigger than aof-load-broken-max-size '%lld'", sb.st_size - valid_up_to, server.aof_load_broken_max_size); } } else { From 09e8235607d13f4662df8f68d5405b31cd5abaa9 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 27 May 2025 09:47:24 +0300 Subject: [PATCH 07/22] Fix format specifier in AOF error message --- src/aof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aof.c b/src/aof.c index b6c9d833ac7..c54a122d39e 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1736,7 +1736,7 @@ int loadSingleAppendOnlyFile(char *filename) { } else { serverLog(LL_WARNING, "AOF loaded anyway because aof-load-broken is enabled and broken size '%lld' is less than aof-load-broken-max-size '%lld'", - sb.st_size - valid_up_to, server.aof_load_broken_max_size); + (long long)(sb.st_size - valid_up_to), (long long)(server.aof_load_broken_max_size)); ret = AOF_BROKEN_RECOVERED; goto loaded_ok; } @@ -1744,7 +1744,7 @@ int loadSingleAppendOnlyFile(char *filename) { } else { /* The size of the corrupted portion exceeds the configured limit. */ serverLog(LL_WARNING, "AOF was not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%lld' is bigger than aof-load-broken-max-size '%lld'", - sb.st_size - valid_up_to, server.aof_load_broken_max_size); + (long long)(sb.st_size - valid_up_to), (long long)(server.aof_load_broken_max_size)); } } else { serverLog(LL_WARNING, "Bad file format reading the append only file %s: " From 459faa743a24e570a61cb04d3e96b8f15dcd1e73 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 27 May 2025 12:05:38 +0300 Subject: [PATCH 08/22] Fix tcl error msg --- tests/integration/aof.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 30e95ff2910..ca51a5347b6 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -762,7 +762,7 @@ tags {"aof external:skip"} { start_server_aof_ex [list dir $server_path aof-load-broken yes aof-load-broken-max-size 2] [list wait_ready false] { test "Bad format: Server should have logged an error" { - wait_for_log_messages 0 {"*AOF did not loaded because the size*"} 0 10 1000 + wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } } From bcd7eea31ea3b5d730dfb9a1aa3428dbebd7f31f Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 3 Jun 2025 09:40:12 +0300 Subject: [PATCH 09/22] Add incr file support + remove spaces --- src/aof.c | 2 +- src/config.c | 2 +- tests/integration/aof.tcl | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/aof.c b/src/aof.c index c54a122d39e..43342a043a8 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1853,7 +1853,7 @@ int loadAppendOnlyFiles(aofManifest *am) { last_file = ++aof_num == total_num; start = ustime(); ret = loadSingleAppendOnlyFile(aof_name); - if (ret == AOF_OK || (ret == AOF_TRUNCATED && last_file)) { + if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)) { serverLog(LL_NOTICE, "DB loaded from incr file %s: %.3f seconds", aof_name, (float)(ustime()-start)/1000000); } diff --git a/src/config.c b/src/config.c index fc26ba48119..dfa1a4a8cfb 100644 --- a/src/config.c +++ b/src/config.c @@ -3255,7 +3255,7 @@ standardConfig static_configs[] = { createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL), createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*1024*2, INTEGER_CONFIG, NULL, NULL), - createOffTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, (4*1024), INTEGER_CONFIG, NULL, NULL), + createOffTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, 4*1024, INTEGER_CONFIG, NULL, NULL), createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.tls_port, 0, INTEGER_CONFIG, NULL, applyTLSPort), /* TCP port. */ createIntConfig("tls-session-cache-size", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tls_ctx_config.session_cache_size, 20*1024, INTEGER_CONFIG, NULL, applyTlsCfg), diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index ca51a5347b6..a7dd6c846e8 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -702,7 +702,6 @@ tags {"aof external:skip"} { } } - # Check AOF load broken behavior start_server_aof [list dir $server_path aof-load-broken yes] { test "Unfinished MULTI: Server should start if aof-load-broken is yes" { @@ -765,5 +764,4 @@ tags {"aof external:skip"} { wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } -} - +} \ No newline at end of file From 975136036aba31f1f1deb6d8a0e28a1aa0cb6f76 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 11 Jun 2025 11:00:51 +0300 Subject: [PATCH 10/22] add tests for base aof --- src/aof.c | 2 +- src/config.c | 2 +- tests/integration/aof.tcl | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/aof.c b/src/aof.c index 43342a043a8..7bcecb4771b 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1658,7 +1658,7 @@ int loadSingleAppendOnlyFile(char *filename) { /* Clean up. Command code may have changed argv/argc so we use the * argv/argc of the client instead of the local variables. */ freeClientArgv(fakeClient); - if (server.aof_load_truncated || server.aof_load_broken ) valid_up_to = ftello(fp); + if (server.aof_load_truncated || server.aof_load_broken) valid_up_to = ftello(fp); if (server.key_load_delay) debugDelay(server.key_load_delay); } diff --git a/src/config.c b/src/config.c index dfa1a4a8cfb..27b5c44a06e 100644 --- a/src/config.c +++ b/src/config.c @@ -3254,7 +3254,7 @@ standardConfig static_configs[] = { /* Other configs */ createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL), - createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*1024*2, INTEGER_CONFIG, NULL, NULL), + createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*512, INTEGER_CONFIG, NULL, NULL), createOffTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, 4*1024, INTEGER_CONFIG, NULL, NULL), createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.tls_port, 0, INTEGER_CONFIG, NULL, applyTLSPort), /* TCP port. */ diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index a7dd6c846e8..4d2599a05e3 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -702,6 +702,35 @@ tags {"aof external:skip"} { } } + # Check AOF load broken behavior + # Corrupted base AOF, no incr AOF present + create_aof $aof_dirpath $aof_base_file { + append_to_aof [formatCommand set param ok] + append_to_aof "corruption" + } + + start_server_aof_ex [list dir $server_path aof-load-broken yes] [list wait_ready false] { + test "Log should mention truncated file is not last" { + wait_for_log_messages 0 { + {*AOF loaded anyway because aof-load-broken is enabled*} + {*Fatal error: the truncated file is not the last file*} + } 0 10 1000 + } + } + # Remove all incr AOF files to make the base file being the last file + exec rm -f $aof_dirpath/appendonly.aof.* + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Corrupted base AOF (last file): should recover" { + assert_equal 1 [is_alive [srv pid]] + } + + test "param should be 'ok'" { + set client [redis [srv host] [srv port]] + wait_done_loading $client + assert {[$client get param] eq "ok"} + } + } + # Check AOF load broken behavior start_server_aof [list dir $server_path aof-load-broken yes] { test "Unfinished MULTI: Server should start if aof-load-broken is yes" { @@ -709,7 +738,7 @@ tags {"aof external:skip"} { } } - # Should also start with broken AOF. + # Should also start with broken incer AOF. create_aof $aof_dirpath $aof_file { append_to_aof [formatCommand set foo 1] append_to_aof [formatCommand incr foo] @@ -760,8 +789,8 @@ tags {"aof external:skip"} { # so the AOF file will not be reloaded. start_server_aof_ex [list dir $server_path aof-load-broken yes aof-load-broken-max-size 2] [list wait_ready false] { test "Bad format: Server should have logged an error" { - + wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } -} \ No newline at end of file +# } \ No newline at end of file From 37e5bb5420e78d30647735116f5fdb553519159e Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Thu, 12 Jun 2025 11:01:48 +0300 Subject: [PATCH 11/22] fix spaces --- src/aof.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/aof.c b/src/aof.c index 7bcecb4771b..12bc54dfefb 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1718,8 +1718,8 @@ int loadSingleAppendOnlyFile(char *filename) { goto cleanup; fmterr: /* Format error. */ - /* fmterr may be caused by accidentally machine shutdown, so if the broken tail is less than a specified size, - * try to recover it automatically */ + /* fmterr may be caused by accidentally machine shutdown, so if the broken tail + * is less than a specified size, try to recover it automatically */ if (server.aof_load_broken) { if (valid_up_to == -1) { serverLog(LL_WARNING,"Last valid command offset is invalid"); @@ -1735,7 +1735,8 @@ int loadSingleAppendOnlyFile(char *filename) { strerror(errno)); } else { serverLog(LL_WARNING, - "AOF loaded anyway because aof-load-broken is enabled and broken size '%lld' is less than aof-load-broken-max-size '%lld'", + "AOF loaded anyway because aof-load-broken is enabled and " + "broken size '%lld' is less than aof-load-broken-max-size '%lld'", (long long)(sb.st_size - valid_up_to), (long long)(server.aof_load_broken_max_size)); ret = AOF_BROKEN_RECOVERED; goto loaded_ok; @@ -1743,7 +1744,9 @@ int loadSingleAppendOnlyFile(char *filename) { } } else { /* The size of the corrupted portion exceeds the configured limit. */ serverLog(LL_WARNING, - "AOF was not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%lld' is bigger than aof-load-broken-max-size '%lld'", + "AOF was not loaded because the size of the corrupted portion " + "exceeds the configured limit. aof-load-broken is enabled and broken size '%lld' " + "is bigger than aof-load-broken-max-size '%lld'", (long long)(sb.st_size - valid_up_to), (long long)(server.aof_load_broken_max_size)); } } else { From 825c78324620012b846cd78ae6a57ae6e7af37ec Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Sun, 22 Jun 2025 09:17:06 +0300 Subject: [PATCH 12/22] add support to incr aof + add test for an intermediate AOF file is broken --- src/aof.c | 2 +- tests/integration/aof.tcl | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/aof.c b/src/aof.c index 12bc54dfefb..00d387be7a5 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1866,7 +1866,7 @@ int loadAppendOnlyFiles(aofManifest *am) { if (ret == AOF_EMPTY) ret = AOF_OK; /* If the truncated file is not the last file, we consider this to be a fatal error. */ - if (ret == AOF_TRUNCATED && !last_file) { + if ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && !last_file) { ret = AOF_FAILED; serverLog(LL_WARNING, "Fatal error: the truncated file is not the last file"); } diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 4d2599a05e3..4674d8f489e 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -793,4 +793,31 @@ tags {"aof external:skip"} { wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } -# } \ No newline at end of file + # Create base and incr files + create_aof $aof_dirpath $aof_base_file { + append_to_aof [formatCommand set foo base] + } + + # Create first incr AOF (not last) + set mid_aof_file $aof_dirpath/appendonly.aof.1 + create_aof $aof_dirpath $mid_aof_file { + append_to_aof [formatCommand set foo mid] + append_to_aof "CORRUPTION" + } + + # Create second incr AOF (last and correct) + set last_aof_file $aof_dirpath/appendonly.aof.2 + create_aof $aof_dirpath $last_aof_file { + append_to_aof [formatCommand set foo last] + } + + # Start Redis with broken intermediate file + start_server_aof_ex [list dir $server_path aof-load-broken yes] [list wait_ready false] { + test "Intermediate AOF is broken: should load last and recover" { + wait_for_log_messages 0 { + {*AOF loaded anyway because aof-load-broken is enabled*} + {*Fatal error: the truncated file is not the last file*} + } 0 10 1000 + } + } +} \ No newline at end of file From a2022866a7592a67021b38dcda5e487370652fd8 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 1 Jul 2025 13:30:01 +0300 Subject: [PATCH 13/22] fix indentation and add more tests --- src/aof.c | 2 +- tests/integration/aof.tcl | 61 ++++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/aof.c b/src/aof.c index 00d387be7a5..18f0407e23e 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1719,7 +1719,7 @@ int loadSingleAppendOnlyFile(char *filename) { fmterr: /* Format error. */ /* fmterr may be caused by accidentally machine shutdown, so if the broken tail - * is less than a specified size, try to recover it automatically */ + * is less than a specified size, try to recover it automatically */ if (server.aof_load_broken) { if (valid_up_to == -1) { serverLog(LL_WARNING,"Last valid command offset is invalid"); diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 4674d8f489e..5c1b3ff43e5 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -703,7 +703,7 @@ tags {"aof external:skip"} { } # Check AOF load broken behavior - # Corrupted base AOF, no incr AOF present + # Corrupted base AOF, existing AOF files create_aof $aof_dirpath $aof_base_file { append_to_aof [formatCommand set param ok] append_to_aof "corruption" @@ -717,6 +717,7 @@ tags {"aof external:skip"} { } 0 10 1000 } } + # Remove all incr AOF files to make the base file being the last file exec rm -f $aof_dirpath/appendonly.aof.* start_server_aof [list dir $server_path aof-load-broken yes] { @@ -731,14 +732,7 @@ tags {"aof external:skip"} { } } - # Check AOF load broken behavior - start_server_aof [list dir $server_path aof-load-broken yes] { - test "Unfinished MULTI: Server should start if aof-load-broken is yes" { - assert_equal 1 [is_alive [srv pid]] - } - } - - # Should also start with broken incer AOF. + # Should also start with broken incr AOF. create_aof $aof_dirpath $aof_file { append_to_aof [formatCommand set foo 1] append_to_aof [formatCommand incr foo] @@ -789,35 +783,56 @@ tags {"aof external:skip"} { # so the AOF file will not be reloaded. start_server_aof_ex [list dir $server_path aof-load-broken yes aof-load-broken-max-size 2] [list wait_ready false] { test "Bad format: Server should have logged an error" { - wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } - # Create base and incr files - create_aof $aof_dirpath $aof_base_file { - append_to_aof [formatCommand set foo base] + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + append_to_manifest "file appendonly.aof.2.incr.aof seq 2 type i\n" + } + # Create base AOF file + set base_aof_file "$aof_dirpath/appendonly.aof.1.base.aof" + create_aof $aof_dirpath $base_aof_file { + append_to_aof [formatCommand set fo base] } - # Create first incr AOF (not last) - set mid_aof_file $aof_dirpath/appendonly.aof.1 + # Create middle incr AOF file with corruption + set mid_aof_file "$aof_dirpath/appendonly.aof.1.incr.aof" create_aof $aof_dirpath $mid_aof_file { - append_to_aof [formatCommand set foo mid] + append_to_aof [formatCommand set fo mid] append_to_aof "CORRUPTION" } - # Create second incr AOF (last and correct) - set last_aof_file $aof_dirpath/appendonly.aof.2 + # Create last incr AOF file (valid) + set last_aof_file "$aof_dirpath/appendonly.aof.2.incr.aof" create_aof $aof_dirpath $last_aof_file { - append_to_aof [formatCommand set foo last] + append_to_aof [formatCommand set fo last] } - # Start Redis with broken intermediate file + # Check that Redis fails to load because corruption is in the middle file start_server_aof_ex [list dir $server_path aof-load-broken yes] [list wait_ready false] { - test "Intermediate AOF is broken: should load last and recover" { + test "Intermediate AOF is broken: should log fatal and not start" { wait_for_log_messages 0 { - {*AOF loaded anyway because aof-load-broken is enabled*} {*Fatal error: the truncated file is not the last file*} } 0 10 1000 } } -} \ No newline at end of file + + # Swap mid and last files + set tmp_file "$aof_dirpath/temp.aof" + file rename -force $mid_aof_file $tmp_file + file rename -force $last_aof_file $mid_aof_file + file rename -force $tmp_file $last_aof_file + + # Should now start successfully since corruption is in last AOF file + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Corrupted last AOF file: Server should still start and recover" { + assert_equal 1 [is_alive [srv pid]] + set client [redis [srv host] [srv port]] + wait_done_loading $client + assert {[$client get fo] eq "mid"} + } + } +} From a2703d9b34dfc9724e595d31ef8f3857e6f4f922 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 1 Jul 2025 14:06:27 +0300 Subject: [PATCH 14/22] add tls --- tests/integration/aof.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 5c1b3ff43e5..cf4b64e3d38 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -726,7 +726,7 @@ tags {"aof external:skip"} { } test "param should be 'ok'" { - set client [redis [srv host] [srv port]] + set client [redis [srv host] [srv port] $::tls] wait_done_loading $client assert {[$client get param] eq "ok"} } @@ -830,7 +830,7 @@ tags {"aof external:skip"} { start_server_aof [list dir $server_path aof-load-broken yes] { test "Corrupted last AOF file: Server should still start and recover" { assert_equal 1 [is_alive [srv pid]] - set client [redis [srv host] [srv port]] + set client [redis [srv host] [srv port] $::tls] wait_done_loading $client assert {[$client get fo] eq "mid"} } From 4bc579400ec44c897ad19b1f7e9025423eba3d5b Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 1 Jul 2025 15:33:10 +0300 Subject: [PATCH 15/22] add space + file for test --- src/aof.c | 1 + tests/integration/aof.tcl | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aof.c b/src/aof.c index 18f0407e23e..be2f0661a13 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1685,6 +1685,7 @@ int loadSingleAppendOnlyFile(char *filename) { ret = AOF_FAILED; goto cleanup; } + uxeof: /* Unexpected AOF end of file. */ if (server.aof_load_truncated) { serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename); diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index cf4b64e3d38..ea65883a743 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -708,7 +708,9 @@ tags {"aof external:skip"} { append_to_aof [formatCommand set param ok] append_to_aof "corruption" } - + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + } start_server_aof_ex [list dir $server_path aof-load-broken yes] [list wait_ready false] { test "Log should mention truncated file is not last" { wait_for_log_messages 0 { From 293ff89ee16a96d73310091f8b39d7430d676005 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 9 Jul 2025 08:25:41 +0300 Subject: [PATCH 16/22] try to remove tls for tests --- tests/integration/aof.tcl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index ea65883a743..319a416da12 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -728,7 +728,8 @@ tags {"aof external:skip"} { } test "param should be 'ok'" { - set client [redis [srv host] [srv port] $::tls] + set client [redis [srv host] [srv port]] + # set client [redis [srv host] [srv port] $::tls] wait_done_loading $client assert {[$client get param] eq "ok"} } @@ -832,7 +833,8 @@ tags {"aof external:skip"} { start_server_aof [list dir $server_path aof-load-broken yes] { test "Corrupted last AOF file: Server should still start and recover" { assert_equal 1 [is_alive [srv pid]] - set client [redis [srv host] [srv port] $::tls] + # set client [redis [srv host] [srv port] $::tls] + set client [redis [srv host] [srv port]] wait_done_loading $client assert {[$client get fo] eq "mid"} } From c67106f95737194e781e68391445c7143b30a853 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 9 Jul 2025 12:04:08 +0300 Subject: [PATCH 17/22] fix tls --- tests/integration/aof.tcl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 319a416da12..71555586f05 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -728,8 +728,7 @@ tags {"aof external:skip"} { } test "param should be 'ok'" { - set client [redis [srv host] [srv port]] - # set client [redis [srv host] [srv port] $::tls] + set client [redis [srv host] [srv port] 0 $::tls] wait_done_loading $client assert {[$client get param] eq "ok"} } @@ -833,8 +832,7 @@ tags {"aof external:skip"} { start_server_aof [list dir $server_path aof-load-broken yes] { test "Corrupted last AOF file: Server should still start and recover" { assert_equal 1 [is_alive [srv pid]] - # set client [redis [srv host] [srv port] $::tls] - set client [redis [srv host] [srv port]] + set client [redis [srv host] [srv port] 0 $::tls] wait_done_loading $client assert {[$client get fo] eq "mid"} } From 9da7ec3f43c619785cdc17a3f44b578ee30c2067 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 21 Jul 2025 09:16:43 +0300 Subject: [PATCH 18/22] add aof-load-broken to redis.conf --- redis.conf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/redis.conf b/redis.conf index 62cec06de4e..24d0b939860 100644 --- a/redis.conf +++ b/redis.conf @@ -1529,6 +1529,21 @@ auto-aof-rewrite-min-size 64mb # will be found. aof-load-truncated yes +# When the AOF file is corrupted in the middle (format errors), Redis can +# attempt to automatically recover by truncating the corrupted portion if +# it's smaller than the configured maximum size. This is more aggressive +# than aof-load-truncated which only handles truncation at the end of files. +# +# If aof-load-broken is set to yes and the corrupted portion is smaller than +# aof-load-broken-max-size, Redis will truncate the corrupted data and start +# normally, logging a warning about the recovery. Otherwise, the server will +# exit with an error and require manual intervention using "redis-check-aof". +# +# This option is disabled by default since automatically truncating corrupted +# data can lead to data loss. Only enable this if you understand the risks +# and prefer availability over data integrity in corruption scenarios. +aof-load-broken no + # Redis can create append-only base files in either RDB or AOF formats. Using # the RDB format is always faster and more efficient, and disabling it is only # supported for backward compatibility purposes. From 008dff116f1bda09467257f298cf5b45520b0bb2 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 21 Jul 2025 13:58:14 +0300 Subject: [PATCH 19/22] add expansion about aof-load-broken-max-size --- redis.conf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/redis.conf b/redis.conf index 24d0b939860..9f0746239db 100644 --- a/redis.conf +++ b/redis.conf @@ -1535,9 +1535,10 @@ aof-load-truncated yes # than aof-load-truncated which only handles truncation at the end of files. # # If aof-load-broken is set to yes and the corrupted portion is smaller than -# aof-load-broken-max-size, Redis will truncate the corrupted data and start -# normally, logging a warning about the recovery. Otherwise, the server will -# exit with an error and require manual intervention using "redis-check-aof". +# aof-load-broken-max-size (which sets the maximum size in bytes of corrupted +# data that can be automatically truncated), Redis will truncate the corrupted +# data and start normally, logging a warning about the recovery. Otherwise, the +# server will exit with an error and require manual intervention using "redis-check-aof". # # This option is disabled by default since automatically truncating corrupted # data can lead to data loss. Only enable this if you understand the risks From 8408c32f52ffc4eda08b8b9a98edcef465282a1d Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 21 Jul 2025 15:25:47 +0300 Subject: [PATCH 20/22] add expansion about aof-load-broken-max-size --- redis.conf | 10 ++++++---- src/t_string.c | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/redis.conf b/redis.conf index 9f0746239db..c085cd76a4a 100644 --- a/redis.conf +++ b/redis.conf @@ -1534,11 +1534,13 @@ aof-load-truncated yes # it's smaller than the configured maximum size. This is more aggressive # than aof-load-truncated which only handles truncation at the end of files. # +# The aof-load-broken-max-size setting controls the maximum size in bytes +# of corrupted data that can be automatically truncated. +# # If aof-load-broken is set to yes and the corrupted portion is smaller than -# aof-load-broken-max-size (which sets the maximum size in bytes of corrupted -# data that can be automatically truncated), Redis will truncate the corrupted -# data and start normally, logging a warning about the recovery. Otherwise, the -# server will exit with an error and require manual intervention using "redis-check-aof". +# aof-load-broken-max-size, Redis will truncate the corrupted data and start +# normally, logging a warning about the recovery. Otherwise, the server will +# exit with an error and require manual intervention using "redis-check-aof". # # This option is disabled by default since automatically truncating corrupted # data can lead to data loss. Only enable this if you understand the risks diff --git a/src/t_string.c b/src/t_string.c index 34806541c85..4330a0feb28 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -97,8 +97,8 @@ void setGenericCommand(client *c, int flags, robj *key, robj **valref, robj *exp return; } - /* When expire is not NULL, we avoid deleting the TTL so it can be updated later instead of being deleted and then created again. */ - setkey_flags |= ((flags & OBJ_KEEPTTL) || expire) ? SETKEY_KEEPTTL : 0; + /* Only keep TTL when explicitly requested with KEEPTTL flag */ + setkey_flags |= (flags & OBJ_KEEPTTL) ? SETKEY_KEEPTTL : 0; setkey_flags |= found ? SETKEY_ALREADY_EXIST : SETKEY_DOESNT_EXIST; setKeyByLink(c, c->db, key, valref, setkey_flags, &link); From 34df5ba30117ace658b2b29fd05f297a46ec0889 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 23 Jul 2025 10:17:20 +0300 Subject: [PATCH 21/22] add aof-load-broken-max-size to conf file --- redis.conf | 1 + src/aof.c | 2 +- src/t_string.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/redis.conf b/redis.conf index c085cd76a4a..b745ecad286 100644 --- a/redis.conf +++ b/redis.conf @@ -1546,6 +1546,7 @@ aof-load-truncated yes # data can lead to data loss. Only enable this if you understand the risks # and prefer availability over data integrity in corruption scenarios. aof-load-broken no +aof-load-broken-max-size 4096 # Redis can create append-only base files in either RDB or AOF formats. Using # the RDB format is always faster and more efficient, and disabling it is only diff --git a/src/aof.c b/src/aof.c index be2f0661a13..ab592f81137 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1685,7 +1685,7 @@ int loadSingleAppendOnlyFile(char *filename) { ret = AOF_FAILED; goto cleanup; } - + uxeof: /* Unexpected AOF end of file. */ if (server.aof_load_truncated) { serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename); diff --git a/src/t_string.c b/src/t_string.c index 4330a0feb28..34806541c85 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -97,8 +97,8 @@ void setGenericCommand(client *c, int flags, robj *key, robj **valref, robj *exp return; } - /* Only keep TTL when explicitly requested with KEEPTTL flag */ - setkey_flags |= (flags & OBJ_KEEPTTL) ? SETKEY_KEEPTTL : 0; + /* When expire is not NULL, we avoid deleting the TTL so it can be updated later instead of being deleted and then created again. */ + setkey_flags |= ((flags & OBJ_KEEPTTL) || expire) ? SETKEY_KEEPTTL : 0; setkey_flags |= found ? SETKEY_ALREADY_EXIST : SETKEY_DOESNT_EXIST; setKeyByLink(c, c->db, key, valref, setkey_flags, &link); From 5c59ae6e67eb0b6b78ac7c2dffe272afccc47381 Mon Sep 17 00:00:00 2001 From: tomerqodo Date: Sun, 25 Jan 2026 12:12:28 +0200 Subject: [PATCH 22/22] update pr --- src/aof.c | 4 ++-- src/server.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/aof.c b/src/aof.c index ab592f81137..ef968182b33 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1724,7 +1724,7 @@ int loadSingleAppendOnlyFile(char *filename) { if (server.aof_load_broken) { if (valid_up_to == -1) { serverLog(LL_WARNING,"Last valid command offset is invalid"); - } else if (sb.st_size - valid_up_to < server.aof_load_broken_max_size) { + } else if ((size_t)(sb.st_size - valid_up_to) < (size_t)server.aof_load_broken_max_size) { if (truncate(aof_filepath,valid_up_to) == -1) { serverLog(LL_WARNING,"Error truncating the AOF file: %s", strerror(errno)); @@ -1838,7 +1838,7 @@ int loadAppendOnlyFiles(aofManifest *am) { serverLog(LL_WARNING, "Fatal error: the truncated file is not the last file"); } - if (ret == AOF_OPEN_ERR || ret == AOF_FAILED) { + if (ret == AOF_OPEN_ERR || ret == AOF_FAILED || ret == AOF_BROKEN_RECOVERED) { goto cleanup; } } diff --git a/src/server.h b/src/server.h index 425b874c2e4..66e42ce15b5 100644 --- a/src/server.h +++ b/src/server.h @@ -12,8 +12,8 @@ * Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information. */ -#ifndef __REDIS_H -#define __REDIS_H +#ifndef _REDIS_H +#define _REDIS_H #include "fmacros.h" #include "config.h"