Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,29 @@ production stability.

------------------------------------------------------------------------

## [0.3.0] - 2026-03-15

Safe garbage collection foundation.

### Added
- Repository verification command (`coldkeep verify`)
- Verification levels: standard, full, deep
- Reference count validation
- Container integrity verification
- Chunk offset validation
- Deep data verification (hash validation)

### Improved
- Garbage collection safety via transactional re-checks
- Advisory lock preventing concurrent GC runs
- `gc --dry-run` simulation mode

### Testing
- Integration tests for GC safety
- Verification corruption detection tests

------------------------------------------------------------------------

## [0.2.0] - 2026-03-11

Crash-consistency foundation for the storage engine
Expand Down
33 changes: 31 additions & 2 deletions cmd/coldkeep/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,16 @@ func main() {
err = storage.RemoveFile(fileID)

case "gc":
err = maintenance.RunGC()
if len(os.Args) > 2 {
switch os.Args[2] {
case "--dry-run", "--dryRun", "dry-run", "dryRun":
err = maintenance.RunGC(true)
default:
log.Fatal("Unknown option for gc: ", os.Args[2])
}
} else {
err = maintenance.RunGC(false)
}

case "stats":
err = maintenance.RunStats()
Expand All @@ -79,6 +88,20 @@ func main() {
case "search":
err = listing.SearchFiles(os.Args[2:])

case "verify":
if len(os.Args) > 2 {
switch os.Args[2] {
case "--full", "--full-check", "full", "full-check":
err = maintenance.RunVerify(maintenance.VerifyFull)
case "--deep", "--deep-check", "deep", "deep-check":
err = maintenance.RunVerify(maintenance.VerifyDeep)
default:
log.Fatal("Unknown option for verify: ", os.Args[2])
}
} else {
err = maintenance.RunVerify(maintenance.VerifyStandard)
}

default:
fmt.Println("Unknown command:", os.Args[1])
fmt.Println()
Expand All @@ -104,8 +127,14 @@ func printHelp() {
fmt.Println(" store-folder <folder> Store all files in a folder recursively")
fmt.Println(" restore <fileID> <dir> Restore file by ID into directory")
fmt.Println(" remove <fileID> Remove logical file (decrement refcounts)")
fmt.Println(" gc Run garbage collection")
fmt.Println(" gc [options] Run garbage collection")
fmt.Println(" (no options) Perform standard GC")
fmt.Println(" gc --dry-run Show what would be removed without deleting")
fmt.Println(" stats Show storage statistics")
fmt.Println(" verify [options] Verify stored files")
fmt.Println(" (no options) Perform standard verification (metadata only)")
fmt.Println(" verify --full Perform full verification (metadata + content)")
fmt.Println(" verify --deep Perform deep verification (metadata + content + checksums)")
fmt.Println(" help Show this help message")
fmt.Println(" version Show version information")
fmt.Println(" list List stored logical files")
Expand Down
2 changes: 1 addition & 1 deletion db/init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ CREATE TABLE IF NOT EXISTS file_chunk (
REFERENCES logical_file(id) ON DELETE CASCADE,
chunk_id BIGINT NOT NULL
REFERENCES chunk(id) ON DELETE RESTRICT,
chunk_order INTEGER NOT NULL CHECK (chunk_order >= 0),
chunk_order BIGINT NOT NULL CHECK (chunk_order >= 0),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (logical_file_id, chunk_order)
);
Expand Down
8 changes: 4 additions & 4 deletions internal/chunk/cdc.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (

const (
minChunkSize = 512 * 1024
maxChunkSize = 2 * 1024 * 1024
MaxChunkSize = 2 * 1024 * 1024
mask = 0x3FFFF
)

Expand All @@ -19,7 +19,7 @@ func ChunkFile(filePath string) ([][]byte, error) {
defer file.Close()

var chunks [][]byte
buffer := make([]byte, 0, maxChunkSize)
buffer := make([]byte, 0, MaxChunkSize)
var rolling uint32

temp := make([]byte, 32*1024)
Expand All @@ -32,11 +32,11 @@ func ChunkFile(filePath string) ([][]byte, error) {
buffer = append(buffer, b)
rolling = (rolling << 1) + uint32(b)

if len(buffer) >= minChunkSize && ((rolling&mask) == 0 || len(buffer) >= maxChunkSize) {
if len(buffer) >= minChunkSize && ((rolling&mask) == 0 || len(buffer) >= MaxChunkSize) {
chunk := make([]byte, len(buffer))
copy(chunk, buffer)
chunks = append(chunks, chunk)
buffer = make([]byte, 0, maxChunkSize)
buffer = make([]byte, 0, MaxChunkSize)
rolling = 0
}
}
Expand Down
26 changes: 26 additions & 0 deletions internal/maintenance/constants.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package maintenance

// gcAdvisoryLockID is the key RunGC passes to pg_try_advisory_lock and
// pg_advisory_unlock so that only one GC run proceeds at a time.
//
// NOTE(review): pg_try_advisory_lock takes a session-level lock. If
// db.ConnectDB returns a pooled handle, the lock and unlock statements may
// execute on different connections — confirm, and consider pinning a single
// connection for the lock's lifetime.
const gcAdvisoryLockID = 847362

// VerifyLevel identifies which verification mode a verify run uses.
type VerifyLevel int

// Supported verification levels, numbered from zero in declaration order.
const (
	VerifyStandard VerifyLevel = iota
	VerifyFull
	VerifyDeep
)

// VerifyLevelString returns the lowercase name of v ("standard", "full" or
// "deep"). Any value outside the declared levels yields "unknown".
func VerifyLevelString(v VerifyLevel) string {
	// Indexed by level so the name table stays aligned with the constants.
	names := [...]string{
		VerifyStandard: "standard",
		VerifyFull:     "full",
		VerifyDeep:     "deep",
	}

	if v < 0 || int(v) >= len(names) {
		return "unknown"
	}
	return names[v]
}

// maxErrorsToPrint is fixed at 50. Its users are not visible in this file;
// presumably it caps how many errors a run prints — TODO confirm against callers.
const maxErrorsToPrint = 50
58 changes: 49 additions & 9 deletions internal/maintenance/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,42 @@ import (
"github.com/franchoy/coldkeep/internal/utils"
)

func RunGC() error {
func RunGC(dryRun bool) error {
dbconn, err := db.ConnectDB()
if err != nil {
return fmt.Errorf("failed to connect to DB: %w", err)
}
defer dbconn.Close()

// Attempt to acquire advisory lock to ensure only one GC runs at a time
var locked bool

err = dbconn.QueryRow("SELECT pg_try_advisory_lock($1)", gcAdvisoryLockID).Scan(&locked)
if err != nil {
return fmt.Errorf("failed to attempt advisory lock: %w", err)
}

if !locked {
return fmt.Errorf("GC already running (advisory lock held)")
}

defer func() {
_, err = dbconn.Exec("SELECT pg_advisory_unlock($1)", gcAdvisoryLockID)
if err != nil {
log.Printf("warning: failed to release advisory lock: %v\n", err)
}
}()

rows, err := dbconn.Query(`
SELECT id, filename, compression_algorithm
FROM container WHERE quarantine = FALSE
FROM container WHERE quarantine = FALSE AND sealed = TRUE
`)
if err != nil {
return err
}
defer rows.Close()

var deletedContainers int
var affectedContainers int

for rows.Next() {
var containerID int64
Expand All @@ -47,13 +66,12 @@ func RunGC() error {
var stillEmpty bool
err = tx.QueryRow(`
SELECT
sealed AND NOT EXISTS (
COALESCE(sealed, false) AND NOT EXISTS (
SELECT 1 FROM chunk
WHERE container_id = $1
AND ref_count > 0
)
FROM container where quarantine = FALSE
and id = $1
FROM container where id = $1
`, containerID).Scan(&stillEmpty)
if err != nil {
_ = tx.Rollback()
Expand All @@ -65,8 +83,17 @@ func RunGC() error {
continue
}

// If dry-run, rollback transaction and skip file deletion
// dry-run is just simulation and count
if dryRun {
fmt.Println("[DRY-RUN] Would delete container:", filename)
_ = tx.Rollback()
affectedContainers++
continue
}

// Delete chunks
_, err = tx.Exec(`DELETE FROM chunk WHERE container_id = $1 and status = 'COMPLETED'`, containerID)
_, err = tx.Exec(`DELETE FROM chunk WHERE container_id = $1 AND status = 'COMPLETED'`, containerID)
if err != nil {
_ = tx.Rollback()
return err
Expand All @@ -93,10 +120,23 @@ func RunGC() error {
log.Println("warning: failed to delete container file:", err)
}

deletedContainers++
affectedContainers++
fmt.Println("Deleted container:", filename)
}

fmt.Printf("GC completed. Containers deleted: %d\n", deletedContainers)
if err := rows.Err(); err != nil {
return err
}

if affectedContainers == 0 {
fmt.Println("GC completed. No containers eligible for deletion.")
return nil
}

if dryRun {
fmt.Printf("GC dry-run completed. Containers eligible for deletion: %d\n", affectedContainers)
} else {
fmt.Printf("GC completed. Containers deleted: %d\n", affectedContainers)
}
return nil
}
Loading
Loading