diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f507e0..7a23394 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,42 @@ production stability. ------------------------------------------------------------------------ +## [0.4.0] - 2026-03-19 + +Integrity Verification Layer + +This release introduces a complete integrity verification system for coldkeep, +covering metadata consistency, container structure validation, and full +end-to-end data integrity checks. + +The system is designed in three verification levels: + +- Standard: metadata integrity checks +- Full: metadata + container structure and hash validation +- Deep: full physical verification by reading container data and recomputing chunk hashes + +### Added +- `verify system` command with three verification levels (standard, full, deep) +- `verify file ` command with per-file verification (standard, full, deep) +- Deep verification logic that reads container data and validates chunk hashes +- Record-level validation (header hash + stored size + data hash) +- Container-wide integrity verification across all sealed containers +- Comprehensive integration tests for verification (positive and corruption scenarios) + +### Improved +- Verification coverage across file, chunk, and container layers +- Error reporting with aggregated verification failures +- Internal consistency checks for chunk offsets, sizes, and container bounds + +### Notes +- Deep verification performs full disk reads and may be slow on large datasets +- Whole-container compression is still present but will be removed in a future release in favor of block-level compression + +coldkeep remains an experimental research project and is not production ready. +The on-disk format may change before v1.0. + +------------------------------------------------------------------------ + ## [0.3.0] - 2026-03-15 Safe garbage collection foundation. 
diff --git a/README.md b/README.md index 21ee828..92a555d 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ > **Status:** Experimental research projec.\ > **Not production-ready. Do not use for real or sensitive data.** -coldkeep is an experimental **local-first content-addressed file storage engine** +coldkeep is an experimental **local-first content-addressed file storage engine with verifiable integrity** written in Go. Files are split into **content-addressed chunks**, packed into @@ -34,6 +34,54 @@ storage. - Run garbage collection to remove unreferenced chunks. - Recover safely from interrupted operations on startup. - Display storage statistics and container health information. +- Multi-level integrity verification (metadata, container structure, and full data integrity) + +------------------------------------------------------------------------ + +## Verification + +coldkeep provides a multi-level integrity verification system to ensure +consistency and detect corruption across metadata and stored data. 
+ +### Levels + +- **Standard** + - Validates metadata integrity + - Checks reference counts, chunk ordering, and orphan records + +- **Full** + - Includes all standard checks + - Verifies container files exist and match recorded sizes + - Validates container hashes and chunk-to-container consistency + +- **Deep** + - Includes all full checks + - Reads container data and recomputes chunk hashes + - Detects physical data corruption at the byte level + +### Usage + +Verify the entire system: + +```bash +coldkeep verify system --standard +coldkeep verify system --full +coldkeep verify system --deep +``` + +Verify a specific file: + +```bash +coldkeep verify file <fileID> --standard +coldkeep verify file <fileID> --full +coldkeep verify file <fileID> --deep +``` + +### Notes + +Deep verification performs full reads of container files and may be slow. + +Recommended for periodic integrity audits rather than frequent execution. ------------------------------------------------------------------------ @@ -76,13 +124,17 @@ recover safely on startup. 
│ └─ coldkeep/ # CLI entrypoint │ ├─ internal/ - │ ├─ container/ # container format + container management - │ ├─ chunk/ # chunking and compression logic - │ ├─ db/ # database connection helpers - │ ├─ storage/ # store / restore / remove pipeline - │ ├─ maintenance/ # gc and stats - │ ├─ listing/ # file listing operations - │ └─ utils/ # small helper utilities + │ ├─ chunk/ # chunking and compression logic + │ ├─ container/ # container format + container management + │ ├─ db/ # database connection helpers + │ ├─ listing/ # file listing operations + │ ├─ maintenance/ # gc, stats, and verify_command + │ ├─ recovery/ # system recovery logic + │ ├─ storage/ # store / restore / remove pipeline + │ ├─ utils_compression/ # small compression helper utilities + │ ├─ utils_env/ # small env helper utilities + │ ├─ utils_print/ # small print helper utilities + │ └─ verify/ # verify logic for system or file │ ├─ tests/ # integration tests ├─ scripts/ # smoke / development scripts @@ -278,7 +330,7 @@ data. 
## Build - go build ./cmd/coldkeep + go build -o coldkeep ./cmd/coldkeep ## Tests diff --git a/cmd/coldkeep/main.go b/cmd/coldkeep/main.go index ec25c62..6ba267c 100644 --- a/cmd/coldkeep/main.go +++ b/cmd/coldkeep/main.go @@ -10,9 +10,10 @@ import ( "github.com/franchoy/coldkeep/internal/maintenance" "github.com/franchoy/coldkeep/internal/recovery" "github.com/franchoy/coldkeep/internal/storage" + "github.com/franchoy/coldkeep/internal/verify" ) -const version = "0.3.0" +const version = "0.4.0" func main() { @@ -89,17 +90,60 @@ func main() { err = listing.SearchFiles(os.Args[2:]) case "verify": + var target string + var verifyLevel verify.VerifyLevel + var fileID int64 + //target can be "system" or "file" if len(os.Args) > 2 { - switch os.Args[2] { - case "--full", "--full-check", "full", "full-check": - err = maintenance.RunVerify(maintenance.VerifyFull) - case "--deep", "--deep-check", "deep", "deep-check": - err = maintenance.RunVerify(maintenance.VerifyDeep) + target = os.Args[2] + switch target { + case "system": + //target is system, verify level can be --standard, --full, or --deep + if len(os.Args) > 3 { + switch os.Args[3] { + case "--standard", "standard", "": + verifyLevel = verify.VerifyStandard + case "--full", "full": + verifyLevel = verify.VerifyFull + case "--deep", "deep": + verifyLevel = verify.VerifyDeep + default: + log.Fatal("Unknown option for system verify: ", os.Args[3]) + } + } else { + verifyLevel = verify.VerifyStandard + } + case "file": + if len(os.Args) > 3 { + fileID, err = strconv.ParseInt(os.Args[3], 10, 64) + if err != nil { + log.Fatal("Invalid fileID: ", err) + } + } else { + log.Fatal("Usage: coldkeep verify file [--standard|--full|--deep]") + } + if len(os.Args) > 4 { + switch os.Args[4] { + case "--standard", "standard", "": + verifyLevel = verify.VerifyStandard + case "--full", "full": + verifyLevel = verify.VerifyFull + case "--deep", "deep": + verifyLevel = verify.VerifyDeep + default: + log.Fatal("Unknown option for file 
verify: ", os.Args[4]) + } + } else { + verifyLevel = verify.VerifyStandard + } default: - log.Fatal("Unknown option for verify: ", os.Args[2]) + log.Fatal("Unknown target for verify: ", target) + } + + //call verify command with target, fileID, and verifyLevel + err = maintenance.VerifyCommand(target, int(fileID), verifyLevel) } else { - err = maintenance.RunVerify(maintenance.VerifyStandard) + log.Fatal("Usage: coldkeep verify <system|file> [fileID] [--standard|--full|--deep]") } default: @@ -117,33 +161,34 @@ func main() { } func printHelp() { - fmt.Println("coldkeep (V0.3.0)") + fmt.Println("coldkeep (V0.4.0)") fmt.Println() fmt.Println("Usage:") fmt.Println(" coldkeep [arguments]") fmt.Println() fmt.Println("Commands:") - fmt.Println(" store Store a single file") - fmt.Println(" store-folder Store all files in a folder recursively") - fmt.Println(" restore Restore file by ID into directory") - fmt.Println(" remove Remove logical file (decrement refcounts)") - fmt.Println(" gc [options] Run garbage collection") - fmt.Println(" (no options) Perform standard GC") - fmt.Println(" gc --dry-run Show what would be removed without deleting") - fmt.Println(" stats Show storage statistics") - fmt.Println(" verify [options] Verify stored files") - fmt.Println(" (no options) Perform standard verification (metadata only)") - fmt.Println(" verify --full Perform full verification (metadata + content)") - fmt.Println(" verify --deep Perform deep verification (metadata + content + checksums)") - fmt.Println(" help Show this help message") - fmt.Println(" version Show version information") - fmt.Println(" list List stored logical files") - fmt.Println(" search [filters] Search files by filters") - fmt.Println() - fmt.Println("Search Filters:") - fmt.Println(" --name ") - fmt.Println(" --min-size ") - fmt.Println(" --max-size ") + fmt.Println(" store Store a single file") + fmt.Println(" store-folder Store all files in a folder recursively") + fmt.Println(" restore Restore file by ID into 
directory") + fmt.Println(" remove Remove logical file (decrement refcounts)") + fmt.Println(" gc [options] Run garbage collection") + fmt.Println(" (no options) Perform standard GC") + fmt.Println(" gc --dry-run Show what would be removed without deleting") + fmt.Println(" stats Show storage statistics") + fmt.Println(" verify [target] [fileID] [options] Verify stored files") + fmt.Println(" [target] can be 'system' or 'file'") + fmt.Println(" [options] can be '--standard', '--full', or '--deep'") + fmt.Println(" no options defaults to '--standard'") + fmt.Println(" verify system [options] Perform system-wide verification") + fmt.Println(" verify file [options] Perform verification for specific file") + fmt.Println(" help Show this help message") + fmt.Println(" version Show version information") + fmt.Println(" list List stored logical files") + fmt.Println(" search [filters] Search files by filters") + fmt.Println(" Filters:") + fmt.Println(" --name ") + fmt.Println(" --min-size ") + fmt.Println(" --max-size ") fmt.Println() fmt.Println("Environment Variables:") fmt.Println(" DB_HOST") diff --git a/db/init.sql b/db/init.sql index 6c37550..d669700 100644 --- a/db/init.sql +++ b/db/init.sql @@ -19,6 +19,7 @@ CREATE TABLE IF NOT EXISTS container ( id BIGSERIAL PRIMARY KEY, filename TEXT NOT NULL UNIQUE, sealed BOOLEAN NOT NULL DEFAULT FALSE, + container_hash TEXT DEFAULT NULL, quarantine BOOLEAN NOT NULL DEFAULT FALSE, current_size BIGINT NOT NULL DEFAULT 0 CHECK (current_size >= 0), max_size BIGINT NOT NULL CHECK (max_size > 0), diff --git a/internal/container/constants.go b/internal/container/constants.go index 76e052f..9aba31e 100644 --- a/internal/container/constants.go +++ b/internal/container/constants.go @@ -1,12 +1,12 @@ package container import ( - "github.com/franchoy/coldkeep/internal/utils" + "github.com/franchoy/coldkeep/internal/utils_env" ) -var ContainersDir = utils.GetenvOrDefault("COLDKEEP_STORAGE_DIR", "./storage/containers") +var ContainersDir = 
utils_env.GetenvOrDefault("COLDKEEP_STORAGE_DIR", "./storage/containers") -var containerMaxSize = utils.GetenvOrDefaultInt64("COLDKEEP_CONTAINER_MAX_SIZE_MB", 64) * 1024 * 1024 //MB +var containerMaxSize = utils_env.GetenvOrDefaultInt64("COLDKEEP_CONTAINER_MAX_SIZE_MB", 64) * 1024 * 1024 //MB // GetContainerMaxSize returns the current container max size func GetContainerMaxSize() int64 { diff --git a/internal/container/container.go b/internal/container/container.go index 29f127f..2760e58 100644 --- a/internal/container/container.go +++ b/internal/container/container.go @@ -10,7 +10,7 @@ import ( "time" "github.com/franchoy/coldkeep/internal/db" - "github.com/franchoy/coldkeep/internal/utils" + "github.com/franchoy/coldkeep/internal/utils_compression" ) func GetOrCreateOpenContainer(db db.DBTX) (int64, string, int64, error) { @@ -139,7 +139,7 @@ func SealContainer(tx db.DBTX, containerID int64, filename string) error { originalPath := filepath.Join(ContainersDir, filename) // Compress file - compressedPath, compressed_size, err := utils.CompressFile(originalPath, utils.DefaultCompression) + compressedPath, compressed_size, sumHex, err := utils_compression.CompressFile(originalPath, utils_compression.DefaultCompression) if err != nil { return err } @@ -149,14 +149,35 @@ func SealContainer(tx db.DBTX, containerID int64, filename string) error { UPDATE container SET sealed = TRUE, compression_algorithm = $1, - compressed_size = $2 - WHERE id = $3 - `, string(utils.DefaultCompression), compressed_size, containerID) + compressed_size = $2, + container_hash = $3 + WHERE id = $4 + `, string(utils_compression.DefaultCompression), compressed_size, sumHex, containerID) if err != nil { return fmt.Errorf("update/seal container failed: %w", err) } - fmt.Printf("Container %d sealed and compressed with type %s : %s\n", containerID, utils.DefaultCompression, compressedPath) + fmt.Printf("Container %d sealed and compressed with type %s : %s\n", containerID, 
utils_compression.DefaultCompression, compressedPath) + return nil +} + +func CheckContainerHashFile(id int, filename, storedHash string) error { + containerPath := filepath.Join(ContainersDir, filename) + + computedHash, err := utils_compression.ComputeFileHashHex(containerPath) + if err != nil { + return fmt.Errorf("compute container file hash: %w", err) + } + + //a missing stored hash (legacy container sealed before hash recording) is reported as an error, with the computed hash included so it can be backfilled in the db + if len(storedHash) == 0 || storedHash == "null" || storedHash == "NULL" { + return fmt.Errorf("container file hash is missing in db for container %d, calculated hash: %s", id, computedHash) + } + + if computedHash != storedHash { + return fmt.Errorf("container file hash mismatch for container %d: expected %s, got %s", id, storedHash, computedHash) + } + return nil } diff --git a/internal/db/db.go b/internal/db/db.go index 1274065..4894b6c 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -6,7 +6,7 @@ import ( "os" "strings" - "github.com/franchoy/coldkeep/internal/utils" + "github.com/franchoy/coldkeep/internal/utils_env" _ "github.com/lib/pq" ) @@ -16,7 +16,7 @@ func ConnectDB() (*sql.DB, error) { " user=" + os.Getenv("DB_USER") + " password=" + os.Getenv("DB_PASSWORD") + " dbname=" + os.Getenv("DB_NAME") + - " sslmode=" + utils.GetenvOrDefault("DB_SSLMODE", "disable") + " sslmode=" + utils_env.GetenvOrDefault("DB_SSLMODE", "disable") safeConnStr := strings.ReplaceAll(connStr, "password="+os.Getenv("DB_PASSWORD"), "password=***") log.Printf("Connecting to DB with: %s", safeConnStr) // Log the connection string (without password) diff --git a/internal/maintenance/constants.go b/internal/maintenance/constants.go index 4a0a54f..3f25899 100644 --- a/internal/maintenance/constants.go +++ b/internal/maintenance/constants.go @@ -1,26 +1,3 @@ package maintenance const gcAdvisoryLockID = 847362 - -type VerifyLevel int - -const ( - VerifyStandard VerifyLevel = iota - VerifyFull - VerifyDeep -) - -func 
VerifyLevelString(v VerifyLevel) string { - switch v { - case VerifyStandard: - return "standard" - case VerifyFull: - return "full" - case VerifyDeep: - return "deep" - default: - return "unknown" - } -} - -const maxErrorsToPrint = 50 diff --git a/internal/maintenance/gc.go b/internal/maintenance/gc.go index 5f456be..32f5897 100644 --- a/internal/maintenance/gc.go +++ b/internal/maintenance/gc.go @@ -8,7 +8,7 @@ import ( "github.com/franchoy/coldkeep/internal/container" "github.com/franchoy/coldkeep/internal/db" - "github.com/franchoy/coldkeep/internal/utils" + "github.com/franchoy/coldkeep/internal/utils_compression" ) func RunGC(dryRun bool) error { @@ -112,7 +112,7 @@ func RunGC(dryRun bool) error { // After commit, delete file from disk containerPath := filepath.Join(container.ContainersDir, filename) - if algo != "" && algo != string(utils.CompressionNone) { + if algo != "" && algo != string(utils_compression.CompressionNone) { containerPath += "." + algo } diff --git a/internal/maintenance/verify.go b/internal/maintenance/verify.go deleted file mode 100644 index 4737742..0000000 --- a/internal/maintenance/verify.go +++ /dev/null @@ -1,835 +0,0 @@ -package maintenance - -import ( - "bytes" - "crypto/sha256" - "database/sql" - "encoding/hex" - "fmt" - "io" - "log" - "os" - "path/filepath" - - "github.com/franchoy/coldkeep/internal/chunk" - "github.com/franchoy/coldkeep/internal/container" - "github.com/franchoy/coldkeep/internal/db" - "github.com/franchoy/coldkeep/internal/utils" -) - -func RunVerify(VerifyLevel VerifyLevel) error { - log.Printf("Starting verification with method: %s", VerifyLevelString(VerifyLevel)) - dbconn, err := db.ConnectDB() - if err != nil { - return fmt.Errorf("failed to connect to DB: %w", err) - } - defer dbconn.Close() - - switch VerifyLevel { - case VerifyStandard: - // standard verification - case VerifyFull: - // full verification - case VerifyDeep: - // deep verification - default: - return fmt.Errorf("invalid verification 
level: %s", VerifyLevelString(VerifyLevel)) - } - - //verifymethod - // standard - // reference count check - // orphan chunk check - // file chunk ordering check - // full - // standard checks + - // container file existence and size check - // chunk-container consistency check - // chunk offsets consistency check - // chunk offset validity check - // checkContainerCompleteness - // deep - // full checks + - // actual file integrity checks (e.g. read container files and verify chunk data against stored hashes) - - //standard checks ini - - var containerCount, chunkCount, fileCount int - //list container counter to be checked - err = dbconn.QueryRow("SELECT COUNT(*) FROM container").Scan(&containerCount) - if err != nil { - return fmt.Errorf("failed to query container count: %w", err) - } - //list chunk counter to be checked - err = dbconn.QueryRow("SELECT COUNT(*) FROM chunk").Scan(&chunkCount) - if err != nil { - return fmt.Errorf("failed to query chunk count: %w", err) - } - //list file counter to be checked - err = dbconn.QueryRow("SELECT COUNT(*) FROM logical_file").Scan(&fileCount) - if err != nil { - return fmt.Errorf("failed to query logical file count: %w", err) - } - - log.Printf("Starting verification: %d containers, %d chunks, %d logical files to check", containerCount, chunkCount, fileCount) - - //check that all chunks have correct reference counts (chunk.ref_count should match the actual number of file_chunk references) - if err = checkReferenceCounts(dbconn); err != nil { - return err - } - - //check that there are no orphan chunks (chunks with ref_count > 0 but no file_chunk references) - if err = checkOrphanChunks(dbconn); err != nil { - return err - } - - //check that file_chunks for each file are ordered by chunk_offset without gaps - if err = checkFileChunkOrdering(dbconn); err != nil { - return err - } - - //standard checks end - - //full checks ini - if VerifyLevel == VerifyFull || VerifyLevel == VerifyDeep { - //check that all containers have 
their files present on disk and that the file sizes match the DB records - if err = checkContainersFileExistence(dbconn); err != nil { - return err - } - - //check that all chunks are correctly associated with their containers (if container_id != NULL → chunk.status must be COMPLETED) - if err = checkChunkContainerConsistency(dbconn); err != nil { - return err - } - - //check that all chunks have location (container_id + chunk_offset) consistent with their status (if status = COMPLETED → container_id NOT NULL chunk_offset NOT NULL, if status != COMPLETED → container_id NULL chunk_offset NULL) - if err = checkChunkOffsets(dbconn); err != nil { - return err - } - - //check that all chunks with status = COMPLETED have valid container_id and chunk_offset values and that the chunk_offset + size does not exceed the container's current_size - if err = checkChunkOffsetValidity(dbconn); err != nil { - return err - } - - //check that sealed containers should not accept new chunks - if err = checkContainerCompleteness(dbconn); err != nil { - return err - } - } - //full checks end - - //deep checks ini - if VerifyLevel == VerifyDeep { - if err = verifyDeep(dbconn); err != nil { - return err - } - } - //deep checks end - - log.Printf("Verification completed successfully with method: %s", VerifyLevelString(VerifyLevel)) - return nil -} - -func checkContainersFileExistence(dbconn *sql.DB) error { - // Check that all containers have their files present on disk - // and that the file sizes match the DB records - log.Printf("Checking container file existence and size consistency...") - var errorList []error - var errorCount int - rows, err := dbconn.Query(`select id, filename, compression_algo, current_size - from container - where quarantine = false and sealed = true`) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query container files: %v", err) - return fmt.Errorf("failed to query container files: %w", err) - } - defer rows.Close() - - for rows.Next() { - var 
id int - var filename string - var compressionalgo string - var currentSize int64 - if err := rows.Scan(&id, &filename, &compressionalgo, ¤tSize); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan container file: %w", err)) - continue - } - // Check if the file exists on disk and has the correct size - if err := checkContainerFile(id, filename, compressionalgo, currentSize); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("container file check failed for container %d: %w", id, err)) - } - } - - if err := rows.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) - } - - if len(errorList) > 0 { - log.Println(" ERROR ") - log.Printf("Found %d errors in checkContainersFileExistence checks:", errorCount) - if errorCount > maxErrorsToPrint { - log.Printf("showing only first %d:", len(errorList)) - } - for _, err := range errorList { - log.Printf(" - %v", err) - } - return fmt.Errorf("found %d errors in checkContainersFileExistence checks", errorCount) - } - log.Println(" SUCCESS ") - - return nil -} - -func checkContainerFile(id int, filename string, compressionalgo string, currentSize int64) error { - // Check if the file exists on disk and has the correct size - if compressionalgo != "" && compressionalgo != string(utils.CompressionNone) { - filename = filename + "." 
+ compressionalgo - } - - fullPath := filepath.Join(container.ContainersDir, filename) - - info, err := os.Stat(fullPath) - if err != nil { - return err - } - - // check if file exists - if !info.Mode().IsRegular() { - return fmt.Errorf("file does not exist or is not a regular file: %s", fullPath) - } - - // check if file size matches the DB record - if info.Size() != currentSize { - return fmt.Errorf("file size mismatch: expected %d, got %d", currentSize, info.Size()) - } - - return nil -} - -func checkChunkContainerConsistency(dbconn *sql.DB) error { - // Check that all chunks are correctly associated with their containers - // if container_id != NULL → chunk.status must be COMPLETED - log.Printf("Checking chunk-container consistency...") - var errorList []error - var errorCount int - rows, err := dbconn.Query(`SELECT id - FROM chunk - WHERE container_id IS NOT NULL - AND status != 'COMPLETED';`) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query chunk-container consistency: %v", err) - return fmt.Errorf("failed to query chunk-container consistency: %w", err) - } - defer rows.Close() - - for rows.Next() { - var id int - if err := rows.Scan(&id); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan inconsistent chunk: %w", err)) - continue - } - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("chunk with id %d has container_id but status is not COMPLETED", id)) - } - - if err := rows.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) - } - - if len(errorList) > 0 { - log.Println(" ERROR ") - log.Printf("Found %d errors in checkChunkContainerConsistency checks:", errorCount) - if errorCount > maxErrorsToPrint { - log.Printf("showing only first %d:", len(errorList)) - } - for _, err := range errorList { - log.Printf(" - %v", err) - } - return fmt.Errorf("found %d errors in checkChunkContainerConsistency 
checks", errorCount) - } - - log.Println(" SUCCESS ") - return nil -} - -func checkChunkOffsets(dbconn *sql.DB) error { - // Check that all chunks have location (container_id + chunk_offset) consistent with their status - // if status = COMPLETED → container_id NOT NULL chunk_offset NOT NULL - - log.Printf("Checking chunk offsets consistency with status...") - var errorList []error - var errorCount int - rows1, err := dbconn.Query(`SELECT id, container_id, chunk_offset, size, status - FROM chunk - WHERE status = 'COMPLETED' - AND (container_id IS NULL OR chunk_offset IS NULL);`) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query completed chunks: %v", err) - return fmt.Errorf("failed to query completed chunks: %w", err) - } - defer rows1.Close() - - type chunkInfo struct { - id int - containerID int - chunkOffset int64 - size int64 - status string - } - - for rows1.Next() { - var c chunkInfo - if err := rows1.Scan(&c.id, &c.containerID, &c.chunkOffset, &c.size, &c.status); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan completed chunk: %w", err)) - continue - } - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("chunk ID %d has status COMPLETED but missing location info: container_id=%v chunk_offset=%v", c.id, c.containerID, c.chunkOffset)) - } - - if err := rows1.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) - } - - rows1.Close() - - // if status != COMPLETED → container_id NULL chunk_offset NULL - rows2, err := dbconn.Query(`SELECT id, container_id, chunk_offset, size, status - FROM chunk - WHERE status != 'COMPLETED' - AND (container_id IS NOT NULL OR chunk_offset IS NOT NULL);`) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query non-completed chunks: %v", err) - return fmt.Errorf("failed to query non-completed chunks: %w", err) - } - defer rows2.Close() - - for rows2.Next() 
{ - var c chunkInfo - if err := rows2.Scan(&c.id, &c.containerID, &c.chunkOffset, &c.size, &c.status); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan non-completed chunk: %w", err)) - continue - } - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("chunk ID %d has status %s but has location info: container_id=%v chunk_offset=%v", c.id, c.status, c.containerID, c.chunkOffset)) - } - - if err := rows2.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) - } - - if len(errorList) > 0 { - log.Println(" ERROR ") - log.Printf("Found %d errors in checkChunkOffsets checks : ", errorCount) - if errorCount > maxErrorsToPrint { - log.Printf("showing only first %d:", len(errorList)) - } - for _, err := range errorList { - log.Printf(" - %v", err) - } - return fmt.Errorf("found %d errors in checkChunkOffsets checks", errorCount) - } - - log.Println(" SUCCESS ") - return nil -} - -func checkChunkOffsetValidity(dbconn *sql.DB) error { - // Check that all chunks with status = COMPLETED have valid container_id and chunk_offset values - // and that the chunk_offset + size does not exceed the container's current_size - log.Printf("Checking chunk offset validity for completed chunks...") - var errorList []error - var errorCount int - rows, err := dbconn.Query(`SELECT c.id, c.container_id, c.chunk_offset, c.size, cont.current_size - FROM chunk c - JOIN container cont ON c.container_id = cont.id - WHERE c.status = 'COMPLETED';`) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query completed chunks for offset validity: %v", err) - return fmt.Errorf("failed to query completed chunks for offset validity: %w", err) - } - defer rows.Close() - - type chunkInfo struct { - id int - containerID int - chunkOffset int64 - size int64 - containerSize int64 - } - - for rows.Next() { - var c chunkInfo - if err := rows.Scan(&c.id, &c.containerID, 
&c.chunkOffset, &c.size, &c.containerSize); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan completed chunk for offset validity: %w", err)) - continue - } - - if c.chunkOffset < 0 || c.size <= 0 || c.chunkOffset > c.containerSize-c.size { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("chunk ID %d in container %d has invalid location: chunk_offset=%d size=%d container_size=%d", c.id, c.containerID, c.chunkOffset, c.size, c.containerSize)) - } - } - - if err := rows.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) - } - - if len(errorList) > 0 { - log.Println(" ERROR ") - log.Printf("Found %d errors in checkChunkOffsetValidity checks :", errorCount) - if errorCount > maxErrorsToPrint { - log.Printf("showing only first %d:", len(errorList)) - } - for _, err := range errorList { - log.Printf(" - %v", err) - } - return fmt.Errorf("found %d errors in checkChunkOffsetValidity checks", errorCount) - } - log.Println(" SUCCESS ") - return nil -} - -func checkReferenceCounts(dbconn *sql.DB) error { - // Check that all chunks have correct reference counts (chunk.ref_count should match the actual number of file_chunk references) - log.Printf("Checking chunk reference counts consistency...") - var errorList []error - var errorCount int - rows, err := dbconn.Query(` - SELECT chunk.id, - chunk.ref_count, - COUNT(file_chunk.chunk_id) AS actual - FROM chunk - LEFT JOIN file_chunk - ON chunk.id = file_chunk.chunk_id - GROUP BY chunk.id - HAVING chunk.ref_count != COUNT(file_chunk.chunk_id) - `) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query chunk reference counts: %v", err) - return fmt.Errorf("failed to query chunk reference counts: %w", err) - } - defer rows.Close() - - type chunkRefCount struct { - id int - refCount int - actual int - } - - for rows.Next() { - var c chunkRefCount - if err := rows.Scan(&c.id, 
&c.refCount, &c.actual); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan inconsistent chunk reference count: %w", err)) - continue - } - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("Inconsistent chunk reference count: chunk ID %d has ref_count=%d but actual references=%d", c.id, c.refCount, c.actual)) - } - - if err := rows.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) - } - - if len(errorList) > 0 { - log.Println(" ERROR ") - log.Printf("Found %d errors in checkReferenceCounts checks : ", errorCount) - if errorCount > maxErrorsToPrint { - log.Printf("showing only first %d:", len(errorList)) - } - for _, err := range errorList { - log.Printf(" - %v", err) - } - return fmt.Errorf("found %d errors in checkReferenceCounts checks", errorCount) - } - log.Println(" SUCCESS ") - return nil -} - -func checkOrphanChunks(dbconn *sql.DB) error { - // Check that there are no orphan chunks (chunks with ref_count > 0 but no file_chunk references) - log.Printf("Checking for orphan chunks with ref_count > 0 but no file_chunk references...") - var errorList []error - var errorCount int - rows, err := dbconn.Query(`SELECT chunk.id - FROM chunk - LEFT JOIN file_chunk ON chunk.id = file_chunk.chunk_id - WHERE file_chunk.chunk_id IS NULL AND chunk.ref_count > 0;`) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query orphan chunks: %v", err) - return fmt.Errorf("failed to query orphan chunks: %w", err) - } - defer rows.Close() - - for rows.Next() { - var id int - if err := rows.Scan(&id); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan orphan chunk: %w", err)) - continue - } - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("orphan chunk found: chunk ID %d has ref_count > 0 but no file_chunk references", id)) - } - - if err := rows.Err(); err != nil { 
- errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) - } - - if len(errorList) > 0 { - log.Println(" ERROR ") - log.Printf("Found %d errors in checkOrphanChunks checks:", errorCount) - if errorCount > maxErrorsToPrint { - log.Printf("showing only first %d:", len(errorList)) - } - for _, err := range errorList { - log.Printf(" - %v", err) - } - return fmt.Errorf("found %d errors in checkOrphanChunks checks", errorCount) - } - log.Println(" SUCCESS ") - return nil -} - -func checkFileChunkOrdering(dbconn *sql.DB) error { - // Check that file_chunks for each file are ordered by chunk_offset without gaps - log.Printf("Checking file chunk ordering and gaps...") - var errorList []error - var errorCount int - rows, err := dbconn.Query(`SELECT id - FROM logical_file lf - WHERE NOT EXISTS ( - SELECT 1 - FROM file_chunk fc - WHERE fc.logical_file_id = lf.id - );`) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query file chunk ordering: %v", err) - return fmt.Errorf("failed to query file chunk ordering: %w", err) - } - defer rows.Close() - - for rows.Next() { - var logicalFileID int - if err := rows.Scan(&logicalFileID); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan file chunk info: %w", err)) - continue - } - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("File with no chunks found: logical file ID %d has no chunks", logicalFileID)) - } - - if err := rows.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) - } - - if len(errorList) > 0 { - log.Println(" ERROR ") - log.Printf("Found %d lerrors in checkFileChunkOrdering checks :", errorCount) - if errorCount > maxErrorsToPrint { - log.Printf("showing only first %d:", len(errorList)) - } - for _, err := range errorList { - log.Printf(" - %v", err) - } - return fmt.Errorf("found %d errors in checkFileChunkOrdering 
checks", errorCount) - } - log.Println(" SUCCESS ") - return nil -} - -func appendToErrorList(errorList []error, err error) []error { - if len(errorList) < maxErrorsToPrint { - return append(errorList, err) - } - return errorList -} - -func checkContainerCompleteness(dbconn *sql.DB) error { - //sealed containers should not accept new chunks - log.Println("Checking sealed containers for completeness (no new chunks should be added to sealed containers)...") - var errorList []error - var errorCount int - rows, err := dbconn.Query(`SELECT id - FROM container - WHERE sealed = TRUE - AND EXISTS ( - SELECT 1 - FROM chunk - WHERE chunk.container_id = container.id - AND chunk.status != 'COMPLETED' - )`) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query container completeness: %v", err) - return fmt.Errorf("failed to query container completeness: %w", err) - } - defer rows.Close() - - for rows.Next() { - var containerID int - if err := rows.Scan(&containerID); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan container info: %w", err)) - continue - } - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("Sealed container with incomplete chunks found: container ID %d has incomplete chunks", containerID)) - } - - if err := rows.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) - } - - if len(errorList) > 0 { - log.Println(" ERROR ") - log.Printf("Found %d errors in checkContainerCompleteness checks:", errorCount) - if errorCount > maxErrorsToPrint { - log.Printf("showing only first %d:", len(errorList)) - } - for _, err := range errorList { - log.Printf(" - %v", err) - } - return fmt.Errorf("found %d errors in checkContainerCompleteness checks", errorCount) - } - log.Println(" SUCCESS ") - return nil - -} - -func verifyDeep(dbconn *sql.DB) error { - //for each container: - //open container file - //fetch chunks ordered 
by offset - //read container sequentially - //verify each chunk - log.Println("Starting deep verification of container files...") - var errorList []error - var errorCount int - //retrieve sealer container count - containerCount := 0 - containerCountErr := dbconn.QueryRow(`SELECT COUNT(*) FROM container WHERE sealed=true`).Scan(&containerCount) - if containerCountErr != nil { - log.Println(" ERROR ") - log.Printf("Failed to query sealed container count: %v", containerCountErr) - return fmt.Errorf("failed to query sealed container count: %w", containerCountErr) - } - - processedContainers := 0 - - containers, err := dbconn.Query(`SELECT id, filename, compression_algo FROM container WHERE sealed=true`) - if err != nil { - log.Println(" ERROR ") - log.Printf("Failed to query sealed containers: %v", err) - return fmt.Errorf("failed to query sealed containers: %w", err) - } - defer containers.Close() - - maxChunkSize := chunk.MaxChunkSize - buffer := make([]byte, maxChunkSize) - - for containers.Next() { - processedContainers++ - var containerID int - var filename string - var compressionAlgo string - if err := containers.Scan(&containerID, &filename, &compressionAlgo); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan container info: %w", err)) - continue - } - log.Printf("Verifying container %d/%d: %s", processedContainers, containerCount, filename) - - //open container file - fullPath := filepath.Join(container.ContainersDir, filename) - if compressionAlgo != "" && compressionAlgo != string(utils.CompressionNone) { - fullPath = fullPath + "." 
+ compressionAlgo - } - - file, err := os.Open(fullPath) - if err != nil { - log.Printf("Failed to open container file %s: %v", fullPath, err) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to open container file %s: %w", fullPath, err)) - continue - } - - info, err := file.Stat() - if err != nil { - errorCount++ - log.Printf("Failed to stat container file %s: %v", fullPath, err) - errorList = appendToErrorList(errorList, fmt.Errorf("failed to stat container file %s: %w", fullPath, err)) - file.Close() - continue - } - fileSize := info.Size() - - //fetch chunks ordered by offset - chunks, err := dbconn.Query(`SELECT chunk_offset, size, hash - FROM chunk - WHERE container_id = $1 - AND status = 'COMPLETED' - ORDER BY chunk_offset`, containerID) - if err != nil { - log.Printf("Failed to query chunks for container %d: %v", containerID, err) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to query chunks for container %d: %w", containerID, err)) - file.Close() - continue - } - - currentOffset := int64(0) - - hasChunks := false - - for chunks.Next() { - hasChunks = true - var chunkOffset int64 - var chunkSize int64 - var chunkHash string - if err := chunks.Scan(&chunkOffset, &chunkSize, &chunkHash); err != nil { - log.Printf("Failed to scan chunk info for container %d: %v", containerID, err) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to scan chunk info for container %d: %w", containerID, err)) - continue - } - - if chunkOffset < 0 || chunkSize < 0 { - log.Printf("Invalid chunk offset or size for container %d at offset %d: chunk size %d", containerID, chunkOffset, chunkSize) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("invalid chunk offset or size for container %d at offset %d: chunk size %d", containerID, chunkOffset, chunkSize)) - continue - } - - if chunkSize > int64(maxChunkSize) { - log.Printf("Chunk size %d exceeds maximum allowed size %d for chunk 
in container %d", chunkSize, maxChunkSize, containerID) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("chunk size %d exceeds maximum allowed size %d for chunk in container %d", chunkSize, maxChunkSize, containerID)) - continue - } - - if chunkOffset+chunkSize > fileSize { - log.Printf("Chunk exceeds file size for container %d at offset %d: chunk size %d, file size %d", containerID, chunkOffset, chunkSize, fileSize) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("chunk exceeds file size for container %d at offset %d: chunk size %d, file size %d", containerID, chunkOffset, chunkSize, fileSize)) - continue - } - - if chunkOffset != currentOffset { - _, err = file.Seek(chunkOffset, io.SeekStart) - if err != nil { - errorCount++ - errorList = appendToErrorList(errorList, - fmt.Errorf("failed to seek container %d to offset %d: %w", containerID, chunkOffset, err)) - continue - } - } - - _, err = io.ReadFull(file, buffer[:chunkSize]) - if err != nil { - log.Printf("Failed to read chunk data for container %d at offset %d: %v", containerID, chunkOffset, err) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to read chunk data for container %d at offset %d: %w", containerID, chunkOffset, err)) - continue - } - - currentOffset = chunkOffset + chunkSize - - //compute hash of the chunk data - hash := sha256.Sum256(buffer[:chunkSize]) - storedHash, err := hex.DecodeString(chunkHash) - if err != nil { - log.Printf("Failed to decode stored hash for container %d at offset %d: %v", containerID, chunkOffset, err) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("failed to decode stored hash for container %d at offset %d: %w", containerID, chunkOffset, err)) - continue - } - - if len(storedHash) != sha256.Size { - log.Printf("Invalid stored hash length for container %d at offset %d: expected %d, got %d", containerID, chunkOffset, sha256.Size, len(storedHash)) - errorCount++ - errorList = 
appendToErrorList(errorList, fmt.Errorf("invalid stored hash length for container %d at offset %d: expected %d, got %d", containerID, chunkOffset, sha256.Size, len(storedHash))) - continue - } - - //compare with stored hash - if !bytes.Equal(hash[:], storedHash) { - //if mismatch → corruption detected - computedHex := hex.EncodeToString(hash[:]) - log.Printf("Chunk hash mismatch for container %d at offset %d: expected %s, got %s", containerID, chunkOffset, chunkHash, computedHex) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("chunk hash mismatch for container %d at offset %d: expected %s, got %s", containerID, chunkOffset, chunkHash, computedHex)) - } - - } - - if !hasChunks { - log.Printf("WARNING: container %d has no chunks", containerID) - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("container %d has no chunks", containerID)) - _ = chunks.Close() - _ = file.Close() - continue - } - - if err := chunks.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed for chunks of container %d: %w", containerID, err)) - _ = chunks.Close() - _ = file.Close() - continue - } - - _ = chunks.Close() - - _ = file.Close() - - } - - if err := containers.Err(); err != nil { - errorCount++ - errorList = appendToErrorList(errorList, fmt.Errorf("row iteration failed for containers: %w", err)) - return fmt.Errorf("row iteration failed for containers: %w", err) - } - - if len(errorList) > 0 { - log.Println(" ERROR ") - log.Printf("Found %d errors in deep verification of container files:", errorCount) - if errorCount > maxErrorsToPrint { - log.Printf("showing only first %d:", len(errorList)) - } - for _, err := range errorList { - log.Printf(" - %v", err) - } - return fmt.Errorf("found %d errors in deep verification of container files", errorCount) - } - - log.Println("Deep verification completed successfully.") - return nil -} diff --git a/internal/maintenance/verify_command.go 
b/internal/maintenance/verify_command.go new file mode 100644 index 0000000..e319ab7 --- /dev/null +++ b/internal/maintenance/verify_command.go @@ -0,0 +1,89 @@ +package maintenance + +import ( + "database/sql" + "fmt" + + "github.com/franchoy/coldkeep/internal/db" + "github.com/franchoy/coldkeep/internal/verify" +) + +//verify system --standard +//verify system --full +//verify system --deep + +//verify file --standard +//verify file --full +//verify file --deep + +func VerifyCommand(target string, fileId int, verifyLevel verify.VerifyLevel) error { + + dbconn, err := db.ConnectDB() + if err != nil { + return fmt.Errorf("failed to connect to database: %w", err) + } + defer dbconn.Close() + + switch target { + case "system": + return verifySystem(dbconn, verifyLevel) + case "file": + return verifyFile(dbconn, fileId, verifyLevel) + default: + return fmt.Errorf("invalid target for verify command: %s", target) + } +} + +func verifySystem(dbconn *sql.DB, verifyLevel verify.VerifyLevel) error { + + switch verifyLevel { + case verify.VerifyStandard: + if err := verify.VerifySystemStandard(dbconn); err != nil { + return fmt.Errorf("system standard verification failed: %w", err) + } + case verify.VerifyFull: + if err := verify.VerifySystemFull(dbconn); err != nil { + return fmt.Errorf("system full verification failed: %w", err) + } + case verify.VerifyDeep: + if err := verify.VerifySystemDeep(dbconn); err != nil { + return fmt.Errorf("system deep verification failed: %w", err) + } + default: + return fmt.Errorf("invalid system verify level: %d", verifyLevel) + } + + return nil +} + +func verifyFile(dbconn *sql.DB, fileId int, verifyLevel verify.VerifyLevel) error { + + //verify that the file id exists + var exists bool + err := dbconn.QueryRow("SELECT EXISTS(SELECT 1 FROM logical_file WHERE id = $1)", fileId).Scan(&exists) + if err != nil { + return fmt.Errorf("failed to check if file exists: %w", err) + } + if !exists { + return fmt.Errorf("file with ID %d does not 
exist", fileId) + } + + switch verifyLevel { + case verify.VerifyStandard: + if err := verify.VerifyFileStandard(dbconn, fileId); err != nil { + return fmt.Errorf("file standard verification failed: %w", err) + } + case verify.VerifyFull: + if err := verify.VerifyFileFull(dbconn, fileId); err != nil { + return fmt.Errorf("file full verification failed: %w", err) + } + case verify.VerifyDeep: + if err := verify.VerifyFileDeep(dbconn, fileId); err != nil { + return fmt.Errorf("file deep verification failed: %w", err) + } + default: + return fmt.Errorf("invalid file verify level: %d", verifyLevel) + } + + return nil +} diff --git a/internal/storage/restore.go b/internal/storage/restore.go index 629c082..1f0de3d 100644 --- a/internal/storage/restore.go +++ b/internal/storage/restore.go @@ -15,7 +15,8 @@ import ( "github.com/franchoy/coldkeep/internal/container" "github.com/franchoy/coldkeep/internal/db" - "github.com/franchoy/coldkeep/internal/utils" + "github.com/franchoy/coldkeep/internal/utils_compression" + "github.com/franchoy/coldkeep/internal/utils_print" ) func RestoreFile(id int64, outputPath string) error { @@ -111,11 +112,11 @@ func RestoreFileWithDB(dbconn *sql.DB, fileID int64, outputPath string) error { return fmt.Errorf("scan chunk row: %w", err) } - algo := utils.CompressionType(algoStr) + algo := utils_compression.CompressionType(algoStr) // Container filename changes when compressed (CompressFile removes the original and writes filename.) containerFilename := filename - if algo != utils.CompressionNone { + if algo != utils_compression.CompressionNone { containerFilename = filename + "." 
+ algoStr } @@ -125,7 +126,7 @@ func RestoreFileWithDB(dbconn *sql.DB, fileID int64, outputPath string) error { var r io.ReadCloser var f *os.File - if algo == utils.CompressionNone { + if algo == utils_compression.CompressionNone { f, err = os.Open(containerPath) if err != nil { return fmt.Errorf("open container %q: %w", containerFilename, err) @@ -138,7 +139,7 @@ func RestoreFileWithDB(dbconn *sql.DB, fileID int64, outputPath string) error { // Use file as reader; close via f.Close() below r = f } else { - r, err = utils.OpenDecompressionReader(containerPath, algo) + r, err = utils_compression.OpenDecompressionReader(containerPath, algo) if err != nil { return fmt.Errorf("open compressed container %q: %w", containerFilename, err) } @@ -221,7 +222,7 @@ func RestoreFileWithDB(dbconn *sql.DB, fileID int64, outputPath string) error { fmt.Printf("File %s restored successfully\n", originalName) fmt.Printf(" Output: %s\n", outputPath) fmt.Printf(" SHA256: %s\n", restoredHash) - utils.PrintDuration(start) + utils_print.PrintDuration(start) return nil } diff --git a/internal/storage/store.go b/internal/storage/store.go index 49cfd6e..9c17d46 100644 --- a/internal/storage/store.go +++ b/internal/storage/store.go @@ -14,7 +14,7 @@ import ( "github.com/franchoy/coldkeep/internal/chunk" "github.com/franchoy/coldkeep/internal/container" "github.com/franchoy/coldkeep/internal/db" - "github.com/franchoy/coldkeep/internal/utils" + "github.com/franchoy/coldkeep/internal/utils_print" ) func StoreFile(path string) error { @@ -415,11 +415,11 @@ func StoreFileWithDB(dbconn *sql.DB, path string) (err error) { // Mark the operation as completed to avoid aborting it in the deferred function completed = true - utils.PrintSuccess("File stored successfully") + utils_print.PrintSuccess("File stored successfully") fmt.Printf(" FileID: %d\n", fileID) fmt.Printf(" Path: %s\n", path) fmt.Printf(" SHA256: %s\n", fileHash) - utils.PrintDuration(start) + utils_print.PrintDuration(start) return nil 
} @@ -476,8 +476,8 @@ func StoreFolder(root string) error { } } - utils.PrintSuccess("Folder stored successfully") - utils.PrintDuration(start) + utils_print.PrintSuccess("Folder stored successfully") + utils_print.PrintDuration(start) return nil } diff --git a/internal/utils/print.go b/internal/utils/print.go deleted file mode 100644 index 5907bc0..0000000 --- a/internal/utils/print.go +++ /dev/null @@ -1,14 +0,0 @@ -package utils - -import ( - "fmt" - "time" -) - -func PrintSuccess(title string) { - fmt.Println(title) -} - -func PrintDuration(start time.Time) { - fmt.Printf(" Time: %v\n", time.Since(start)) -} diff --git a/internal/utils/compression.go b/internal/utils_compression/compression.go similarity index 70% rename from internal/utils/compression.go rename to internal/utils_compression/compression.go index 71fc2e4..f056c10 100644 --- a/internal/utils/compression.go +++ b/internal/utils_compression/compression.go @@ -1,7 +1,9 @@ -package utils +package utils_compression import ( "compress/gzip" + "crypto/sha256" + "encoding/hex" "fmt" "io" "os" @@ -19,19 +21,23 @@ const ( var DefaultCompression = CompressionNone //CompressionZstd -func CompressFile(path string, algo CompressionType) (string, int64, error) { +func CompressFile(path string, algo CompressionType) (string, int64, string, error) { // No compression if algo == CompressionNone { info, err := os.Stat(path) if err != nil { - return "", 0, err + return "", 0, "", err } - return path, info.Size(), nil + sumHex, err := ComputeFileHashHex(path) + if err != nil { + return "", 0, "", fmt.Errorf("compute file hash: %w", err) + } + return path, info.Size(), sumHex, nil } input, err := os.Open(path) if err != nil { - return "", 0, err + return "", 0, "", err } defer input.Close() @@ -39,7 +45,7 @@ func CompressFile(path string, algo CompressionType) (string, int64, error) { output, err := os.Create(outputPath) if err != nil { - return "", 0, err + return "", 0, "", err } var writer io.WriteCloser @@ -52,13 
+58,13 @@ func CompressFile(path string, algo CompressionType) (string, int64, error) { encoder, err := zstd.NewWriter(output) if err != nil { output.Close() - return "", 0, err + return "", 0, "", err } writer = encoder default: output.Close() - return "", 0, fmt.Errorf("unknown compression algorithm: %q", algo) + return "", 0, "", fmt.Errorf("unknown compression algorithm: %q", algo) } // Copy data @@ -66,49 +72,73 @@ func CompressFile(path string, algo CompressionType) (string, int64, error) { _ = writer.Close() _ = output.Close() _ = os.Remove(outputPath) // cleanup broken file - return "", 0, err + return "", 0, "", err } // Close compression writer (flush buffers) if err := writer.Close(); err != nil { _ = output.Close() _ = os.Remove(outputPath) - return "", 0, err + return "", 0, "", err } // Ensure file is flushed to disk if err := output.Sync(); err != nil { _ = output.Close() _ = os.Remove(outputPath) - return "", 0, err + return "", 0, "", err } if err := output.Close(); err != nil { _ = os.Remove(outputPath) - return "", 0, err + return "", 0, "", err } // Validate compressed file info, err := os.Stat(outputPath) if err != nil { _ = os.Remove(outputPath) - return "", 0, err + return "", 0, "", err } if info.Size() == 0 { _ = os.Remove(outputPath) - return "", 0, fmt.Errorf("compressed file is empty") + return "", 0, "", fmt.Errorf("compressed file is empty") } // Only now remove original if err := os.Remove(path); err != nil { - return "", 0, err + return "", 0, "", err + } + + // Compute hash of compressed file + sumHex, err := ComputeFileHashHex(outputPath) + if err != nil { + return "", 0, "", fmt.Errorf("compute compressed file hash: %w", err) + } + + return outputPath, info.Size(), sumHex, nil +} + +func ComputeFileHashHex(path string) (string, error) { + + file, err := os.Open(path) + if err != nil { + return "", err + } + defer file.Close() + + hash := sha256.New() + + _, err = io.Copy(hash, file) + if err != nil { + return "", err } - return 
outputPath, info.Size(), nil + return hex.EncodeToString(hash.Sum(nil)), nil } -// zstd wrapper to satisfy io.ReadCloser +// zstd wrapper to satisfy io.ReadCloser type zstdReadCloser struct { decoder *zstd.Decoder file *os.File diff --git a/internal/utils/env.go b/internal/utils_env/env.go similarity index 95% rename from internal/utils/env.go rename to internal/utils_env/env.go index 8703da2..a29a75e 100644 --- a/internal/utils/env.go +++ b/internal/utils_env/env.go @@ -1,4 +1,4 @@ -package utils +package utils_env import ( "fmt" diff --git a/internal/utils_print/print.go b/internal/utils_print/print.go new file mode 100644 index 0000000..2dd749b --- /dev/null +++ b/internal/utils_print/print.go @@ -0,0 +1,23 @@ +package utils_print + +import ( + "fmt" + "time" +) + +const MaxErrorsToPrint = 50 + +func PrintSuccess(title string) { + fmt.Println(title) +} + +func PrintDuration(start time.Time) { + fmt.Printf(" Time: %v\n", time.Since(start)) +} + +func AppendToErrorList(errorList []error, err error) []error { + if len(errorList) < MaxErrorsToPrint { + return append(errorList, err) + } + return errorList +} diff --git a/internal/verify/verify_chunk.go b/internal/verify/verify_chunk.go new file mode 100644 index 0000000..b96c7cb --- /dev/null +++ b/internal/verify/verify_chunk.go @@ -0,0 +1,262 @@ +package verify + +import ( + "database/sql" + "fmt" + "log" + + "github.com/franchoy/coldkeep/internal/utils_print" +) + +func checkReferenceCounts(dbconn *sql.DB) error { + // Check that all chunks have correct reference counts (chunk.ref_count should match the actual number of file_chunk references) + log.Printf("Checking chunk reference counts consistency...") + var errorList []error + var errorCount int + rows, err := dbconn.Query(` + SELECT chunk.id, + chunk.ref_count, + COUNT(file_chunk.chunk_id) AS actual + FROM chunk + LEFT JOIN file_chunk + ON chunk.id = file_chunk.chunk_id + GROUP BY chunk.id + HAVING chunk.ref_count != COUNT(file_chunk.chunk_id) + `) + if err != 
nil { + log.Println(" ERROR ") + log.Printf("Failed to query chunk reference counts: %v", err) + return fmt.Errorf("failed to query chunk reference counts: %w", err) + } + defer rows.Close() + + type chunkRefCount struct { + id int + refCount int + actual int + } + + for rows.Next() { + var c chunkRefCount + if err := rows.Scan(&c.id, &c.refCount, &c.actual); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan inconsistent chunk reference count: %w", err)) + continue + } + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("Inconsistent chunk reference count: chunk ID %d has ref_count=%d but actual references=%d", c.id, c.refCount, c.actual)) + } + + if err := rows.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in checkReferenceCounts checks : ", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) + } + return fmt.Errorf("found %d errors in checkReferenceCounts checks", errorCount) + } + log.Println(" SUCCESS ") + return nil +} + +func checkOrphanChunks(dbconn *sql.DB) error { + // Check that there are no orphan chunks (chunks with ref_count > 0 but no file_chunk references) + log.Printf("Checking for orphan chunks with ref_count > 0 but no file_chunk references...") + var errorList []error + var errorCount int + rows, err := dbconn.Query(`SELECT chunk.id + FROM chunk + LEFT JOIN file_chunk ON chunk.id = file_chunk.chunk_id + WHERE file_chunk.chunk_id IS NULL AND chunk.ref_count > 0;`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query orphan chunks: %v", err) + return fmt.Errorf("failed to query orphan chunks: %w", err) + } + defer rows.Close() + + for 
rows.Next() { + var id int + if err := rows.Scan(&id); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan orphan chunk: %w", err)) + continue + } + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("orphan chunk found: chunk ID %d has ref_count > 0 but no file_chunk references", id)) + } + + if err := rows.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in checkOrphanChunks checks:", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) + } + return fmt.Errorf("found %d errors in checkOrphanChunks checks", errorCount) + } + log.Println(" SUCCESS ") + return nil +} + +func checkChunkOffsets(dbconn *sql.DB) error { + // Check that all chunks have location (container_id + chunk_offset) consistent with their status + // if status = COMPLETED → container_id NOT NULL chunk_offset NOT NULL + + log.Printf("Checking chunk offsets consistency with status...") + var errorList []error + var errorCount int + rows1, err := dbconn.Query(`SELECT id, container_id, chunk_offset, size, status + FROM chunk + WHERE status = 'COMPLETED' + AND (container_id IS NULL OR chunk_offset IS NULL);`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query completed chunks: %v", err) + return fmt.Errorf("failed to query completed chunks: %w", err) + } + defer rows1.Close() + + type chunkInfo struct { + id int + containerID int + chunkOffset int64 + size int64 + status string + } + + for rows1.Next() { + var c chunkInfo + if err := rows1.Scan(&c.id, &c.containerID, &c.chunkOffset, &c.size, &c.status); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, 
fmt.Errorf("failed to scan completed chunk: %w", err)) + continue + } + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("chunk ID %d has status COMPLETED but missing location info: container_id=%v chunk_offset=%v", c.id, c.containerID, c.chunkOffset)) + } + + if err := rows1.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + rows1.Close() + + // if status != COMPLETED → container_id NULL chunk_offset NULL + rows2, err := dbconn.Query(`SELECT id, container_id, chunk_offset, size, status + FROM chunk + WHERE status != 'COMPLETED' + AND (container_id IS NOT NULL OR chunk_offset IS NOT NULL);`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query non-completed chunks: %v", err) + return fmt.Errorf("failed to query non-completed chunks: %w", err) + } + defer rows2.Close() + + for rows2.Next() { + var c chunkInfo + if err := rows2.Scan(&c.id, &c.containerID, &c.chunkOffset, &c.size, &c.status); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan non-completed chunk: %w", err)) + continue + } + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("chunk ID %d has status %s but has location info: container_id=%v chunk_offset=%v", c.id, c.status, c.containerID, c.chunkOffset)) + } + + if err := rows2.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in checkChunkOffsets checks : ", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) + } + return fmt.Errorf("found %d errors in checkChunkOffsets checks", errorCount) + } + + log.Println(" SUCCESS ") + return 
nil +} + +func checkChunkOffsetValidity(dbconn *sql.DB) error { + // Check that all chunks with status = COMPLETED have valid container_id and chunk_offset values + // and that the chunk_offset + size does not exceed the container's current_size + log.Printf("Checking chunk offset validity for completed chunks...") + var errorList []error + var errorCount int + rows, err := dbconn.Query(`SELECT c.id, c.container_id, c.chunk_offset, c.size, cont.current_size + FROM chunk c + JOIN container cont ON c.container_id = cont.id + WHERE c.status = 'COMPLETED';`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query completed chunks for offset validity: %v", err) + return fmt.Errorf("failed to query completed chunks for offset validity: %w", err) + } + defer rows.Close() + + type chunkInfo struct { + id int + containerID int + chunkOffset int64 + size int64 + containerSize int64 + } + + for rows.Next() { + var c chunkInfo + if err := rows.Scan(&c.id, &c.containerID, &c.chunkOffset, &c.size, &c.containerSize); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan completed chunk for offset validity: %w", err)) + continue + } + + if c.chunkOffset < 0 || c.size <= 0 || c.chunkOffset > c.containerSize-c.size { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("chunk ID %d in container %d has invalid location: chunk_offset=%d size=%d container_size=%d", c.id, c.containerID, c.chunkOffset, c.size, c.containerSize)) + } + } + + if err := rows.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in checkChunkOffsetValidity checks :", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) 
+ } + return fmt.Errorf("found %d errors in checkChunkOffsetValidity checks", errorCount) + } + log.Println(" SUCCESS ") + return nil +} diff --git a/internal/verify/verify_constants.go b/internal/verify/verify_constants.go new file mode 100644 index 0000000..844d5e0 --- /dev/null +++ b/internal/verify/verify_constants.go @@ -0,0 +1,22 @@ +package verify + +type VerifyLevel int + +const ( + VerifyStandard VerifyLevel = iota + VerifyFull + VerifyDeep +) + +func VerifyLevelString(v VerifyLevel) string { + switch v { + case VerifyStandard: + return "standard" + case VerifyFull: + return "full" + case VerifyDeep: + return "deep" + default: + return "unknown" + } +} diff --git a/internal/verify/verify_container.go b/internal/verify/verify_container.go new file mode 100644 index 0000000..7de0b95 --- /dev/null +++ b/internal/verify/verify_container.go @@ -0,0 +1,256 @@ +package verify + +import ( + "database/sql" + "fmt" + "log" + "os" + "path/filepath" + + "github.com/franchoy/coldkeep/internal/container" + "github.com/franchoy/coldkeep/internal/utils_compression" + "github.com/franchoy/coldkeep/internal/utils_print" +) + +func checkContainersFileExistence(dbconn *sql.DB) error { + // Check that all containers have their files present on disk + // and that the file sizes match the DB records + log.Printf("Checking container file existence and size consistency...") + var errorList []error + var errorCount int + rows, err := dbconn.Query(`select id, filename, compression_algorithm, current_size + from container + where quarantine = false and sealed = true`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query container files: %v", err) + return fmt.Errorf("failed to query container files: %w", err) + } + defer rows.Close() + + for rows.Next() { + var id int + var filename string + var compressionalgo string + var currentSize int64 + if err := rows.Scan(&id, &filename, &compressionalgo, &currentSize); err != nil { + errorCount++ + errorList = 
utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan container file: %w", err)) + continue + } + // Check if the file exists on disk and has the correct size + if err := checkContainerFile(id, filename, compressionalgo, currentSize); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("container file check failed for container %d: %w", id, err)) + } + } + + if err := rows.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in checkContainersFileExistence checks:", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) + } + return fmt.Errorf("found %d errors in checkContainersFileExistence checks", errorCount) + } + log.Println(" SUCCESS ") + + return nil +} + +func checkContainerFile(id int, filename string, compressionalgo string, currentSize int64) error { + // Check if the file exists on disk and has the correct size + if compressionalgo != "" && compressionalgo != string(utils_compression.CompressionNone) { + filename = filename + "." 
+ compressionalgo + } + + fullPath := filepath.Join(container.ContainersDir, filename) + + info, err := os.Stat(fullPath) + if err != nil { + return err + } + + // check if file exists + if !info.Mode().IsRegular() { + return fmt.Errorf("file does not exist or is not a regular file: %s", fullPath) + } + + // check if file size matches the DB record + if info.Size() != currentSize { + return fmt.Errorf("file size mismatch: expected %d, got %d", currentSize, info.Size()) + } + + return nil +} + +func checkChunkContainerConsistency(dbconn *sql.DB) error { + // Check that all chunks are correctly associated with their containers + // if container_id != NULL → chunk.status must be COMPLETED + log.Printf("Checking chunk-container consistency...") + var errorList []error + var errorCount int + rows, err := dbconn.Query(`SELECT id + FROM chunk + WHERE container_id IS NOT NULL + AND status != 'COMPLETED';`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query chunk-container consistency: %v", err) + return fmt.Errorf("failed to query chunk-container consistency: %w", err) + } + defer rows.Close() + + for rows.Next() { + var id int + if err := rows.Scan(&id); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan inconsistent chunk: %w", err)) + continue + } + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("chunk with id %d has container_id but status is not COMPLETED", id)) + } + + if err := rows.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in checkChunkContainerConsistency checks:", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) + } + return fmt.Errorf("found %d 
errors in checkChunkContainerConsistency checks", errorCount) + } + + log.Println(" SUCCESS ") + return nil +} + +func checkContainerHash(dbconn *sql.DB) error { + // Check that all sealed containers have a valid hash that matches the file content + log.Printf("Checking container file hash consistency...") + var errorList []error + var errorCount int + rows, err := dbconn.Query(`select id, filename, compression_algorithm, container_hash + from container + where quarantine = false and sealed = true`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query container hashes: %v", err) + return fmt.Errorf("failed to query container hashes: %w", err) + } + defer rows.Close() + + var totalRows int + if err := dbconn.QueryRow(`SELECT COUNT(*) FROM container WHERE quarantine = false AND sealed = true`).Scan(&totalRows); err != nil { + log.Printf("Failed to query total container count: %v", err) + return fmt.Errorf("failed to query total container count: %w", err) + } + var containercount int + for rows.Next() { + containercount++ + log.Printf("Checking container %d / %d", containercount, totalRows) + var id int + var filename string + var compressionalgo string + var storedHash string + if err := rows.Scan(&id, &filename, &compressionalgo, &storedHash); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan container hash: %w", err)) + continue + } + // Check if the file exists on disk and has the correct hash + if err := container.CheckContainerHashFile(id, filename, storedHash); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("container hash check failed for container %d: %w", id, err)) + } + } + + if err := rows.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in checkContainerHash 
checks:", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) + } + return fmt.Errorf("found %d errors in checkContainerHash checks", errorCount) + } + log.Println(" SUCCESS ") + + return nil +} + +func checkContainerCompleteness(dbconn *sql.DB) error { + //sealed containers should not accept new chunks + log.Println("Checking sealed containers for completeness (no new chunks should be added to sealed containers)...") + var errorList []error + var errorCount int + rows, err := dbconn.Query(`SELECT id + FROM container + WHERE sealed = TRUE + AND EXISTS ( + SELECT 1 + FROM chunk + WHERE chunk.container_id = container.id + AND chunk.status != 'COMPLETED' + )`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query container completeness: %v", err) + return fmt.Errorf("failed to query container completeness: %w", err) + } + defer rows.Close() + + for rows.Next() { + var containerID int + if err := rows.Scan(&containerID); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan container info: %w", err)) + continue + } + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("Sealed container with incomplete chunks found: container ID %d has incomplete chunks", containerID)) + } + + if err := rows.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in checkContainerCompleteness checks:", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) + } + return fmt.Errorf("found %d errors in checkContainerCompleteness checks", errorCount) + } + log.Println(" SUCCESS 
") + return nil + +} diff --git a/internal/verify/verify_file.go b/internal/verify/verify_file.go new file mode 100644 index 0000000..7735b12 --- /dev/null +++ b/internal/verify/verify_file.go @@ -0,0 +1,414 @@ +package verify + +import ( + "crypto/sha256" + "database/sql" + "encoding/binary" + "encoding/hex" + "fmt" + "io" + "log" + "os" + "path/filepath" + + "github.com/franchoy/coldkeep/internal/container" + "github.com/franchoy/coldkeep/internal/utils_compression" + "github.com/franchoy/coldkeep/internal/utils_print" +) + +func checkFileChunkOrdering(dbconn *sql.DB) error { + // Check that file_chunks for each file are ordered by chunk_offset without gaps + log.Printf("Checking file chunk ordering and gaps...") + var errorList []error + var errorCount int + rows, err := dbconn.Query(`SELECT id + FROM logical_file lf + WHERE NOT EXISTS ( + SELECT 1 + FROM file_chunk fc + WHERE fc.logical_file_id = lf.id + );`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query file chunk ordering: %v", err) + return fmt.Errorf("failed to query file chunk ordering: %w", err) + } + defer rows.Close() + + for rows.Next() { + var logicalFileID int + if err := rows.Scan(&logicalFileID); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan file chunk info: %w", err)) + continue + } + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("File with no chunks found: logical file ID %d has no chunks", logicalFileID)) + } + + if err := rows.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed: %w", err)) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in checkFileChunkOrdering checks :", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) + } + return 
fmt.Errorf("found %d errors in checkFileChunkOrdering checks", errorCount) + } + log.Println(" SUCCESS ") + return nil +} + +func VerifyFileStandard(dbconn *sql.DB, fileId int) error { + log.Printf("starting standard file verification for logical file with ID %d...", fileId) + if fileId <= 0 { + return fmt.Errorf("invalid file ID: %d", fileId) + } + + //ensure file id exists + var id int + var status string + err := dbconn.QueryRow(`SELECT id, + status + from logical_file + where id = ?`, fileId).Scan(&id, &status) + if err != nil { + return fmt.Errorf("failed to check if file exists: %w", err) + } + + if status != "COMPLETED" { + return fmt.Errorf("logical file %d has invalid status: expected COMPLETED but got %s", fileId, status) + } + var hasChunks bool = false + //ensure file_chunks exists for the file + filechunkrows, err := dbconn.Query(`SELECT chunk_id, chunk_order FROM file_chunk WHERE logical_file_id = ? order by chunk_order asc`, fileId) + if err != nil { + return fmt.Errorf("failed to query file chunks: %w", err) + } + defer filechunkrows.Close() + + var chunkIdList []int + var previousChunkOrder int = 0 + for filechunkrows.Next() { + hasChunks = true + + var chunkId int + var chunkOrder int + if err := filechunkrows.Scan(&chunkId, &chunkOrder); err != nil { + return fmt.Errorf("failed to scan file chunk info: %w", err) + } + chunkIdList = append(chunkIdList, chunkId) + //if there is any missing chunk_order, it means the file chunk ordering is wrong + if chunkOrder != previousChunkOrder { + return fmt.Errorf("file chunk ordering error: expected chunk_order %d but got %d for chunk ID %d", previousChunkOrder, chunkOrder, chunkId) + } + previousChunkOrder++ + } + + if err := filechunkrows.Err(); err != nil { + return fmt.Errorf("row iteration failed: %w", err) + } + + if !hasChunks { + return fmt.Errorf("logical file %d has no chunks", fileId) + } + + //ensure all chunks exists with status COMPLETED and have valid container_id and chunk_offset + chunkrows, 
err := dbconn.Query(`SELECT + c.id, + c.container_id, + c.chunk_offset, + c.size, + c.status + FROM chunk c + JOIN file_chunk fc ON fc.chunk_id = c.id + WHERE fc.logical_file_id = ? + ORDER BY fc.chunk_order ASC`, fileId) + if err != nil { + return fmt.Errorf("failed to query chunks: %w", err) + } + defer chunkrows.Close() + + var chunkCount int + for chunkrows.Next() { + var id int + var containerId sql.NullInt64 + var chunkOffset sql.NullInt64 + var size int64 + var status string + if err := chunkrows.Scan(&id, &containerId, &chunkOffset, &size, &status); err != nil { + return fmt.Errorf("failed to scan chunk info: %w", err) + } + if status != "COMPLETED" { + return fmt.Errorf("chunk with ID %d has invalid status: expected COMPLETED but got %s", id, status) + } + if !containerId.Valid { + return fmt.Errorf("chunk with ID %d has invalid location info: container_id is NULL", id) + } + if !chunkOffset.Valid { + return fmt.Errorf("chunk with ID %d has invalid location info: chunk_offset is NULL", id) + } + chunkCount++ + } + if err := chunkrows.Err(); err != nil { + return fmt.Errorf("row iteration failed: %w", err) + } + + if chunkCount != len(chunkIdList) { + return fmt.Errorf("chunk count mismatch: expected %d but got %d", len(chunkIdList), chunkCount) + } + + log.Printf("standard file verification for logical file with ID %d completed successfully", fileId) + + return nil +} + +func verifyFileContainersAndOffsets(db *sql.DB, fileID int) error { + rows, err := db.Query(` + SELECT + c.id, + c.chunk_offset, + c.size, + ctr.id, + ctr.filename, + ctr.current_size, + ctr.sealed, + ctr.quarantine, + ctr.container_hash, + ctr.compression_algorithm + FROM file_chunk fc + JOIN chunk c ON c.id = fc.chunk_id + JOIN container ctr ON ctr.id = c.container_id + WHERE fc.logical_file_id = ? 
+ ORDER BY fc.chunk_order + `, fileID) + if err != nil { + return fmt.Errorf("query file containers and offsets: %w", err) + } + defer rows.Close() + + const chunkRecordHeaderSize = int64(32 + 4) + + type containerInfo struct { + path string + physicalSize int64 + currentSize int64 + algo utils_compression.CompressionType + } + + containerInfoByID := map[int64]containerInfo{} + + for rows.Next() { + var chunkID int + var chunkOffset int64 + var chunkSize int64 + var containerID int64 + var filename string + var currentSize int64 + var sealed bool + var quarantine bool + var containerHash string + var compressionAlgorithm string + + if err := rows.Scan( + &chunkID, + &chunkOffset, + &chunkSize, + &containerID, + &filename, + ¤tSize, + &sealed, + &quarantine, + &containerHash, + &compressionAlgorithm, + ); err != nil { + return fmt.Errorf("scan file containers and offsets: %w", err) + } + + if !sealed { + return fmt.Errorf("container %d is not sealed", containerID) + } + if quarantine { + return fmt.Errorf("container %d is quarantined", containerID) + } + if containerHash == "" { + return fmt.Errorf("container %d missing hash", containerID) + } + + info, ok := containerInfoByID[containerID] + if !ok { + algo := utils_compression.CompressionType(compressionAlgorithm) + containerFilename := filename + if algo != utils_compression.CompressionNone { + containerFilename = filename + "." 
+ compressionAlgorithm + } + + fullPath := filepath.Join(container.ContainersDir, containerFilename) + stat, err := os.Stat(fullPath) + if err != nil { + return fmt.Errorf("missing container file: %s: %w", fullPath, err) + } + + info = containerInfo{ + path: fullPath, + physicalSize: stat.Size(), + currentSize: currentSize, + algo: algo, + } + containerInfoByID[containerID] = info + + if algo == utils_compression.CompressionNone && stat.Size() != currentSize { + return fmt.Errorf("container %d size mismatch: expected %d got %d", containerID, currentSize, stat.Size()) + } + } + + recordEnd := chunkOffset + chunkRecordHeaderSize + chunkSize + if recordEnd > info.currentSize { + return fmt.Errorf("chunk %d exceeds container %d bounds in metadata", chunkID, containerID) + } + if info.algo == utils_compression.CompressionNone && recordEnd > info.physicalSize { + return fmt.Errorf("chunk %d exceeds container %d physical file size", chunkID, containerID) + } + } + + if err := rows.Err(); err != nil { + return fmt.Errorf("iterate file containers and offsets: %w", err) + } + + return nil +} + +func VerifyFileFull(dbconn *sql.DB, fileId int) error { + if err := VerifyFileStandard(dbconn, fileId); err != nil { + return fmt.Errorf("standard verification failed: %w", err) + } + + log.Printf("starting Full file verification for logical file with ID %d...", fileId) + if err := verifyFileContainersAndOffsets(dbconn, fileId); err != nil { + return fmt.Errorf("container and offset verification failed: %w", err) + } + log.Printf("Full file verification for logical file with ID %d completed successfully", fileId) + + return nil +} + +func verifyFileChunkHashes(db *sql.DB, fileID int) error { + rows, err := db.Query(` + SELECT + c.id, + c.chunk_offset, + c.size, + c.chunk_hash, + ctr.filename, + ctr.compression_algorithm + FROM file_chunk fc + JOIN chunk c ON c.id = fc.chunk_id + JOIN container ctr ON ctr.id = c.container_id + WHERE fc.logical_file_id = ? 
+ ORDER BY ctr.id, c.chunk_offset + `, fileID) + if err != nil { + return fmt.Errorf("query file chunk hashes: %w", err) + } + defer rows.Close() + + for rows.Next() { + var chunkID int + var chunkOffset int64 + var expectedSize int64 + var expectedChunkHash string + var filename string + var compressionAlgorithm string + + if err := rows.Scan( + &chunkID, + &chunkOffset, + &expectedSize, + &expectedChunkHash, + &filename, + &compressionAlgorithm, + ); err != nil { + return fmt.Errorf("scan file chunk hashes: %w", err) + } + + containerFilename := filename + algo := utils_compression.CompressionType(compressionAlgorithm) + if algo != utils_compression.CompressionNone { + containerFilename = filename + "." + compressionAlgorithm + } + + r, err := utils_compression.OpenDecompressionReader(filepath.Join(container.ContainersDir, containerFilename), algo) + if err != nil { + return fmt.Errorf("open container for chunk %d: %w", chunkID, err) + } + + if _, err := io.CopyN(io.Discard, r, chunkOffset); err != nil { + _ = r.Close() + return fmt.Errorf("seek chunk %d within container stream: %w", chunkID, err) + } + + headerHash := make([]byte, sha256.Size) + if _, err := io.ReadFull(r, headerHash); err != nil { + _ = r.Close() + return fmt.Errorf("read chunk %d header hash: %w", chunkID, err) + } + + sizeBuf := make([]byte, 4) + if _, err := io.ReadFull(r, sizeBuf); err != nil { + _ = r.Close() + return fmt.Errorf("read chunk %d header size: %w", chunkID, err) + } + + recordSize := int64(binary.LittleEndian.Uint32(sizeBuf)) + if recordSize != expectedSize { + _ = r.Close() + return fmt.Errorf("chunk %d size mismatch: expected %d got %d", chunkID, expectedSize, recordSize) + } + + chunkData := make([]byte, recordSize) + if _, err := io.ReadFull(r, chunkData); err != nil { + _ = r.Close() + return fmt.Errorf("read chunk %d data: %w", chunkID, err) + } + + if err := r.Close(); err != nil { + return fmt.Errorf("close container reader for chunk %d: %w", chunkID, err) + } + + sum 
:= sha256.Sum256(chunkData) + computedHash := hex.EncodeToString(sum[:]) + if computedHash != expectedChunkHash { + return fmt.Errorf("chunk %d corrupted: expected %s got %s", chunkID, expectedChunkHash, computedHash) + } + if hex.EncodeToString(headerHash) != expectedChunkHash { + return fmt.Errorf("chunk %d record header hash mismatch", chunkID) + } + } + + if err := rows.Err(); err != nil { + return fmt.Errorf("iterate file chunk hashes: %w", err) + } + + return nil +} + +func VerifyFileDeep(dbconn *sql.DB, fileId int) error { + if err := VerifyFileFull(dbconn, fileId); err != nil { + return fmt.Errorf("full verification failed: %w", err) + } + + log.Printf("starting deep file verification for logical file with ID %d...", fileId) + if err := verifyFileChunkHashes(dbconn, fileId); err != nil { + return fmt.Errorf("chunk hash verification failed: %w", err) + } + log.Printf("Deep file verification for logical file with ID %d completed successfully", fileId) + + return nil +} diff --git a/internal/verify/verify_system.go b/internal/verify/verify_system.go new file mode 100644 index 0000000..ff13674 --- /dev/null +++ b/internal/verify/verify_system.go @@ -0,0 +1,365 @@ +package verify + +import ( + "bytes" + "crypto/sha256" + "database/sql" + "encoding/binary" + "encoding/hex" + "fmt" + "io" + "log" + "os" + "path/filepath" + + "github.com/franchoy/coldkeep/internal/chunk" + "github.com/franchoy/coldkeep/internal/container" + "github.com/franchoy/coldkeep/internal/utils_compression" + "github.com/franchoy/coldkeep/internal/utils_print" +) + +func printCounters(dbconn *sql.DB) error { + var containerCount, chunkCount, fileCount int + //list container counter to be checked + err := dbconn.QueryRow("SELECT COUNT(*) FROM container").Scan(&containerCount) + if err != nil { + return fmt.Errorf("failed to query container count: %w", err) + } + //list chunk counter to be checked + err = dbconn.QueryRow("SELECT COUNT(*) FROM chunk").Scan(&chunkCount) + if err != nil { + 
return fmt.Errorf("failed to query chunk count: %w", err) + } + //list file counter to be checked + err = dbconn.QueryRow("SELECT COUNT(*) FROM logical_file").Scan(&fileCount) + if err != nil { + return fmt.Errorf("failed to query logical file count: %w", err) + } + + log.Printf("Starting verification: %d containers, %d chunks, %d logical files to check", containerCount, chunkCount, fileCount) + + return nil +} + +func VerifySystemStandard(dbconn *sql.DB) error { + // standard + // reference count check + // orphan chunk check + // file chunk ordering check + log.Printf("Starting standard system verification...") + + var err error + + //print counters to be checked + if err := printCounters(dbconn); err != nil { + return err + } + + //check that all chunks have correct reference counts (chunk.ref_count should match the actual number of file_chunk references) + if err = checkReferenceCounts(dbconn); err != nil { + return err + } + + //check that there are no orphan chunks (chunks with ref_count > 0 but no file_chunk references) + if err = checkOrphanChunks(dbconn); err != nil { + return err + } + + //check that file_chunks for each file are ordered by chunk_offset without gaps + if err = checkFileChunkOrdering(dbconn); err != nil { + return err + } + + log.Printf("Standard system verification completed successfully.") + + return nil +} + +func VerifySystemFull(dbconn *sql.DB) error { + // full + // standard checks + + // container file existence and size check + // container hash check + // chunk-container consistency check + // chunk offsets consistency check + // chunk offset validity check + // checkContainerCompleteness + log.Printf("Starting Full system verification...") + + var err error + + //first verify standard checks + if err = VerifySystemStandard(dbconn); err != nil { + return err + } + + //check that all containers have their files present on disk and that the file sizes match the DB records + if err = checkContainersFileExistence(dbconn); err != nil { 
+ return err + } + + //check that all sealed containers have a valid hash that matches the file content + if err = checkContainerHash(dbconn); err != nil { + return err + } + + //check that all chunks are correctly associated with their containers (if container_id != NULL → chunk.status must be COMPLETED) + if err = checkChunkContainerConsistency(dbconn); err != nil { + return err + } + + //check that all chunks have location (container_id + chunk_offset) consistent with their status (if status = COMPLETED → container_id NOT NULL chunk_offset NOT NULL, if status != COMPLETED → container_id NULL chunk_offset NULL) + if err = checkChunkOffsets(dbconn); err != nil { + return err + } + + //check that all chunks with status = COMPLETED have valid container_id and chunk_offset values and that the chunk_offset + size does not exceed the container's current_size + if err = checkChunkOffsetValidity(dbconn); err != nil { + return err + } + + //check that sealed containers should not accept new chunks + if err = checkContainerCompleteness(dbconn); err != nil { + return err + } + + log.Printf("Full system verification completed successfully.") + + return nil +} + +func VerifySystemDeep(dbconn *sql.DB) error { + // deep + // standard checks + + // full checks + + // actual file integrity checks (e.g. 
read container files and verify chunk data against stored hashes) + log.Printf("Starting Deep system verification...") + + var err error + + //first verify full checks + if err = VerifySystemFull(dbconn); err != nil { + return err + } + + //real deep verification + //for each container: + //open container file + //fetch chunks ordered by offset + //read container sequentially + //verify each chunk + log.Println("Starting deep verification of container files...") + var errorList []error + var errorCount int + //retrieve sealer container count + containerCount := 0 + containerCountErr := dbconn.QueryRow(`SELECT COUNT(*) FROM container WHERE sealed=true`).Scan(&containerCount) + if containerCountErr != nil { + log.Println(" ERROR ") + log.Printf("Failed to query sealed container count: %v", containerCountErr) + return fmt.Errorf("failed to query sealed container count: %w", containerCountErr) + } + + processedContainers := 0 + + containers, err := dbconn.Query(`SELECT id, filename, compression_algorithm FROM container WHERE sealed=true`) + if err != nil { + log.Println(" ERROR ") + log.Printf("Failed to query sealed containers: %v", err) + return fmt.Errorf("failed to query sealed containers: %w", err) + } + defer containers.Close() + + maxChunkSize := chunk.MaxChunkSize + + for containers.Next() { + processedContainers++ + var containerID int + var filename string + var compressionAlgo string + if err := containers.Scan(&containerID, &filename, &compressionAlgo); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan container info: %w", err)) + continue + } + log.Printf("Verifying container %d/%d: %s", processedContainers, containerCount, filename) + + //construct full path with compression extension if needed + fullPath := filepath.Join(container.ContainersDir, filename) + algo := utils_compression.CompressionType(compressionAlgo) + if algo != utils_compression.CompressionNone { + fullPath = fullPath + "." 
+ compressionAlgo + } + + //get file size for validation + info, err := os.Stat(fullPath) + if err != nil { + errorCount++ + log.Printf("Failed to stat container file %s: %v", fullPath, err) + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to stat container file %s: %w", fullPath, err)) + continue + } + fileSize := info.Size() + + //fetch chunks ordered by offset + chunks, err := dbconn.Query(`SELECT chunk_offset, size, chunk_hash + FROM chunk + WHERE container_id = $1 + AND status = 'COMPLETED' + ORDER BY chunk_offset`, containerID) + if err != nil { + log.Printf("Failed to query chunks for container %d: %v", containerID, err) + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to query chunks for container %d: %w", containerID, err)) + continue + } + + hasChunks := false + + for chunks.Next() { + hasChunks = true + var chunkOffset int64 + var chunkSize int64 + var chunkHash string + if err := chunks.Scan(&chunkOffset, &chunkSize, &chunkHash); err != nil { + log.Printf("Failed to scan chunk info for container %d: %v", containerID, err) + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("failed to scan chunk info for container %d: %w", containerID, err)) + continue + } + + if chunkOffset < 0 || chunkSize < 0 { + log.Printf("Invalid chunk offset or size for container %d at offset %d: chunk size %d", containerID, chunkOffset, chunkSize) + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("invalid chunk offset or size for container %d at offset %d: chunk size %d", containerID, chunkOffset, chunkSize)) + continue + } + + if chunkSize > int64(maxChunkSize) { + log.Printf("Chunk size %d exceeds maximum allowed size %d for chunk in container %d", chunkSize, maxChunkSize, containerID) + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("chunk size %d exceeds maximum allowed size %d for chunk in container %d", chunkSize, 
maxChunkSize, containerID)) + continue + } + + if chunkOffset+chunkSize > fileSize { + log.Printf("Chunk exceeds file size for container %d at offset %d: chunk size %d, file size %d", containerID, chunkOffset, chunkSize, fileSize) + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("chunk exceeds file size for container %d at offset %d: chunk size %d, file size %d", containerID, chunkOffset, chunkSize, fileSize)) + continue + } + + algo := utils_compression.CompressionType(compressionAlgo) + + containerPath := filepath.Join(container.ContainersDir, filename) + + // Open as plain file (seek) or as decompressed stream (skip bytes) + var r io.ReadCloser + var f *os.File + + if algo == utils_compression.CompressionNone { + f, err = os.Open(containerPath) + if err != nil { + return fmt.Errorf("open container %q: %w", filename, err) + } + // Seek to record start + if _, err := f.Seek(chunkOffset, io.SeekStart); err != nil { + _ = f.Close() + return fmt.Errorf("seek container %q to offset %d: %w", filename, chunkOffset, err) + } + // Use file as reader; close via f.Close() below + r = f + } else { + r, err = utils_compression.OpenDecompressionReader(containerPath, algo) + if err != nil { + return fmt.Errorf("open compressed container %q: %w", filename, err) + } + // Skip to record offset inside the *uncompressed* stream + if _, err := io.CopyN(io.Discard, r, chunkOffset); err != nil { + _ = r.Close() + return fmt.Errorf("skip to chunk offset in decompressed stream for container %q: %w", filename, err) + } + } + + // Read record header + headerHash := make([]byte, 32) + if _, err := io.ReadFull(r, headerHash); err != nil { + _ = r.Close() + return fmt.Errorf("read chunk header hash for container %q: %w", filename, err) + } + + sizeBuf := make([]byte, 4) + if _, err := io.ReadFull(r, sizeBuf); err != nil { + _ = r.Close() + return fmt.Errorf("read chunk header size for container %q: %w", filename, err) + } + recordSize := 
int64(binary.LittleEndian.Uint32(sizeBuf)) + + if recordSize != chunkSize { + _ = r.Close() + return fmt.Errorf("chunk size mismatch at offset %d (db=%d record=%d) for container %q", chunkOffset, chunkSize, recordSize, filename) + } + + // Read chunk data + chunkData := make([]byte, recordSize) + if _, err := io.ReadFull(r, chunkData); err != nil { + _ = r.Close() + return fmt.Errorf("read chunk data for container %q: %w", filename, err) + } + + // Close container reader + if err := r.Close(); err != nil { + return fmt.Errorf("close container reader for container %q: %w", filename, err) + } + + // Validate hashes (DB hash and on-disk record hash) + sum := sha256.Sum256(chunkData) + sumHex := hex.EncodeToString(sum[:]) + + if sumHex != chunkHash { + return fmt.Errorf("chunk hash mismatch at offset %d (db=%s computed=%s) for container %q", chunkOffset, chunkHash, sumHex, filename) + } + if !bytes.Equal(sum[:], headerHash) { + return fmt.Errorf("chunk record header hash mismatch at offset %d for container %q", chunkOffset, filename) + } + + } + + if !hasChunks { + log.Printf("WARNING: container %d has no chunks", containerID) + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("container %d has no chunks", containerID)) + _ = chunks.Close() + continue + } + + if err := chunks.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed for chunks of container %d: %w", containerID, err)) + _ = chunks.Close() + continue + } + + _ = chunks.Close() + } + if err := containers.Err(); err != nil { + errorCount++ + errorList = utils_print.AppendToErrorList(errorList, fmt.Errorf("row iteration failed for containers: %w", err)) + return fmt.Errorf("row iteration failed for containers: %w", err) + } + + if len(errorList) > 0 { + log.Println(" ERROR ") + log.Printf("Found %d errors in deep verification of container files:", errorCount) + if errorCount > utils_print.MaxErrorsToPrint { + 
log.Printf("showing only first %d:", len(errorList)) + } + for _, err := range errorList { + log.Printf(" - %v", err) + } + return fmt.Errorf("found %d errors in deep verification of container files", errorCount) + } + + log.Println("Deep verification completed successfully.") + return nil +} diff --git a/internal/verify/verify_utils.go b/internal/verify/verify_utils.go new file mode 100644 index 0000000..efc7e18 --- /dev/null +++ b/internal/verify/verify_utils.go @@ -0,0 +1 @@ +package verify diff --git a/tests/integration_test.go b/tests/integration_test.go index 3aa0911..0e6fb79 100644 --- a/tests/integration_test.go +++ b/tests/integration_test.go @@ -16,7 +16,8 @@ import ( "github.com/franchoy/coldkeep/internal/maintenance" "github.com/franchoy/coldkeep/internal/recovery" "github.com/franchoy/coldkeep/internal/storage" - "github.com/franchoy/coldkeep/internal/utils" + "github.com/franchoy/coldkeep/internal/utils_compression" + "github.com/franchoy/coldkeep/internal/verify" ) // NOTE: @@ -160,6 +161,95 @@ func fetchFileIDByHash(t *testing.T, dbconn *sql.DB, fileHash string) int64 { return id } +type fileChunkRecord struct { + chunkID int64 + containerID int64 + chunkOffset int64 + chunkSize int64 + containerFilename string + compressionAlgorithm string + containerCurrentSize int64 +} + +func setupStoredFileForVerification(t *testing.T, filename string, size int) (*sql.DB, string, int64) { + t.Helper() + requireDB(t) + + tmp := t.TempDir() + container.ContainersDir = filepath.Join(tmp, "containers") + _ = os.Setenv("COLDKEEP_STORAGE_DIR", container.ContainersDir) + resetStorage(t) + + dbconn, err := db.ConnectDB() + if err != nil { + t.Fatalf("connectDB: %v", err) + } + + applySchema(t, dbconn) + resetDB(t, dbconn) + + utils_compression.DefaultCompression = utils_compression.CompressionNone + + inputDir := filepath.Join(tmp, "input") + if err := os.MkdirAll(inputDir, 0o755); err != nil { + dbconn.Close() + t.Fatalf("mkdir inputDir: %v", err) + } + + inPath := 
createTempFile(t, inputDir, filename, size) + fileHash := sha256File(t, inPath) + + if err := storage.StoreFileWithDB(dbconn, inPath); err != nil { + dbconn.Close() + t.Fatalf("store file: %v", err) + } + + return dbconn, inPath, fetchFileIDByHash(t, dbconn, fileHash) +} + +func fetchFirstChunkRecord(t *testing.T, dbconn *sql.DB, fileID int64) fileChunkRecord { + t.Helper() + + var record fileChunkRecord + err := dbconn.QueryRow(` + SELECT + c.id, + c.container_id, + c.chunk_offset, + c.size, + ctr.filename, + ctr.compression_algorithm, + ctr.current_size + FROM file_chunk fc + JOIN chunk c ON c.id = fc.chunk_id + JOIN container ctr ON ctr.id = c.container_id + WHERE fc.logical_file_id = $1 + ORDER BY fc.chunk_order ASC + LIMIT 1 + `, fileID).Scan( + &record.chunkID, + &record.containerID, + &record.chunkOffset, + &record.chunkSize, + &record.containerFilename, + &record.compressionAlgorithm, + &record.containerCurrentSize, + ) + if err != nil { + t.Fatalf("query first chunk record: %v", err) + } + + return record +} + +func containerPathForRecord(record fileChunkRecord) string { + filename := record.containerFilename + if record.compressionAlgorithm != "" && record.compressionAlgorithm != string(utils_compression.CompressionNone) { + filename += "." + record.compressionAlgorithm + } + return filepath.Join(container.ContainersDir, filename) +} + func TestRoundTripStoreRestore(t *testing.T) { requireDB(t) @@ -179,7 +269,7 @@ func TestRoundTripStoreRestore(t *testing.T) { resetDB(t, dbconn) // Ensure we don't exercise heavy compression here. 
- utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -231,7 +321,7 @@ func TestDedupSameFile(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -271,7 +361,7 @@ func TestStoreFolderParallelSmoke(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone // Build folder with duplicates + shared-chunk variants inputDir := filepath.Join(tmp, "folder") @@ -409,7 +499,7 @@ func TestGCRemovesUnusedContainers(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -461,7 +551,7 @@ func TestGCRemovesUnusedContainers(t *testing.T) { } // Run verify before GC to check for any issues with ref_counts or metadata integrity. - if err := maintenance.RunVerify(maintenance.VerifyStandard); err != nil { + if err := maintenance.VerifyCommand("system", 0, verify.VerifyStandard); err != nil { t.Fatalf("verify standard after GC: %v", err) } @@ -471,7 +561,7 @@ func TestGCRemovesUnusedContainers(t *testing.T) { } // Verify again after dry-run GC to ensure it doesn't break anything. 
- if err := maintenance.RunVerify(maintenance.VerifyFull); err != nil { + if err := maintenance.VerifyCommand("system", 0, verify.VerifyFull); err != nil { t.Fatalf("verify full after GC: %v", err) } @@ -546,7 +636,7 @@ func TestConcurrentStoreSameFile(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -596,7 +686,7 @@ func TestConcurrentStoreSameChunk(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -672,7 +762,7 @@ func TestRetryAfterAbortedFile(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -722,7 +812,7 @@ func TestRetryAfterAbortedChunk(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -789,7 +879,7 @@ func TestContainerRollover(t *testing.T) { container.SetContainerMaxSize(1 * 1024 * 1024) // 1MB for quick test defer container.SetContainerMaxSize(originalMaxSize) // restore - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -876,7 +966,7 @@ func TestStartupRecoverySimulation(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + 
utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -1000,7 +1090,7 @@ func TestVerifyStandard(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -1011,7 +1101,7 @@ func TestVerifyStandard(t *testing.T) { } t.Run("passes on clean database", func(t *testing.T) { - if err := maintenance.RunVerify(maintenance.VerifyStandard); err != nil { + if err := maintenance.VerifyCommand("system", 0, verify.VerifyStandard); err != nil { t.Fatalf("RunVerify on clean DB should not fail: %v", err) } }) @@ -1026,7 +1116,7 @@ func TestVerifyStandard(t *testing.T) { dbconn.Exec(`UPDATE chunk SET ref_count = ref_count - 99 WHERE id = (SELECT id FROM chunk LIMIT 1)`) }() - if err := maintenance.RunVerify(maintenance.VerifyStandard); err == nil { + if err := maintenance.VerifyCommand("system", 0, verify.VerifyStandard); err == nil { t.Fatal("RunVerify should have detected the corrupted ref_count but returned nil") } }) @@ -1043,7 +1133,7 @@ func TestVerifyStandard(t *testing.T) { dbconn.Exec(`DELETE FROM chunk WHERE chunk_hash = 'orphan_chunk_hash_test'`) }() - if err := maintenance.RunVerify(maintenance.VerifyStandard); err == nil { + if err := maintenance.VerifyCommand("system", 0, verify.VerifyStandard); err == nil { t.Fatal("RunVerify should have detected the orphan chunk but returned nil") } }) @@ -1062,7 +1152,7 @@ func TestVerifyStandard(t *testing.T) { t.Fatalf("remove container file: %v", err) } - if err := maintenance.RunVerify(maintenance.VerifyFull); err == nil { + if err := maintenance.VerifyCommand("system", 0, verify.VerifyFull); err == nil { t.Fatal("verify full should detect missing container file") } }) @@ -1085,7 +1175,7 @@ func TestVerifyFull(t *testing.T) { 
applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") _ = os.MkdirAll(inputDir, 0o755) @@ -1096,7 +1186,7 @@ func TestVerifyFull(t *testing.T) { } t.Run("passes on clean database", func(t *testing.T) { - if err := maintenance.RunVerify(maintenance.VerifyFull); err != nil { + if err := maintenance.VerifyCommand("system", 0, verify.VerifyFull); err != nil { t.Fatalf("RunVerify full on clean DB should not fail: %v", err) } }) @@ -1112,7 +1202,7 @@ func TestVerifyFull(t *testing.T) { dbconn.Exec(`DELETE FROM chunk WHERE chunk_hash = 'verify_full_bad_chunk'`) }() - if err := maintenance.RunVerify(maintenance.VerifyFull); err == nil { + if err := maintenance.VerifyCommand("system", 0, verify.VerifyFull); err == nil { t.Fatal("RunVerify full should have detected malformed completed chunk but returned nil") } }) @@ -1131,12 +1221,162 @@ func TestVerifyFull(t *testing.T) { t.Fatalf("remove container file: %v", err) } - if err := maintenance.RunVerify(maintenance.VerifyFull); err == nil { + if err := maintenance.VerifyCommand("system", 0, verify.VerifyFull); err == nil { t.Fatal("verify full should detect missing container file") } }) } +func TestVerifyFileDeepDetectsChunkDataCorruption(t *testing.T) { + dbconn, _, fileID := setupStoredFileForVerification(t, "verify_file_deep_corruption.bin", 512*1024) + defer dbconn.Close() + + record := fetchFirstChunkRecord(t, dbconn, fileID) + containerPath := containerPathForRecord(record) + + file, err := os.OpenFile(containerPath, os.O_RDWR, 0) + if err != nil { + t.Fatalf("open container file: %v", err) + } + + corruptionOffset := record.chunkOffset + 32 + 4 + if record.chunkSize > 10 { + corruptionOffset += 10 + } + + if _, err := file.WriteAt([]byte{0xFF}, corruptionOffset); err != nil { + _ = file.Close() + t.Fatalf("corrupt chunk byte: %v", err) + } + if err := file.Close(); err 
!= nil { + t.Fatalf("close container file: %v", err) + } + + if err := maintenance.VerifyCommand("file", int(fileID), verify.VerifyDeep); err == nil { + t.Fatal("verify file --deep should detect chunk data corruption") + } +} + +func TestVerifyFileStandardPassesOnCleanStoredFile(t *testing.T) { + dbconn, _, fileID := setupStoredFileForVerification(t, "verify_file_standard_clean.bin", 256*1024) + defer dbconn.Close() + + if err := maintenance.VerifyCommand("file", int(fileID), verify.VerifyStandard); err != nil { + t.Fatalf("verify file --standard on clean file should pass: %v", err) + } +} + +func TestVerifyFileFullPassesOnCleanStoredFile(t *testing.T) { + dbconn, _, fileID := setupStoredFileForVerification(t, "verify_file_full_clean.bin", 256*1024) + defer dbconn.Close() + + if err := maintenance.VerifyCommand("file", int(fileID), verify.VerifyFull); err != nil { + t.Fatalf("verify file --full on clean file should pass: %v", err) + } +} + +func TestVerifyFileDeepPassesOnCleanStoredFile(t *testing.T) { + dbconn, _, fileID := setupStoredFileForVerification(t, "verify_file_deep_clean.bin", 256*1024) + defer dbconn.Close() + + if err := maintenance.VerifyCommand("file", int(fileID), verify.VerifyDeep); err != nil { + t.Fatalf("verify file --deep on clean file should pass: %v", err) + } +} + +func TestVerifyFileFullDetectsContainerTruncation(t *testing.T) { + dbconn, _, fileID := setupStoredFileForVerification(t, "verify_file_full_truncation.bin", 512*1024) + defer dbconn.Close() + + record := fetchFirstChunkRecord(t, dbconn, fileID) + containerPath := containerPathForRecord(record) + + truncatedSize := record.containerCurrentSize - 100 + if truncatedSize <= 0 { + t.Fatalf("invalid truncated size derived from container size %d", record.containerCurrentSize) + } + + if err := os.Truncate(containerPath, truncatedSize); err != nil { + t.Fatalf("truncate container file: %v", err) + } + + if err := maintenance.VerifyCommand("file", int(fileID), verify.VerifyFull); err == 
nil { + t.Fatal("verify file --full should detect truncated container data") + } +} + +func TestVerifyFileFullDetectsMissingContainerFile(t *testing.T) { + dbconn, _, fileID := setupStoredFileForVerification(t, "verify_file_full_missing_container.bin", 256*1024) + defer dbconn.Close() + + record := fetchFirstChunkRecord(t, dbconn, fileID) + containerPath := containerPathForRecord(record) + + if err := os.Remove(containerPath); err != nil { + t.Fatalf("remove container file: %v", err) + } + + if err := maintenance.VerifyCommand("file", int(fileID), verify.VerifyFull); err == nil { + t.Fatal("verify file --full should detect a missing container file") + } +} + +func TestVerifyFileStandardDetectsMissingChunkMetadata(t *testing.T) { + dbconn, _, fileID := setupStoredFileForVerification(t, "verify_file_standard_missing_chunk.bin", 512*1024) + defer dbconn.Close() + + record := fetchFirstChunkRecord(t, dbconn, fileID) + + if _, err := dbconn.Exec(`ALTER TABLE file_chunk DROP CONSTRAINT IF EXISTS file_chunk_chunk_id_fkey`); err != nil { + t.Fatalf("drop file_chunk foreign key: %v", err) + } + + if _, err := dbconn.Exec(`DELETE FROM chunk WHERE id = $1`, record.chunkID); err != nil { + t.Fatalf("delete chunk row: %v", err) + } + + if err := maintenance.VerifyCommand("file", int(fileID), verify.VerifyStandard); err == nil { + t.Fatal("verify file should detect missing chunk metadata") + } +} + +func TestVerifyFileStandardDetectsBrokenChunkOrder(t *testing.T) { + dbconn, _, fileID := setupStoredFileForVerification(t, "verify_file_standard_chunk_order.bin", 512*1024) + defer dbconn.Close() + + if _, err := dbconn.Exec(`UPDATE file_chunk SET chunk_order = chunk_order + 1 WHERE logical_file_id = $1`, fileID); err != nil { + t.Fatalf("corrupt chunk ordering: %v", err) + } + + if err := maintenance.VerifyCommand("file", int(fileID), verify.VerifyStandard); err == nil { + t.Fatal("verify file should detect broken chunk ordering") + } +} + +func 
TestVerifySystemFullDetectsContainerHashMismatch(t *testing.T) { + dbconn, _, fileID := setupStoredFileForVerification(t, "verify_system_full_container_hash.bin", 256*1024) + defer dbconn.Close() + + record := fetchFirstChunkRecord(t, dbconn, fileID) + containerPath := containerPathForRecord(record) + + file, err := os.OpenFile(containerPath, os.O_RDWR, 0) + if err != nil { + t.Fatalf("open container file: %v", err) + } + if _, err := file.WriteAt([]byte{0xAB}, 0); err != nil { + _ = file.Close() + t.Fatalf("mutate container file: %v", err) + } + if err := file.Close(); err != nil { + t.Fatalf("close container file: %v", err) + } + + if err := maintenance.VerifyCommand("system", 0, verify.VerifyFull); err == nil { + t.Fatal("verify system --full should detect a container hash mismatch") + } +} + func TestSharedChunkSafety(t *testing.T) { requireDB(t) @@ -1156,7 +1396,7 @@ func TestSharedChunkSafety(t *testing.T) { applySchema(t, dbconn) resetDB(t, dbconn) - utils.DefaultCompression = utils.CompressionNone + utils_compression.DefaultCompression = utils_compression.CompressionNone inputDir := filepath.Join(tmp, "input") if err := os.MkdirAll(inputDir, 0o755); err != nil { @@ -1214,3 +1454,98 @@ func TestSharedChunkSafety(t *testing.T) { t.Fatalf("hash mismatch: expected %s, got %s", origHash, restoreHash) } } + +func TestVerifySystemDeepPassesOnCleanStoredFile(t *testing.T) { + requireDB(t) + + tmp := t.TempDir() + container.ContainersDir = filepath.Join(tmp, "containers") + _ = os.Setenv("COLDKEEP_STORAGE_DIR", container.ContainersDir) + resetStorage(t) + + dbconn, err := db.ConnectDB() + if err != nil { + t.Fatalf("connectDB: %v", err) + } + defer dbconn.Close() + + applySchema(t, dbconn) + resetDB(t, dbconn) + + utils_compression.DefaultCompression = utils_compression.CompressionNone + + inputDir := filepath.Join(tmp, "input") + _ = os.MkdirAll(inputDir, 0o755) + inPath := createTempFile(t, inputDir, "verify_system_deep_clean.bin", 512*1024) + + if err := 
storage.StoreFileWithDB(dbconn, inPath); err != nil { + t.Fatalf("store file: %v", err) + } + + if err := maintenance.VerifyCommand("system", 0, verify.VerifyDeep); err != nil { + t.Fatalf("verify system --deep on clean stored file should pass: %v", err) + } +} + +func TestVerifySystemDeepDetectsChunkDataCorruption(t *testing.T) { + requireDB(t) + + tmp := t.TempDir() + container.ContainersDir = filepath.Join(tmp, "containers") + _ = os.Setenv("COLDKEEP_STORAGE_DIR", container.ContainersDir) + resetStorage(t) + + dbconn, err := db.ConnectDB() + if err != nil { + t.Fatalf("connectDB: %v", err) + } + defer dbconn.Close() + + applySchema(t, dbconn) + resetDB(t, dbconn) + + utils_compression.DefaultCompression = utils_compression.CompressionNone + + inputDir := filepath.Join(tmp, "input") + _ = os.MkdirAll(inputDir, 0o755) + inPath := createTempFile(t, inputDir, "verify_system_deep_corruption.bin", 512*1024) + + if err := storage.StoreFileWithDB(dbconn, inPath); err != nil { + t.Fatalf("store file: %v", err) + } + + // Fetch first chunk record to find where to corrupt + var chunkOffset int64 + var chunkSize int64 + var containerFilename string + err = dbconn.QueryRow(` + SELECT c.chunk_offset, c.size, ctr.filename + FROM chunk c + JOIN container ctr ON ctr.id = c.container_id + WHERE c.status = 'COMPLETED' + ORDER BY c.chunk_offset ASC + LIMIT 1 + `).Scan(&chunkOffset, &chunkSize, &containerFilename) + if err != nil { + t.Fatalf("query first chunk: %v", err) + } + + containerPath := filepath.Join(container.ContainersDir, containerFilename) + + // Open container and corrupt a byte in the first chunk's data + // Skip past the header (32 bytes hash + 4 bytes size) to reach the actual chunk data + f, err := os.OpenFile(containerPath, os.O_RDWR, 0) + if err != nil { + t.Fatalf("open container file: %v", err) + } + defer f.Close() + + corruptionOffset := chunkOffset + 32 + 4 + 10 // header (32+4 bytes) + 10 bytes into data + if _, err := f.WriteAt([]byte{0xFF}, 
corruptionOffset); err != nil { + t.Fatalf("corrupt chunk byte: %v", err) + } + + if err := maintenance.VerifyCommand("system", 0, verify.VerifyDeep); err == nil { + t.Fatal("verify system --deep should detect chunk data corruption but returned nil") + } +}