From 5fbeb34f3dd76c966b93d370b0efd09ea0d22e42 Mon Sep 17 00:00:00 2001 From: AI Principal Architect Date: Fri, 17 Apr 2026 17:53:16 +0100 Subject: [PATCH 1/4] feat: offline secret pre-flight scanner --- dummy_config.js | 2 + internal/appcore/review_runtime.go | 7 +++ internal/appcore/secscan.go | 74 ++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 dummy_config.js create mode 100644 internal/appcore/secscan.go diff --git a/dummy_config.js b/dummy_config.js new file mode 100644 index 0000000..90b6389 --- /dev/null +++ b/dummy_config.js @@ -0,0 +1,2 @@ +// Mock database configuration +const DUMMY_AWS_KEY = "AKIAIOSFODNN7EXAMPLE"; diff --git a/internal/appcore/review_runtime.go b/internal/appcore/review_runtime.go index 188f3d3..6362fa6 100644 --- a/internal/appcore/review_runtime.go +++ b/internal/appcore/review_runtime.go @@ -278,6 +278,13 @@ func runReviewWithOptions(opts reviewopts.Options) error { return fmt.Errorf("no diff content collected") } + // [Offline PII/Secret Pre-Flight Scanner] + // Run offline secret scanning before it's shipped across the network (LiveReview or BYOK). + if err := ScanDiffForSecrets(diffContent); err != nil { + fmt.Fprintf(os.Stderr, "\n[FATAL] %v\n", err) + return cli.Exit(err.Error(), 1) + } + var fakeBaseFiles []reviewmodel.DiffReviewFileResult if fakeMode { fakeBaseFiles, err = parseDiffToFiles(diffContent) diff --git a/internal/appcore/secscan.go b/internal/appcore/secscan.go new file mode 100644 index 0000000..336eed0 --- /dev/null +++ b/internal/appcore/secscan.go @@ -0,0 +1,74 @@ +package appcore + +import ( + "fmt" + "regexp" + "strings" +) + +// SecretPattern represents a regex rule to match known sensitive patterns +type SecretPattern struct { + Name string + Pattern *regexp.Regexp +} + +// Pre-compiled high-confidence secret patterns +var secretPatterns = []SecretPattern{ + { + Name: "AWS Access Key ID", + Pattern: regexp.MustCompile(`AKIA[0-9A-Z]{16}`), + }, + { + Name: "GitHub Personal Access Token", + Pattern: regexp.MustCompile(`ghp_[a-zA-Z0-9]{36}`), + }, + { + Name: "Slack Token", + Pattern: regexp.MustCompile(`xox[baprs]-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}`), + }, + { + Name: "RSA / OpenSSH Private Key", + Pattern: regexp.MustCompile(`-----BEGIN (?:RSA|OPENSSH) PRIVATE KEY-----`), + }, + { + Name: "Generic High Entropy Secret", + Pattern: regexp.MustCompile(`(?i)(?:sk|api_key|token|secret)[-_]?(?:key|token)?(?:[\s:=]+)(['"]?)([a-zA-Z0-9_\-\.]{20,})\1`), + }, +} + +// ScanDiffForSecrets scans the provided git diff content for high-confidence secrets +// Returns an error detailing the found secrets, or nil if safe. +func ScanDiffForSecrets(diffContent []byte) error { + if len(diffContent) == 0 { + return nil + } + + contentStr := string(diffContent) + var foundSecrets []string + + for _, sp := range secretPatterns { + if sp.Pattern.MatchString(contentStr) { + // Find all matches for reporting + matches := sp.Pattern.FindAllString(contentStr, -1) + for _, match := range matches { + redacted := redactSecretMatch(match) + foundSecrets = append(foundSecrets, fmt.Sprintf("%s (%s)", sp.Name, redacted)) + } + } + } + + if len(foundSecrets) > 0 { + return fmt.Errorf("local security check failed. Found %d potentially sensitive credential(s) in the staged diff:\n - %s\n\nAborting review. If you must commit this, please bypass using the `--skip` flag.", + len(foundSecrets), strings.Join(foundSecrets, "\n - ")) + } + + return nil +} + +// redactSecretMatch masks all but the first 4 and last 4 characters of the matched secret +func redactSecretMatch(secret string) string { + if len(secret) <= 8 { + return strings.Repeat("*", len(secret)) + } + return secret[:4] + "...." + secret[len(secret)-4:] +} From 5821c039a1de1a28e8ca2e0760d52ae1efb5429a Mon Sep 17 00:00:00 2001 From: AI Principal Architect Date: Fri, 17 Apr 2026 18:01:29 +0100 Subject: [PATCH 2/4] perf: token-optimized lockfile diff chunking filter --- internal/appcore/review_runtime.go | 36 ++++++++++++++++++++--------- test.txt | Bin 0 -> 12 bytes 2 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 test.txt diff --git a/internal/appcore/review_runtime.go b/internal/appcore/review_runtime.go index 6362fa6..0433422 100644 --- a/internal/appcore/review_runtime.go +++ b/internal/appcore/review_runtime.go @@ -1308,6 +1308,16 @@ func runReviewWithOptions(opts reviewopts.Options) error { return nil } +var standardTokenExclusions = []string{ + `:(exclude)package-lock.json`, + `:(exclude)yarn.lock`, + `:(exclude)pnpm-lock.yaml`, + `:(exclude)go.sum`, + `:(exclude)Cargo.lock`, + `:(exclude)poetry.lock`, + `:(exclude)Gemfile.lock`, +} + func collectDiffWithOptions(opts reviewopts.Options) ([]byte, error) { diffSource := opts.DiffSource verbose := opts.Verbose @@ -1315,15 +1325,17 @@ func collectDiffWithOptions(opts reviewopts.Options) ([]byte, error) { switch diffSource { case "staged": if verbose { - log.Println("Collecting staged changes...") + log.Println("Collecting staged changes (excluding standard lockfiles)...") } - return reviewapi.RunGitCommand("diff", "--staged") + args := append([]string{"diff", "--staged", "--", "."}, standardTokenExclusions...) + return reviewapi.RunGitCommand(args...) case "working": if verbose { - log.Println("Collecting working tree changes...") + log.Println("Collecting working tree changes (excluding standard lockfiles)...") } - return reviewapi.RunGitCommand("diff") + args := append([]string{"diff", "--", "."}, standardTokenExclusions...) + return reviewapi.RunGitCommand(args...) case "commit": commitVal := opts.CommitVal @@ -1331,15 +1343,16 @@ func collectDiffWithOptions(opts reviewopts.Options) ([]byte, error) { return nil, fmt.Errorf("--commit is required when diff-source=commit") } if verbose { - log.Printf("Collecting diff for commit: %s", commitVal) + log.Printf("Collecting diff for commit: %s (excluding standard lockfiles)", commitVal) } - // Check if it's a range (contains .. or ...) if strings.Contains(commitVal, "..") { // It's a commit range, use git diff - return reviewapi.RunGitCommand("diff", commitVal) + args := append([]string{"diff", commitVal, "--"}, standardTokenExclusions...) + return reviewapi.RunGitCommand(args...) } // Single commit, use git show to get the commit's changes - return reviewapi.RunGitCommand("show", "--format=", commitVal) + args := append([]string{"show", "--format=", commitVal, "--"}, standardTokenExclusions...) + return reviewapi.RunGitCommand(args...) case "range": rangeVal := opts.RangeVal @@ -1347,9 +1360,10 @@ func collectDiffWithOptions(opts reviewopts.Options) ([]byte, error) { return nil, fmt.Errorf("--range is required when diff-source=range") } if verbose { - log.Printf("Collecting diff for range: %s", rangeVal) + log.Printf("Collecting diff for range: %s (excluding standard lockfiles)", rangeVal) } - return reviewapi.RunGitCommand("diff", rangeVal) + args := append([]string{"diff", rangeVal, "--"}, standardTokenExclusions...) + return reviewapi.RunGitCommand(args...) case "file": filePath := opts.DiffFile @@ -1357,7 +1371,7 @@ func collectDiffWithOptions(opts reviewopts.Options) ([]byte, error) { return nil, fmt.Errorf("--diff-file is required when diff-source=file") } if verbose { - log.Printf("Reading diff from file: %s", filePath) + log.Printf("Reading diff from file (no automatic exclusions applied): %s", filePath) } return storage.ReadDiffFile(filePath) diff --git a/test.txt b/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..c75b3dcf2572caae098be44dc890c4980b0c69b8 GIT binary patch literal 12 RcmezWFO4A|2zeQ}7yuza16cq7 literal 0 HcmV?d00001 From bdedaddcf397a7fb99bb7c40f5b7876df9258226 Mon Sep 17 00:00:00 2001 From: AI Principal Architect Date: Fri, 17 Apr 2026 18:05:52 +0100 Subject: [PATCH 3/4] fix(network): enterprise-grade exponential backoff for HTTP resiliency --- network/http_client.go | 131 +++++++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 50 deletions(-) diff --git a/network/http_client.go b/network/http_client.go index 62aebd8..927f11e 100644 --- a/network/http_client.go +++ b/network/http_client.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "io" + "math/rand" "net/http" "time" ) @@ -40,80 +41,110 @@ func NewClient(timeout time.Duration) *Client { } } -func (c *Client) DoJSON(method, url string, payload any, bearerToken, orgContext string, headers map[string]string) (*Response, error) { - var bodyReader io.Reader - if payload != nil { - bodyJSON, err := json.Marshal(payload) +const maxRetries = 3 + +// doWithRetry encapsulates the core HTTP execution with exponential backoff and jitter. +func (c *Client) doWithRetry(reqBody []byte, reqBuilder func(io.Reader) (*http.Request, error)) (*Response, error) { + var resp *http.Response + var err error + + for attempt := 0; attempt <= maxRetries; attempt++ { + var bodyReader io.Reader + if reqBody != nil { + bodyReader = bytes.NewReader(reqBody) + } + + req, reqErr := reqBuilder(bodyReader) + if reqErr != nil { + return nil, reqErr + } + + resp, err = c.httpClient.Do(req) + + // Determine if the failure is transient + shouldRetry := false if err != nil { - return nil, fmt.Errorf("failed to marshal request: %w", err) + shouldRetry = true + } else if resp.StatusCode >= 500 || resp.StatusCode == 429 { + shouldRetry = true } - bodyReader = bytes.NewReader(bodyJSON) - } - req, err := http.NewRequest(method, url, bodyReader) - if err != nil { - return nil, err - } + if !shouldRetry || attempt == maxRetries { + break + } - if payload != nil { - req.Header.Set("Content-Type", "application/json") - } - if bearerToken != "" { - req.Header.Set("Authorization", "Bearer "+bearerToken) - } - if orgContext != "" { - req.Header.Set("X-Org-Context", orgContext) - } - for key, value := range headers { - req.Header.Set(key, value) + if resp != nil && resp.Body != nil { + resp.Body.Close() + } + + // Calculate backoff: wait = base * 2^attempt + jitter + baseWait := time.Duration(500*(1< Date: Fri, 17 Apr 2026 18:10:54 +0100 Subject: [PATCH 4/4] fix(security): resolve RE2 engine panic and eliminate vouch bypass vector --- internal/appcore/review_runtime.go | 24 +++++++++++++++++------- internal/appcore/secscan.go | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/internal/appcore/review_runtime.go b/internal/appcore/review_runtime.go index 0433422..6e8e299 100644 --- a/internal/appcore/review_runtime.go +++ b/internal/appcore/review_runtime.go @@ -278,13 +278,6 @@ func runReviewWithOptions(opts reviewopts.Options) error { return fmt.Errorf("no diff content collected") } - // [Offline PII/Secret Pre-Flight Scanner] - // Run offline secret scanning before it's shipped across the network (LiveReview or BYOK). - if err := ScanDiffForSecrets(diffContent); err != nil { - fmt.Fprintf(os.Stderr, "\n[FATAL] %v\n", err) - return cli.Exit(err.Error(), 1) - } - var fakeBaseFiles []reviewmodel.DiffReviewFileResult if fakeMode { fakeBaseFiles, err = parseDiffToFiles(diffContent) @@ -1318,7 +1311,24 @@ var standardTokenExclusions = []string{ `:(exclude)Gemfile.lock`, } +// collectDiffWithOptions securely intercepts diff collection by filtering out lockfiles globally and running the local Offline Security scanner synchronously before any payload bubbles backwards gracefully. func collectDiffWithOptions(opts reviewopts.Options) ([]byte, error) { + diffContent, err := collectDiffWithOptionsRaw(opts) + if err != nil { + return nil, err + } + + // [Offline PII/Secret Pre-Flight Scanner] + // Run offline secret scanning right as the subsystem collects bytes, protecting BOTH standard --review AND --vouch. + if err := ScanDiffForSecrets(diffContent); err != nil { + fmt.Fprintf(os.Stderr, "\n[FATAL] %v\n", err) + return nil, cli.Exit(err.Error(), 1) + } + + return diffContent, nil +} + +func collectDiffWithOptionsRaw(opts reviewopts.Options) ([]byte, error) { diffSource := opts.DiffSource verbose := opts.Verbose diff --git a/internal/appcore/secscan.go b/internal/appcore/secscan.go index 336eed0..5e80c3d 100644 --- a/internal/appcore/secscan.go +++ b/internal/appcore/secscan.go @@ -32,7 +32,7 @@ var secretPatterns = []SecretPattern{ }, { Name: "Generic High Entropy Secret", - Pattern: regexp.MustCompile(`(?i)(?:sk|api_key|token|secret)[-_]?(?:key|token)?(?:[\s:=]+)(['"]?)([a-zA-Z0-9_\-\.]{20,})\1`), + Pattern: regexp.MustCompile(`(?i)(?:sk|api_key|token|secret)[-_]?(?:key|token)?(?:[\s:=]+)['"]?([a-zA-Z0-9_\-\.]{20,})['"]?`), }, }