From a906ff4a4043a9ee98fa6e4df8c80ad589fdc778 Mon Sep 17 00:00:00 2001
From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com>
Date: Wed, 22 Apr 2026 12:35:42 +0300
Subject: [PATCH 1/6] Add scan experimental flags

---
 cli/cmd/analyzer_exit.go               |  68 ++++++++
 cli/cmd/command_builder.go             |  80 ++++++++--
 cli/cmd/compile.go                     |  10 +-
 cli/cmd/compile_approximations.go      | 209 +++++++++++++++++++++++++
 cli/cmd/project.go                     |   5 +-
 cli/cmd/root.go                        |   8 +
 cli/cmd/scan.go                        | 123 ++++++++++++---
 cli/internal/globals/global.go         |   2 +
 cli/internal/utils/java/runner.go      |  49 ++++--
 cli/internal/utils/java/runner_test.go |   5 +-
 10 files changed, 502 insertions(+), 57 deletions(-)
 create mode 100644 cli/cmd/analyzer_exit.go
 create mode 100644 cli/cmd/compile_approximations.go

diff --git a/cli/cmd/analyzer_exit.go b/cli/cmd/analyzer_exit.go
new file mode 100644
index 000000000..8dd5a3cf4
--- /dev/null
+++ b/cli/cmd/analyzer_exit.go
@@ -0,0 +1,68 @@
+package cmd
+
+import (
+	"fmt"
+
+	"github.com/seqra/opentaint/internal/utils/java"
+)
+
+// Analyzer exit codes as seen by the OS: the low byte of the negative status that
+// AbstractAnalyzerRunner passes to Kotlin exitProcess() (NOTE(review): POSIX view; confirm on Windows):
+//
+//	exitProcess(-1)  → 255  (project configuration error)
+//	exitProcess(-2)  → 254  (analysis timeout)
+//	exitProcess(-3)  → 253  (out of memory)
+//	exitProcess(-4)  → 252  (unhandled exception)
+const (
+	analyzerExitConfigError = 255
+	analyzerExitTimeout     = 254
+	analyzerExitOOM         = 253
+	analyzerExitException   = 252
+)
+
+// analyzerError describes an analyzer failure: exitCode is the process exit code
+// to forward to os.Exit, message is a human-readable description of the failure.
+type analyzerError struct {
+	exitCode int
+	message  string
+}
+
+// analyzerExitMessage maps a known analyzer exit code to a human-readable
+// description. Codes that are not recognized yield the empty string.
+func analyzerExitMessage(code int) string {
+	known := map[int]string{
+		analyzerExitConfigError: "project configuration error",
+		analyzerExitTimeout:     "analysis timed out — try increasing --timeout or --max-memory",
+		analyzerExitOOM:         "out of memory — try increasing --max-memory (e.g. --max-memory 16G)",
+		analyzerExitException:   "unhandled analyzer exception",
+	}
+	// Codes outside the table are not analyzer-defined; the empty string tells
+	// the caller to fall back to a generic message.
+	if description, found := known[code]; found {
+		return description
+	}
+	return ""
+}
+
+// classifyAnalyzerError turns a *JavaCommandError into an *analyzerError that
+// carries a human-readable message. A nil cmdErr yields nil.
+//
+// The message is reported through out.Error right away; exiting is left to
+// the caller, who is expected to call os.Exit with the returned exit code
+// once any post-failure work (e.g. printing summaries) is done.
+func classifyAnalyzerError(cmdErr *java.JavaCommandError) *analyzerError {
+	if cmdErr == nil {
+		return nil
+	}
+
+	exitCode := cmdErr.ExitCode
+
+	// Unrecognized codes get a generic message without a description.
+	text := fmt.Sprintf("Analysis failed with exit code %d", exitCode)
+	if description := analyzerExitMessage(exitCode); description != "" {
+		text = fmt.Sprintf("Analysis failed (exit code %d): %s", exitCode, description)
+	}
+
+	out.Error(text)
+	return &analyzerError{exitCode: exitCode, message: text}
+}
diff --git a/cli/cmd/command_builder.go b/cli/cmd/command_builder.go
index 772957de1..aa353a07b 100644
--- a/cli/cmd/command_builder.go
+++ b/cli/cmd/command_builder.go
@@ -42,21 +42,26 @@ func NewAutobuilderBuilder() *AutobuilderBuilder {
 
 type AnalyzerBuilder struct {
 	*BaseCommandBuilder
-	projectPath              string
-	outputDir                string
-	sarifFileName            string
-	sarifCodeFlowLimit       int64
-	sarifToolVersion         string
-	sarifToolSemanticVersion string
-	sarifUriBase             string
-	semgrepCompatibility     bool
-	partialFingerprints      bool
-	ifdsAnalysisTimeout      int64
-	severities               []string
-	ruleSetPaths             []string
-	ruleLoadTracePath        string
-	jarPath                  string
-	maxMemory                string
+	projectPath                string
+	outputDir                  string
+	sarifFileName              string
+	sarifCodeFlowLimit         int64
+	sarifToolVersion           string
+	sarifToolSemanticVersion   string
+	sarifUriBase               string
+	semgrepCompatibility       bool
+	partialFingerprints        bool
+	ifdsAnalysisTimeout        int64
+	severities                 []string
+	ruleSetPaths               []string
+	ruleLoadTracePath          string
+	jarPath                    string
+	maxMemory                  string
+	ruleIDs                    []string
+	approximationsConfig       []string
+	dataflowApproximations     []string
+	trackExternalMethods       bool
+	debugFactReachabilitySarif bool
 }
 
 func (a *AnalyzerBuilder) SetProject(projectPath string) *AnalyzerBuilder {
@@ -134,6 +139,31 @@ func (a *AnalyzerBuilder) SetMaxMemory(maxMemory string) *AnalyzerBuilder {
 	return a
 }
 
+func (a *AnalyzerBuilder) AddRuleID(ruleID string) *AnalyzerBuilder {
+	a.ruleIDs = append(a.ruleIDs, ruleID)
+	return a
+}
+
+func (a *AnalyzerBuilder) AddApproximationsConfig(configPath string) *AnalyzerBuilder {
+	a.approximationsConfig = append(a.approximationsConfig, configPath)
+	return a
+}
+
+func (a *AnalyzerBuilder) AddDataflowApproximations(approxPath string) *AnalyzerBuilder {
+	a.dataflowApproximations = append(a.dataflowApproximations, approxPath)
+	return a
+}
+
+func (a *AnalyzerBuilder) SetTrackExternalMethods(track bool) *AnalyzerBuilder {
+	a.trackExternalMethods = track
+	return a
+}
+
+func (a *AnalyzerBuilder) EnableDebugFactReachabilitySarif() *AnalyzerBuilder {
+	a.debugFactReachabilitySarif = true
+	return a
+}
+
 func (a *AnalyzerBuilder) BuildNativeCommand() []string {
 	// For native execution, create a temporary logs directory
 	tempLogsDir, err := os.MkdirTemp("", "opentaint-*")
@@ -203,6 +233,26 @@ func (a *AnalyzerBuilder) BuildNativeCommand() []string {
 		flags = append(flags, "--semgrep-rule-load-trace", a.ruleLoadTracePath)
 	}
 
+	for _, ruleID := range a.ruleIDs {
+		flags = append(flags, "--semgrep-rule-id", ruleID)
+	}
+
+	for _, configPath := range a.approximationsConfig {
+		flags = append(flags, "--approximations-config", configPath)
+	}
+
+	for _, approxPath := range a.dataflowApproximations {
+		flags = append(flags, "--dataflow-approximations", approxPath)
+	}
+
+	if a.trackExternalMethods {
+		flags = append(flags, "--track-external-methods")
+	}
+
+	if a.debugFactReachabilitySarif {
+		flags = append(flags, "--debug-fact-reachability-sarif")
+	}
+
 	return append(command, flags...)
 }
 
diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go
index 4dc97fbaa..f7de00a88 100644
--- a/cli/cmd/compile.go
+++ b/cli/cmd/compile.go
@@ -118,6 +118,10 @@ func init() {
 }
 
 func ensureAutobuilderAvailable() (string, error) {
+	if globals.Config.Autobuilder.JarPath != "" {
+		return globals.Config.Autobuilder.JarPath, nil
+	}
+
 	autobuilderJarPath, err := utils.GetAutobuilderJarPath(globals.Config.Autobuilder.Version)
 	if err != nil {
 		return "", fmt.Errorf("failed to construct path to the autobuilder: %w", err)
@@ -189,11 +193,15 @@ func compileProject(absOutputProjectModelPath, absProjectRoot, autobuilderJarPat
 		return true
 	}
 	// Execute the command using JavaRunner
-	err = javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded)
+	cmdErr, err := javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded)
 	if err != nil {
 		output.LogInfof("Native compilation has failed: %s", err)
 		return fmt.Errorf("native compilation has failed: %w", err)
 	}
+	if cmdErr != nil {
+		output.LogInfof("Native compilation has failed: %s", cmdErr)
+		return fmt.Errorf("native compilation has failed: %w", cmdErr)
+	}
 
 	return nil
 }
diff --git a/cli/cmd/compile_approximations.go b/cli/cmd/compile_approximations.go
new file mode 100644
index 000000000..b09cae861
--- /dev/null
+++ b/cli/cmd/compile_approximations.go
@@ -0,0 +1,209 @@
+package cmd
+
+import (
+	"archive/zip"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+
+	"github.com/seqra/opentaint/internal/globals"
+	"github.com/seqra/opentaint/internal/output"
+	"github.com/seqra/opentaint/internal/utils/java"
+	"github.com/seqra/opentaint/internal/utils/project"
+)
+
+// compileApproximationsIfNeeded prepares a --dataflow-approximations path for the
+// analyzer. A regular file, or a directory without .java sources, is returned
+// as-is. Otherwise the sources are compiled with javac (analyzer JAR, bundled
+// approximation utilities and project dependencies on the classpath) into a
+// fresh temp directory whose path is returned; removing it is up to the caller.
+//
+// projectModelDir is the directory containing project.yaml — used to resolve
+// project dependencies for the javac classpath (approximation code may reference
+// library types like org.apache.pdfbox.pdmodel.PDDocument).
+func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, projectModelDir string) (string, error) {
+	info, err := os.Stat(approxPath)
+	if err != nil {
+		return "", fmt.Errorf("approximation path does not exist: %w", err)
+	}
+
+	// If it's a single file, return as-is (nothing to compile)
+	if !info.IsDir() {
+		return approxPath, nil
+	}
+
+	// Collect .java files in the directory tree; a walk error aborts instead of silently dropping sources.
+	var javaFiles []string
+	err = filepath.Walk(approxPath, func(path string, fi os.FileInfo, walkErr error) error {
+		if walkErr != nil {
+			return walkErr
+		}
+		if !fi.IsDir() && strings.HasSuffix(fi.Name(), ".java") {
+			javaFiles = append(javaFiles, path)
+		}
+		return nil
+	})
+	if err != nil {
+		return "", fmt.Errorf("failed to scan approximations directory %s: %w", approxPath, err)
+	}
+
+	if len(javaFiles) == 0 {
+		// No Java sources — directory may contain .class files or be empty; pass through.
+		return approxPath, nil
+	}
+
+	output.LogInfof("Found %d .java file(s) in approximations directory, compiling...", len(javaFiles))
+
+	// Resolve javac from the managed JDK
+	javacRunner := java.NewJavaRunner().
+		WithSkipVerify(globals.Config.SkipVerify).
+		WithImageType(java.AdoptiumImageJDK).
+		TrySystem().
+		TrySpecificVersion(globals.DefaultJavaVersion)
+
+	javaPath, err := javacRunner.EnsureJava()
+	if err != nil {
+		return "", fmt.Errorf("failed to resolve Java for approximation compilation: %w", err)
+	}
+
+	javacPath := deriveJavacPath(javaPath)
+	if _, err := os.Stat(javacPath); err != nil {
+		return "", fmt.Errorf("javac not found at %s (resolved from java at %s). A JDK (not JRE) is required to compile approximation sources", javacPath, javaPath)
+	}
+
+	// Extract approximation support classes from the analyzer JAR.
+	// The JAR bundles utility classes (OpentaintNdUtil, ArgumentTypeContext)
+	// under "opentaint-dataflow-approximations/" prefix.
+	extractedDir, err := extractApproxClassesFromJar(analyzerJarPath)
+	if err != nil {
+		return "", fmt.Errorf("failed to extract approximation classes from analyzer JAR: %w", err)
+	}
+	// The extracted classes are only needed while javac runs; drop them on every exit path.
+	defer func() { _ = os.RemoveAll(extractedDir) }()
+
+	// Create temp output directory for compiled .class files
+	outputDir, err := os.MkdirTemp("", "opentaint-approx-compiled-*")
+	if err != nil {
+		return "", fmt.Errorf("failed to create temp directory for compiled approximations: %w", err)
+	}
+
+	// Build classpath:
+	// 1. Analyzer JAR — contains @Approximate, @ApproximateByName annotations
+	// 2. Extracted approximation utilities — OpentaintNdUtil, ArgumentTypeContext
+	// 3. Project dependencies — library JARs that approximation code may reference
+	cpParts := []string{analyzerJarPath, extractedDir}
+	cpParts = append(cpParts, resolveProjectDependencies(projectModelDir)...)
+	classpath := strings.Join(cpParts, string(os.PathListSeparator))
+
+	args := []string{
+		"-source", "8",
+		"-target", "8",
+		"-cp", classpath,
+		"-d", outputDir,
+	}
+	args = append(args, javaFiles...)
+
+	output.LogDebugf("Running javac: %s %s", javacPath, strings.Join(args, " "))
+
+	cmd := exec.Command(javacPath, args...)
+	cmdOutput, cmdErr := cmd.CombinedOutput()
+	if cmdErr != nil {
+		_ = os.RemoveAll(outputDir)
+		return "", fmt.Errorf(
+			"approximation compilation failed:\n%s\njavac exited with: %w",
+			string(cmdOutput), cmdErr,
+		)
+	}
+
+	output.LogInfof("Approximation compilation succeeded, output: %s", outputDir)
+	return outputDir, nil
+}
+
+// resolveProjectDependencies loads project.yaml from projectModelDir and returns the
+// listed dependency paths that exist on disk; relative entries resolve against that directory.
+func resolveProjectDependencies(projectModelDir string) []string {
+	if projectModelDir == "" {
+		return nil
+	}
+	projectConfig, loadErr := project.LoadConfig(projectModelDir)
+	if loadErr != nil {
+		output.LogDebugf("Could not read project config for approximation compilation: %v", loadErr)
+		return nil
+	}
+	var existing []string
+	for _, entry := range projectConfig.Dependencies {
+		candidate := filepath.Join(projectModelDir, entry)
+		if filepath.IsAbs(entry) {
+			candidate = entry
+		}
+		if _, statErr := os.Stat(candidate); statErr == nil {
+			existing = append(existing, candidate)
+		}
+	}
+	output.LogDebugf("Resolved %d project dependencies for approximation classpath", len(existing))
+	return existing
+}
+
+// extractApproxClassesFromJar copies the support classes bundled in the analyzer fat JAR
+// under "opentaint-dataflow-approximations/" into a new temp directory (prefix stripped, so
+// javac sees a standard package layout). On error the directory is removed and "" is returned.
+func extractApproxClassesFromJar(jarPath string) (_ string, err error) {
+	r, err := zip.OpenReader(jarPath)
+	if err != nil {
+		return "", fmt.Errorf("failed to open JAR: %w", err)
+	}
+	defer r.Close()
+
+	extractDir, err := os.MkdirTemp("", "opentaint-approx-deps-*")
+	if err != nil {
+		return "", err
+	}
+	// Single cleanup point: any error returned below removes the partial extraction.
+	defer func() {
+		if err != nil {
+			_ = os.RemoveAll(extractDir)
+		}
+	}()
+
+	const prefix = "opentaint-dataflow-approximations/"
+	for _, f := range r.File {
+		relPath := strings.TrimPrefix(f.Name, prefix)
+		if !strings.HasPrefix(f.Name, prefix) || relPath == "" || f.FileInfo().IsDir() {
+			continue
+		}
+		destPath := filepath.Join(extractDir, filepath.FromSlash(relPath))
+		// Refuse entries such as "../x" that would land outside extractDir (zip slip).
+		if !strings.HasPrefix(destPath, filepath.Clean(extractDir)+string(os.PathSeparator)) {
+			return "", fmt.Errorf("JAR entry %q escapes the extraction directory", f.Name)
+		}
+		if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
+			return "", err
+		}
+		src, err := f.Open()
+		if err != nil {
+			return "", err
+		}
+		dst, err := os.Create(destPath)
+		if err != nil {
+			src.Close()
+			return "", err
+		}
+		_, err = io.Copy(dst, src)
+		src.Close()
+		dst.Close()
+		if err != nil {
+			return "", err
+		}
+	}
+
+	return extractDir, nil
+}
+
+// deriveJavacPath returns the javac path next to the given java binary, keeping
+// the binary's extension so that "java.exe" maps to "javac.exe".
+func deriveJavacPath(javaPath string) string {
+	return filepath.Join(filepath.Dir(javaPath), "javac"+filepath.Ext(javaPath))
+}
diff --git a/cli/cmd/project.go b/cli/cmd/project.go
index 216ad36d5..9b7564ad0 100644
--- a/cli/cmd/project.go
+++ b/cli/cmd/project.go
@@ -164,10 +164,13 @@ func (c *JavaAutobuilderConfig) runAutobuilder() error {
 		return true
 	}
 
-	err = javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded)
+	cmdErr, err := javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded)
 	if err != nil {
 		return fmt.Errorf("native autobuilder execution failed: %w", err)
 	}
+	if cmdErr != nil {
+		return fmt.Errorf("native autobuilder execution failed: %w", cmdErr)
+	}
 
 	config, err := validation.ValidateProjectModelOutput(c.outputDir)
 	if err != nil {
diff --git a/cli/cmd/root.go b/cli/cmd/root.go
index fea11a5a8..5ab892d04 100644
--- a/cli/cmd/root.go
+++ b/cli/cmd/root.go
@@ -130,6 +130,14 @@ func init() {
 
 	rootCmd.PersistentFlags().BoolVar(&globals.Config.SkipVerify, "skip-verify", false, "Skip SHA256 checksum verification of downloaded artifacts")
 	_ = viper.BindPFlag("skip-verify", rootCmd.PersistentFlags().Lookup("skip-verify"))
+
+	rootCmd.PersistentFlags().StringVar(&globals.Config.Analyzer.JarPath, "analyzer-jar", "", "Path to analyzer JAR (dev override, skips download)")
+	_ = rootCmd.PersistentFlags().MarkHidden("analyzer-jar")
+	_ = viper.BindPFlag("analyzer.jar_path", rootCmd.PersistentFlags().Lookup("analyzer-jar"))
+
+	rootCmd.PersistentFlags().StringVar(&globals.Config.Autobuilder.JarPath, "autobuilder-jar", "", "Path to autobuilder JAR (dev override, skips download)")
+	_ = rootCmd.PersistentFlags().MarkHidden("autobuilder-jar")
+	_ = viper.BindPFlag("autobuilder.jar_path", rootCmd.PersistentFlags().Lookup("autobuilder-jar"))
 }
 
 // initConfig reads in config file and ENV variables if set.
diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go
index 0be614740..819a5f55a 100644
--- a/cli/cmd/scan.go
+++ b/cli/cmd/scan.go
@@ -23,15 +23,20 @@ import (
 )
 
 var (
-	UserProjectPath           string
-	ProjectModelPath          string
-	SarifReportPath           string
-	SemgrepCompatibilitySarif bool
-	Severity                  []string
-	Ruleset                   []string
-	DryRunScan                bool
-	Recompile                 bool
-	ScanLogFile               string
+	UserProjectPath            string
+	ProjectModelPath           string
+	SarifReportPath            string
+	SemgrepCompatibilitySarif  bool
+	Severity                   []string
+	Ruleset                    []string
+	DryRunScan                 bool
+	Recompile                  bool
+	ScanLogFile                string
+	RuleID                     []string
+	ApproximationsConfig       []string
+	DataflowApproximations     []string
+	TrackExternalMethods       bool
+	DebugFactReachabilitySarif bool
 )
 
 type RulesetType struct {
@@ -125,6 +130,19 @@ func init() {
 	scanCmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists")
 	scanCmd.Flags().StringVar(&ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)")
 	scanCmd.Flags().StringVar(&ScanLogFile, "log-file", "", "Path to the log file (default: <cache-dir>/logs/<timestamp>.log)")
+	scanCmd.Flags().StringArrayVar(&RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)")
+
+	// Experimental flags: hidden from --help by default. They are registered on
+	// Flags(), so MarkHidden must target the same flag set; looking them up on
+	// PersistentFlags() fails and leaves them visible.
+	scanCmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, "YAML passThrough approximations config (OVERRIDE mode, repeatable)")
+	_ = scanCmd.Flags().MarkHidden("approximations-config")
+	scanCmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files (repeatable)")
+	_ = scanCmd.Flags().MarkHidden("dataflow-approximations")
+	scanCmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-methods-{without,with}-rules.yaml next to the SARIF report")
+	_ = scanCmd.Flags().MarkHidden("track-external-methods")
+	scanCmd.Flags().BoolVar(&DebugFactReachabilitySarif, "debug-fact-reachability-sarif", false, "Generate SARIF with fact reachability info (debug; use with a single rule only)")
+	_ = scanCmd.Flags().MarkHidden("debug-fact-reachability-sarif")
 }
 
 // currentScanBuilder returns a builder pre-populated with the user's current scan flags.
@@ -334,6 +352,19 @@ func scan(cmd *cobra.Command) {
 	if maxMemory != "" {
 		nativeBuilder.SetMaxMemory(maxMemory)
 	}
+	for _, ruleID := range RuleID {
+		nativeBuilder.AddRuleID(ruleID)
+	}
+	for _, approxConfig := range ApproximationsConfig {
+		absApproxConfig := log.AbsPathOrExit(approxConfig, "approximations-config")
+		nativeBuilder.AddApproximationsConfig(absApproxConfig)
+	}
+	if TrackExternalMethods {
+		nativeBuilder.SetTrackExternalMethods(true)
+	}
+	if DebugFactReachabilitySarif {
+		nativeBuilder.EnableDebugFactReachabilitySarif()
+	}
 
 	analyzerJarPath, err := ensureAnalyzerAvailable()
 	if err != nil {
@@ -341,6 +372,16 @@ func scan(cmd *cobra.Command) {
 	}
 	nativeBuilder.SetJarPath(analyzerJarPath)
 
+	// Process --dataflow-approximations: auto-compile .java sources if needed
+	for _, approxPath := range DataflowApproximations {
+		absApproxPath := log.AbsPathOrExit(approxPath, "dataflow-approximations")
+		compiledPath, compileErr := compileApproximationsIfNeeded(absApproxPath, analyzerJarPath, absProjectModelPath)
+		if compileErr != nil {
+			out.Fatalf("Approximation compilation failed: %s", compileErr)
+		}
+		nativeBuilder.AddDataflowApproximations(compiledPath)
+	}
+
 	analyzerJavaRunner := java.NewJavaRunner().
 		WithSkipVerify(globals.Config.SkipVerify).
 		WithDebugOutput(out.DebugStream("Analyzer")).
@@ -350,38 +391,68 @@ func scan(cmd *cobra.Command) {
 		out.Fatalf("Failed to resolve Java for analyzer: %s", err)
 	}
 
+	var analyzerFail *analyzerError
+	var scanCmdErr *java.JavaCommandError
 	if err := out.RunWithSpinner("Analyzing project", func() error {
-		return scanProject(nativeBuilder, analyzerJavaRunner)
+		var scanErr error
+		scanCmdErr, scanErr = scanProject(nativeBuilder, analyzerJavaRunner)
+		return scanErr
 	}); err != nil {
 		out.Fatalf("Native scan has failed: %s", err)
 	}
+	analyzerFail = classifyAnalyzerError(scanCmdErr)
 
+	// Always attempt to print summary information — even when the analyzer
+	// failed, partial SARIF and rule-load-trace files may have been written.
 	report, err := validation.ValidateSarifOutput(absSarifReportPath)
 	if err != nil {
 		output.LogInfof("Scan output validation failed: %v", err)
-		out.Fatalf("There was a problem during the scan step, check the full logs: %s", globals.LogPath)
+		if analyzerFail == nil {
+			// Analyzer reported success but produced no valid SARIF — treat as failure.
+			out.Error(fmt.Sprintf("There was a problem during the scan step, check the full logs: %s", globals.LogPath))
+			analyzerFail = &analyzerError{exitCode: 1, message: "scan output validation failed"}
+		}
 	}
 
 	out.Blank()
 
 	el, err := validation.ValidateRuleLoadTraceOutput(absSemgrepRuleLoadTracePath)
 	if err != nil {
-		out.Fatalf("Failed to validate rule load trace output: %s", err)
+		output.LogInfof("Rule load trace validation failed: %v", err)
+		if analyzerFail == nil {
+			out.Error(fmt.Sprintf("Failed to validate rule load trace output: %s", err))
+			analyzerFail = &analyzerError{exitCode: 1, message: "rule load trace validation failed"}
+		}
 	}
-	ruleLoadTraceSummary := load_trace.CollectRuleLoadTraceSummary(el, nonBuiltinRulesetPaths)
 
-	res := load_trace.CollectRulesetLoadErrorsSummary(ruleLoadTraceSummary)
-	ruleLoadErrorsResult := &res
+	if el != nil {
+		ruleLoadTraceSummary := load_trace.CollectRuleLoadTraceSummary(el, nonBuiltinRulesetPaths)
+
+		res := load_trace.CollectRulesetLoadErrorsSummary(ruleLoadTraceSummary)
+		ruleLoadErrorsResult := &res
 
-	sarifSummary := sarif.GenerateSummary(report)
-	load_trace.PrintRuleStatisticsTree(out, ruleLoadErrorsResult, absSemgrepRuleLoadTracePath, sarifSummary)
+		var sarifSummary sarif.Summary
+		if report != nil {
+			sarifSummary = sarif.GenerateSummary(report)
+		}
+		load_trace.PrintRuleStatisticsTree(out, ruleLoadErrorsResult, absSemgrepRuleLoadTracePath, sarifSummary)
 
-	load_trace.PrintSyntaxErrorReport(out, ruleLoadTraceSummary)
+		load_trace.PrintSyntaxErrorReport(out, ruleLoadTraceSummary)
+	}
 
-	// Process the generated SARIF report if it exists
-	printSarifSummary(report, absSarifReportPath)
+	if report != nil {
+		printSarifSummary(report, absSarifReportPath)
+	}
 
-	suggest("To view findings run", utils.NewSummaryCommand(absSarifReportPath).WithShowFindings().Build())
+	if SarifReportPath == "" {
+		utils.RemoveIfExistsOrExit(absSarifReportPath)
+	} else {
+		suggest("To view findings run", fmt.Sprintf("opentaint summary --show-findings %s", absSarifReportPath))
+	}
+
+	if analyzerFail != nil {
+		os.Exit(analyzerFail.exitCode)
+	}
 }
 
 func resolveScanConfig(absUserProjectRoot string) scanConfig {
@@ -502,6 +573,10 @@ func setupSemgrepRuleLoadTrace() string {
 }
 
 func ensureAnalyzerAvailable() (string, error) {
+	if globals.Config.Analyzer.JarPath != "" {
+		return globals.Config.Analyzer.JarPath, nil
+	}
+
 	analyzerJarPath, err := utils.GetAnalyzerJarPath(globals.Config.Analyzer.Version)
 	if err != nil {
 		return "", fmt.Errorf("failed to construct path to the analyzer: %w", err)
@@ -516,7 +591,7 @@ func ensureAnalyzerAvailable() (string, error) {
 	return analyzerJarPath, nil
 }
 
-func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) error {
+func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) (*java.JavaCommandError, error) {
 	analyzerCommand := analyzerBuilder.BuildNativeCommand()
 
 	commandSucceeded := func(err error) bool {
@@ -526,8 +601,6 @@ func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) e
 		}
 		return true
 	}
-	// Execute the command using JavaRunner
-	err := javaRunner.ExecuteJavaCommand(analyzerCommand, commandSucceeded)
 
-	return err
+	return javaRunner.ExecuteJavaCommand(analyzerCommand, commandSucceeded)
 }
diff --git a/cli/internal/globals/global.go b/cli/internal/globals/global.go
index 8af14008f..8bcba3f39 100644
--- a/cli/internal/globals/global.go
+++ b/cli/internal/globals/global.go
@@ -58,10 +58,12 @@ type Github struct {
 
 type Analyzer struct {
 	Version string `mapstructure:"version"`
+	JarPath string `mapstructure:"jar_path"`
 }
 
 type Autobuilder struct {
 	Version string `mapstructure:"version"`
+	JarPath string `mapstructure:"jar_path"`
 }
 
 type Rules struct {
diff --git a/cli/internal/utils/java/runner.go b/cli/internal/utils/java/runner.go
index 1641e301b..a2878250f 100644
--- a/cli/internal/utils/java/runner.go
+++ b/cli/internal/utils/java/runner.go
@@ -24,6 +24,16 @@ const (
 	None
 )
 
+// JavaCommandError is returned when a Java process exits with a non-zero exit code.
+// It preserves the exit code so callers can interpret process-specific status values.
+type JavaCommandError struct {
+	ExitCode int
+}
+
+func (e *JavaCommandError) Error() string {
+	return fmt.Sprintf("java command failed with exit code %d", e.ExitCode)
+}
+
 type JavaRunner interface {
 	TrySystem() JavaRunner
 	TrySpecificVersion(version int) JavaRunner
@@ -35,7 +45,7 @@ type JavaRunner interface {
 	// Call this before wrapping ExecuteJavaCommand in a spinner to avoid
 	// download progress bars overlapping with spinner output.
 	EnsureJava() (string, error)
-	ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) error
+	ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) (*JavaCommandError, error)
 }
 
 type DebugLineWriter interface {
@@ -147,9 +157,9 @@ func (j *javaRunner) EnsureJava() (string, error) {
 	return "", fmt.Errorf("all Java resolution attempts failed")
 }
 
-func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) error {
+func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) (*JavaCommandError, error) {
 	if len(args) == 0 {
-		return fmt.Errorf("no Java command arguments provided")
+		return nil, fmt.Errorf("no Java command arguments provided")
 	}
 
 	// If EnsureJava was called, use the pre-resolved path directly
@@ -157,6 +167,7 @@ func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(err
 		return j.executeWithJava(j.resolvedJavaPath, Specific, args, commandSucceeded)
 	}
 
+	var lastCmdErr *JavaCommandError
 	resolutionStrategies := j.GetJavaResolutions()
 	for i, resolutionStrategy := range resolutionStrategies {
 		javaPath, strategy, err := resolutionStrategy()
@@ -165,17 +176,26 @@ func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(err
 			continue
 		}
 
-		if err := j.executeWithJava(javaPath, strategy, args, commandSucceeded); err == nil {
-			return nil
+		cmdErr, execErr := j.executeWithJava(javaPath, strategy, args, commandSucceeded)
+		if execErr != nil {
+			output.LogDebugf("Java command setup failed (attempt %d): %v", i+1, execErr)
+			continue
+		}
+		if cmdErr == nil {
+			return nil, nil
 		}
 
+		lastCmdErr = cmdErr
 		output.LogDebugf("Java command failed (attempt %d), trying next resolution", i+1)
 	}
 
-	return fmt.Errorf("all Java resolution attempts failed")
+	if lastCmdErr != nil {
+		return lastCmdErr, nil
+	}
+	return nil, fmt.Errorf("all Java resolution attempts failed")
 }
 
-func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrategy, args []string, commandSucceeded func(error) bool) error {
+func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrategy, args []string, commandSucceeded func(error) bool) (*JavaCommandError, error) {
 	cmdArgs := append([]string{javaPath}, args...)
 	cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...)
 
@@ -190,16 +210,16 @@ func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrateg
 	// Create pipes for stdout and stderr
 	stdoutPipe, err := cmd.StdoutPipe()
 	if err != nil {
-		return fmt.Errorf("failed to create stdout pipe: %w", err)
+		return nil, fmt.Errorf("failed to create stdout pipe: %w", err)
 	}
 
 	stderrPipe, err := cmd.StderrPipe()
 	if err != nil {
-		return fmt.Errorf("failed to create stderr pipe: %w", err)
+		return nil, fmt.Errorf("failed to create stderr pipe: %w", err)
 	}
 
 	if err := cmd.Start(); err != nil {
-		return fmt.Errorf("failed to start Java command: %w", err)
+		return nil, fmt.Errorf("failed to start Java command: %w", err)
 	}
 
 	streamToTerminal := globals.Config.Output.Debug
@@ -230,9 +250,10 @@ func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrateg
 	// Wait for the command to finish
 	err = cmd.Wait()
 
-	// Log any errors at debug level (caller decides severity)
+	// Extract exit code from the process error
+	exitCode := 0
 	if err != nil {
-		exitCode := 1
+		exitCode = 1
 		if exitErr, ok := err.(*exec.ExitError); ok {
 			exitCode = exitErr.ExitCode()
 		}
@@ -240,10 +261,10 @@ func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrateg
 	}
 
 	if commandSucceeded(err) {
-		return nil
+		return nil, nil
 	}
 
-	return fmt.Errorf("java command failed")
+	return &JavaCommandError{ExitCode: exitCode}, nil
 }
 
 func (j *javaRunner) TrySpecificVersion(version int) JavaRunner {
diff --git a/cli/internal/utils/java/runner_test.go b/cli/internal/utils/java/runner_test.go
index a04b19cde..fa0815d0a 100644
--- a/cli/internal/utils/java/runner_test.go
+++ b/cli/internal/utils/java/runner_test.go
@@ -332,8 +332,11 @@ func TestJavaRunner_GetJavaResolutions_BothStrategies(t *testing.T) {
 func TestJavaRunner_ExecuteJavaCommand_NoArgs(t *testing.T) {
 	runner := NewJavaRunner()
 
-	err := runner.ExecuteJavaCommand([]string{}, func(error) bool { return true })
+	cmdErr, err := runner.ExecuteJavaCommand([]string{}, func(error) bool { return true })
 
+	if cmdErr != nil {
+		t.Error("Expected no JavaCommandError for missing arguments")
+	}
 	if err == nil {
 		t.Error("Expected error when no arguments provided")
 	}

From 5bf7a094ef39da43b08dac0274676346522553e6 Mon Sep 17 00:00:00 2001
From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com>
Date: Wed, 22 Apr 2026 12:46:54 +0300
Subject: [PATCH 2/6] Add experimental flag

---
 cli/cmd/root.go | 59 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/cli/cmd/root.go b/cli/cmd/root.go
index 5ab892d04..4b15882a1 100644
--- a/cli/cmd/root.go
+++ b/cli/cmd/root.go
@@ -13,10 +13,16 @@ import (
 	"github.com/seqra/opentaint/internal/utils/log"
 	"github.com/seqra/opentaint/internal/version"
 	"github.com/spf13/cobra"
+	"github.com/spf13/pflag"
 	"github.com/spf13/viper"
 )
 
-var toolVersion bool
+const experimentalFlagName = "experimental"
+
+var (
+	toolVersion      bool
+	experimentalMode bool
+)
 
 // out is the global output printer used by all commands for user-facing output.
 // It is configured in PersistentPreRunE after logging is set up.
@@ -34,6 +40,8 @@ var rootCmd = &cobra.Command{
 	SilenceUsage:  true,
 
 	PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
+		applyExperimentalFlagVisibility(cmd.Root(), experimentalMode)
+
 		if err := log.SetUpLogs(); err != nil {
 			return fmt.Errorf("failed to set up logging: %w", err)
 		}
@@ -83,12 +91,15 @@ func Execute() {
 
 func init() {
 	cobra.OnInitialize(initConfig)
+	configureExperimentalFlagVisibility()
 
 	// Here you will define your flags and configuration settings.
 	// Cobra supports persistent flags, which, if defined here,
 	// will be global for your application.
 
 	rootCmd.PersistentFlags().StringVar(&globals.ConfigFile, "config", "", "Path to a config file")
+	rootCmd.PersistentFlags().BoolVar(&experimentalMode, experimentalFlagName, false, "Show experimental and hidden flags")
+	_ = rootCmd.PersistentFlags().MarkHidden(experimentalFlagName)
 
 	rootCmd.Flags().BoolVarP(&toolVersion, "version", "v", false, "Print the version information")
 
@@ -190,6 +201,52 @@ func addConfigFields(cmd *cobra.Command, sb *output.SectionBuilder) {
 	}
 }
 
+func configureExperimentalFlagVisibility() {
+	defaultHelpFunc := rootCmd.HelpFunc()
+	defaultUsageFunc := rootCmd.UsageFunc()
+
+	rootCmd.SetHelpFunc(func(cmd *cobra.Command, args []string) {
+		applyExperimentalFlagVisibility(cmd.Root(), experimentalMode)
+		defaultHelpFunc(cmd, args)
+	})
+	rootCmd.SetUsageFunc(func(cmd *cobra.Command) error {
+		applyExperimentalFlagVisibility(cmd.Root(), experimentalMode)
+		return defaultUsageFunc(cmd)
+	})
+}
+
+func applyExperimentalFlagVisibility(root *cobra.Command, enabled bool) {
+	if !enabled || root == nil {
+		return
+	}
+
+	visitCommandTree(root, func(cmd *cobra.Command) {
+		setFlagSetHidden(cmd.LocalFlags(), false)
+		setFlagSetHidden(cmd.PersistentFlags(), false)
+	})
+}
+
+func visitCommandTree(root *cobra.Command, visit func(*cobra.Command)) {
+	if root == nil {
+		return
+	}
+
+	visit(root)
+	for _, child := range root.Commands() {
+		visitCommandTree(child, visit)
+	}
+}
+
+func setFlagSetHidden(flags *pflag.FlagSet, hidden bool) {
+	if flags == nil {
+		return
+	}
+
+	flags.VisitAll(func(flag *pflag.Flag) {
+		flag.Hidden = hidden
+	})
+}
+
 // checkForUpdateAsync checks for a newer version in the background, throttled to once per day.
 func checkForUpdateAsync() {
 	currentVersion := version.GetVersion()

From 5499bb10776a7372576c8b68a9795fbb61745075 Mon Sep 17 00:00:00 2001
From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com>
Date: Wed, 22 Apr 2026 13:08:55 +0300
Subject: [PATCH 3/6] Fix defer usage in approximations compilation

---
 cli/cmd/compile_approximations.go | 80 ++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 33 deletions(-)

diff --git a/cli/cmd/compile_approximations.go b/cli/cmd/compile_approximations.go
index b09cae861..8b522d58a 100644
--- a/cli/cmd/compile_approximations.go
+++ b/cli/cmd/compile_approximations.go
@@ -155,7 +155,7 @@ func extractApproxClassesFromJar(jarPath string) (string, error) {
 	if err != nil {
 		return "", fmt.Errorf("failed to open JAR: %w", err)
 	}
-	defer r.Close()
+	defer func() { _ = r.Close() }()
 
 	extractDir, err := os.MkdirTemp("", "opentaint-approx-deps-*")
 	if err != nil {
@@ -164,44 +164,58 @@ func extractApproxClassesFromJar(jarPath string) (string, error) {
 
 	const prefix = "opentaint-dataflow-approximations/"
 	for _, f := range r.File {
-		if !strings.HasPrefix(f.Name, prefix) {
-			continue
-		}
-		if f.FileInfo().IsDir() {
-			continue
-		}
-		relPath := strings.TrimPrefix(f.Name, prefix)
-		if relPath == "" {
-			continue
-		}
-		destPath := filepath.Join(extractDir, relPath)
-		if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
-			_ = os.RemoveAll(extractDir)
-			return "", err
-		}
-		src, err := f.Open()
-		if err != nil {
-			_ = os.RemoveAll(extractDir)
-			return "", err
-		}
-		dst, err := os.Create(destPath)
-		if err != nil {
-			src.Close()
-			_ = os.RemoveAll(extractDir)
-			return "", err
-		}
-		_, err = io.Copy(dst, src)
-		src.Close()
-		dst.Close()
-		if err != nil {
-			_ = os.RemoveAll(extractDir)
-			return "", err
+		classExtErr := extractApproxClass(f, prefix, extractDir)
+		if classExtErr != nil {
+			return "", classExtErr
 		}
 	}
 
 	return extractDir, nil
 }
 
+func extractApproxClass(f *zip.File, prefix string, extractDir string) error {
+	if !strings.HasPrefix(f.Name, prefix) {
+		return nil
+	}
+	if f.FileInfo().IsDir() {
+		return nil
+	}
+
+	relPath := strings.TrimPrefix(f.Name, prefix)
+	if relPath == "" {
+		return nil
+	}
+
+	destPath := filepath.Join(extractDir, relPath)
+	if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
+		_ = os.RemoveAll(extractDir)
+		return err
+	}
+
+	src, err := f.Open()
+	if err != nil {
+		_ = os.RemoveAll(extractDir)
+		return err
+	}
+	defer func() { _ = src.Close() }()
+
+	dst, err := os.Create(destPath)
+	if err != nil {
+		_ = os.RemoveAll(extractDir)
+		return err
+	}
+	defer func() { _ = dst.Close() }()
+
+	_, err = io.Copy(dst, src)
+
+	if err != nil {
+		_ = os.RemoveAll(extractDir)
+		return err
+	}
+
+	return nil
+}
+
 // deriveJavacPath returns the path to javac given the path to java.
 func deriveJavacPath(javaPath string) string {
 	dir := filepath.Dir(javaPath)

From 42051e8f7f683e6b475ed51309d8674bbf2cfdbc Mon Sep 17 00:00:00 2001
From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com>
Date: Wed, 22 Apr 2026 13:37:33 +0300
Subject: [PATCH 4/6] Avoid sarif removal

---
 cli/cmd/scan.go | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go
index 819a5f55a..21a2f0981 100644
--- a/cli/cmd/scan.go
+++ b/cli/cmd/scan.go
@@ -402,8 +402,6 @@ func scan(cmd *cobra.Command) {
 	}
 	analyzerFail = classifyAnalyzerError(scanCmdErr)
 
-	// Always attempt to print summary information — even when the analyzer
-	// failed, partial SARIF and rule-load-trace files may have been written.
 	report, err := validation.ValidateSarifOutput(absSarifReportPath)
 	if err != nil {
 		output.LogInfof("Scan output validation failed: %v", err)
@@ -442,12 +440,7 @@ func scan(cmd *cobra.Command) {
 
 	if report != nil {
 		printSarifSummary(report, absSarifReportPath)
-	}
-
-	if SarifReportPath == "" {
-		utils.RemoveIfExistsOrExit(absSarifReportPath)
-	} else {
-		suggest("To view findings run", fmt.Sprintf("opentaint summary --show-findings %s", absSarifReportPath))
+		suggest("To view findings run", utils.NewSummaryCommand(absSarifReportPath).WithShowFindings().Build())
 	}
 
 	if analyzerFail != nil {

From 8b91a34c49305f12f2b1a8e98d2b3be7e5f6a2d3 Mon Sep 17 00:00:00 2001
From: Aleksandr Misonizhnik <misonijnik@gmail.com>
Date: Wed, 22 Apr 2026 14:30:09 +0300
Subject: [PATCH 5/6] refactor: hide experimental scan flags and extract
 analyzer domain

Fix silent MarkHidden on the scan command: flags registered with
scanCmd.Flags() were being hidden via scanCmd.PersistentFlags(), whose
Lookup could not find them, so the MarkHidden error was swallowed and
--code-flow-limit, --approximations-config, --dataflow-approximations,
--track-external-methods, and --debug-fact-reachability-sarif all
leaked into the default help output.

Separate analyzer domain logic from the cmd layer:
- internal/analyzer/exit.go owns exit-code constants, ExitMessage,
  Error type, and Classify (no I/O); scan.go handles presentation
- java.DeriveJavacPath moves to internal/utils/java/detection.go and
  is now Windows-aware
- utils.ExtractZipPrefix + copyZipEntry consolidate zip-prefix
  extraction with deferred cleanup, removing four repeated
  os.RemoveAll calls
- compile_approximations.go is split into collectJavaSources,
  resolveJavacPath, buildApproxClasspath, runJavac
---
 cli/cmd/analyzer_exit.go             |  68 ----------
 cli/cmd/compile_approximations.go    | 187 ++++++++++-----------------
 cli/cmd/scan.go                      |  23 ++--
 cli/internal/analyzer/exit.go        |  69 ++++++++++
 cli/internal/utils/extract.go        |  52 ++++++++
 cli/internal/utils/java/detection.go |  13 ++
 6 files changed, 214 insertions(+), 198 deletions(-)
 delete mode 100644 cli/cmd/analyzer_exit.go
 create mode 100644 cli/internal/analyzer/exit.go

diff --git a/cli/cmd/analyzer_exit.go b/cli/cmd/analyzer_exit.go
deleted file mode 100644
index 8dd5a3cf4..000000000
--- a/cli/cmd/analyzer_exit.go
+++ /dev/null
@@ -1,68 +0,0 @@
-package cmd
-
-import (
-	"fmt"
-
-	"github.com/seqra/opentaint/internal/utils/java"
-)
-
-// Analyzer exit codes as seen by the OS (unsigned byte values).
-// These correspond to the Kotlin exitProcess() calls in AbstractAnalyzerRunner:
-//
-//	exitProcess(-1)  → 255  (project configuration error)
-//	exitProcess(-2)  → 254  (analysis timeout)
-//	exitProcess(-3)  → 253  (out of memory)
-//	exitProcess(-4)  → 252  (unhandled exception)
-const (
-	analyzerExitConfigError = 255
-	analyzerExitTimeout     = 254
-	analyzerExitOOM         = 253
-	analyzerExitException   = 252
-)
-
-// analyzerError holds information about an analyzer failure.
-// exitCode is the process exit code to forward to os.Exit.
-type analyzerError struct {
-	exitCode int
-	message  string
-}
-
-// analyzerExitMessage returns a human-readable description for a known
-// analyzer exit code, or empty string if the code is not recognized.
-func analyzerExitMessage(code int) string {
-	switch code {
-	case analyzerExitConfigError:
-		return "project configuration error"
-	case analyzerExitTimeout:
-		return "analysis timed out — try increasing --timeout or --max-memory"
-	case analyzerExitOOM:
-		return "out of memory — try increasing --max-memory (e.g. --max-memory 16G)"
-	case analyzerExitException:
-		return "unhandled analyzer exception"
-	default:
-		return ""
-	}
-}
-
-// classifyAnalyzerError converts a *JavaCommandError into an *analyzerError
-// with a human-readable message. Returns nil when cmdErr is nil.
-//
-// The error message is printed immediately. The caller is responsible for
-// eventually calling os.Exit with the returned exit code after performing
-// any post-failure work (e.g. printing summaries).
-func classifyAnalyzerError(cmdErr *java.JavaCommandError) *analyzerError {
-	if cmdErr == nil {
-		return nil
-	}
-
-	code := cmdErr.ExitCode
-	if msg := analyzerExitMessage(code); msg != "" {
-		formatted := fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg)
-		out.Error(formatted)
-		return &analyzerError{exitCode: code, message: formatted}
-	}
-
-	formatted := fmt.Sprintf("Analysis failed with exit code %d", code)
-	out.Error(formatted)
-	return &analyzerError{exitCode: code, message: formatted}
-}
diff --git a/cli/cmd/compile_approximations.go b/cli/cmd/compile_approximations.go
index 8b522d58a..ebd0ec3b2 100644
--- a/cli/cmd/compile_approximations.go
+++ b/cli/cmd/compile_approximations.go
@@ -1,9 +1,7 @@
 package cmd
 
 import (
-	"archive/zip"
 	"fmt"
-	"io"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -11,10 +9,15 @@ import (
 
 	"github.com/seqra/opentaint/internal/globals"
 	"github.com/seqra/opentaint/internal/output"
+	"github.com/seqra/opentaint/internal/utils"
 	"github.com/seqra/opentaint/internal/utils/java"
 	"github.com/seqra/opentaint/internal/utils/project"
 )
 
+// approxClassesJarPrefix is the path prefix under which the analyzer fat JAR
+// bundles approximation support sources (OpentaintNdUtil, ArgumentTypeContext).
+const approxClassesJarPrefix = "opentaint-dataflow-approximations/"
+
 // compileApproximationsIfNeeded checks whether a --dataflow-approximations directory
 // contains .java source files. If so, it compiles them using javac (with the
 // analyzer JAR + project dependencies on the classpath) and returns the path to
@@ -29,15 +32,53 @@ func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, pr
 	if err != nil {
 		return "", fmt.Errorf("approximation path does not exist: %w", err)
 	}
-
-	// If it's a single file, return as-is (nothing to compile)
 	if !info.IsDir() {
 		return approxPath, nil
 	}
 
-	// Collect .java files in the directory tree
+	javaFiles, err := collectJavaSources(approxPath)
+	if err != nil {
+		return "", err
+	}
+	if len(javaFiles) == 0 {
+		return approxPath, nil
+	}
+
+	output.LogInfof("Found %d .java file(s) in approximations directory, compiling...", len(javaFiles))
+
+	javacPath, err := resolveJavacPath()
+	if err != nil {
+		return "", err
+	}
+
+	extractedDir, err := os.MkdirTemp("", "opentaint-approx-deps-*")
+	if err != nil {
+		return "", fmt.Errorf("failed to create temp directory for approximation deps: %w", err)
+	}
+	defer func() { _ = os.RemoveAll(extractedDir) }()
+
+	if err := utils.ExtractZipPrefix(analyzerJarPath, approxClassesJarPrefix, extractedDir); err != nil {
+		return "", fmt.Errorf("failed to extract approximation classes from analyzer JAR: %w", err)
+	}
+
+	outputDir, err := os.MkdirTemp("", "opentaint-approx-compiled-*")
+	if err != nil {
+		return "", fmt.Errorf("failed to create temp directory for compiled approximations: %w", err)
+	}
+
+	classpath := buildApproxClasspath(analyzerJarPath, extractedDir, projectModelDir)
+	if err := runJavac(javacPath, classpath, outputDir, javaFiles); err != nil {
+		_ = os.RemoveAll(outputDir)
+		return "", err
+	}
+
+	output.LogInfof("Approximation compilation succeeded, output: %s", outputDir)
+	return outputDir, nil
+}
+
+func collectJavaSources(root string) ([]string, error) {
 	var javaFiles []string
-	_ = filepath.Walk(approxPath, func(path string, fi os.FileInfo, walkErr error) error {
+	err := filepath.Walk(root, func(path string, fi os.FileInfo, walkErr error) error {
 		if walkErr != nil {
 			return walkErr
 		}
@@ -46,15 +87,13 @@ func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, pr
 		}
 		return nil
 	})
-
-	if len(javaFiles) == 0 {
-		// No Java sources — directory may contain .class files or be empty; pass through.
-		return approxPath, nil
+	if err != nil {
+		return nil, fmt.Errorf("failed to walk approximations directory: %w", err)
 	}
+	return javaFiles, nil
+}
 
-	output.LogInfof("Found %d .java file(s) in approximations directory, compiling...", len(javaFiles))
-
-	// Resolve javac from the managed JDK
+func resolveJavacPath() (string, error) {
 	javacRunner := java.NewJavaRunner().
 		WithSkipVerify(globals.Config.SkipVerify).
 		WithImageType(java.AdoptiumImageJDK).
@@ -66,34 +105,24 @@ func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, pr
 		return "", fmt.Errorf("failed to resolve Java for approximation compilation: %w", err)
 	}
 
-	javacPath := deriveJavacPath(javaPath)
+	javacPath := java.DeriveJavacPath(javaPath)
 	if _, err := os.Stat(javacPath); err != nil {
 		return "", fmt.Errorf("javac not found at %s (resolved from java at %s). A JDK (not JRE) is required to compile approximation sources", javacPath, javaPath)
 	}
+	return javacPath, nil
+}
 
-	// Extract approximation support classes from the analyzer JAR.
-	// The JAR bundles utility classes (OpentaintNdUtil, ArgumentTypeContext)
-	// under "opentaint-dataflow-approximations/" prefix.
-	extractedDir, err := extractApproxClassesFromJar(analyzerJarPath)
-	if err != nil {
-		return "", fmt.Errorf("failed to extract approximation classes from analyzer JAR: %w", err)
-	}
-
-	// Create temp output directory for compiled .class files
-	outputDir, err := os.MkdirTemp("", "opentaint-approx-compiled-*")
-	if err != nil {
-		_ = os.RemoveAll(extractedDir)
-		return "", fmt.Errorf("failed to create temp directory for compiled approximations: %w", err)
-	}
-
-	// Build classpath:
-	// 1. Analyzer JAR — contains @Approximate, @ApproximateByName annotations
-	// 2. Extracted approximation utilities — OpentaintNdUtil, ArgumentTypeContext
-	// 3. Project dependencies — library JARs that approximation code may reference
-	cpParts := []string{analyzerJarPath, extractedDir}
-	cpParts = append(cpParts, resolveProjectDependencies(projectModelDir)...)
-	classpath := strings.Join(cpParts, string(os.PathListSeparator))
+// buildApproxClasspath assembles the javac classpath for approximation compilation:
+//  1. Analyzer JAR — contains @Approximate, @ApproximateByName annotations
+//  2. Extracted approximation utilities — OpentaintNdUtil, ArgumentTypeContext
+//  3. Project dependencies — library JARs that approximation code may reference
+func buildApproxClasspath(analyzerJarPath, extractedDir, projectModelDir string) string {
+	parts := []string{analyzerJarPath, extractedDir}
+	parts = append(parts, resolveProjectDependencies(projectModelDir)...)
+	return strings.Join(parts, string(os.PathListSeparator))
+}
 
+func runJavac(javacPath, classpath, outputDir string, javaFiles []string) error {
 	args := []string{
 		"-source", "8",
 		"-target", "8",
@@ -106,20 +135,13 @@ func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, pr
 
 	cmd := exec.Command(javacPath, args...)
 	cmdOutput, cmdErr := cmd.CombinedOutput()
-
-	// Always clean up extracted dependencies
-	_ = os.RemoveAll(extractedDir)
-
 	if cmdErr != nil {
-		_ = os.RemoveAll(outputDir)
-		return "", fmt.Errorf(
+		return fmt.Errorf(
 			"approximation compilation failed:\n%s\njavac exited with: %w",
 			string(cmdOutput), cmdErr,
 		)
 	}
-
-	output.LogInfof("Approximation compilation succeeded, output: %s", outputDir)
-	return outputDir, nil
+	return nil
 }
 
 // resolveProjectDependencies reads project.yaml from the project model directory
@@ -146,78 +168,3 @@ func resolveProjectDependencies(projectModelDir string) []string {
 	output.LogDebugf("Resolved %d project dependencies for approximation classpath", len(absDeps))
 	return absDeps
 }
-
-// extractApproxClassesFromJar extracts bundled approximation support classes
-// from the analyzer fat JAR. These are stored under "opentaint-dataflow-approximations/"
-// prefix and need standard package structure for javac to find them.
-func extractApproxClassesFromJar(jarPath string) (string, error) {
-	r, err := zip.OpenReader(jarPath)
-	if err != nil {
-		return "", fmt.Errorf("failed to open JAR: %w", err)
-	}
-	defer func() { _ = r.Close() }()
-
-	extractDir, err := os.MkdirTemp("", "opentaint-approx-deps-*")
-	if err != nil {
-		return "", err
-	}
-
-	const prefix = "opentaint-dataflow-approximations/"
-	for _, f := range r.File {
-		classExtErr := extractApproxClass(f, prefix, extractDir)
-		if classExtErr != nil {
-			return "", classExtErr
-		}
-	}
-
-	return extractDir, nil
-}
-
-func extractApproxClass(f *zip.File, prefix string, extractDir string) error {
-	if !strings.HasPrefix(f.Name, prefix) {
-		return nil
-	}
-	if f.FileInfo().IsDir() {
-		return nil
-	}
-
-	relPath := strings.TrimPrefix(f.Name, prefix)
-	if relPath == "" {
-		return nil
-	}
-
-	destPath := filepath.Join(extractDir, relPath)
-	if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
-		_ = os.RemoveAll(extractDir)
-		return err
-	}
-
-	src, err := f.Open()
-	if err != nil {
-		_ = os.RemoveAll(extractDir)
-		return err
-	}
-	defer func() { _ = src.Close() }()
-
-	dst, err := os.Create(destPath)
-	if err != nil {
-		_ = os.RemoveAll(extractDir)
-		return err
-	}
-	defer func() { _ = dst.Close() }()
-
-	_, err = io.Copy(dst, src)
-
-	if err != nil {
-		_ = os.RemoveAll(extractDir)
-		return err
-	}
-
-	return nil
-}
-
-// deriveJavacPath returns the path to javac given the path to java.
-func deriveJavacPath(javaPath string) string {
-	dir := filepath.Dir(javaPath)
-	return filepath.Join(dir, "javac")
-}
diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go
index 21a2f0981..9d909e52c 100644
--- a/cli/cmd/scan.go
+++ b/cli/cmd/scan.go
@@ -6,6 +6,7 @@ import (
 	"path/filepath"
 	"time"
 
+	"github.com/seqra/opentaint/internal/analyzer"
 	"github.com/seqra/opentaint/internal/load_trace"
 	"github.com/seqra/opentaint/internal/sarif"
 	"github.com/seqra/opentaint/internal/validation"
@@ -124,7 +125,7 @@ func init() {
 	scanCmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)")
 	_ = viper.BindPFlag("scan.max_memory", scanCmd.Flags().Lookup("max-memory"))
 	scanCmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)")
-	_ = scanCmd.PersistentFlags().MarkHidden("code-flow-limit")
+	_ = scanCmd.Flags().MarkHidden("code-flow-limit")
 	_ = viper.BindPFlag("scan.code_flow_limit", scanCmd.Flags().Lookup("code-flow-limit"))
 	scanCmd.Flags().BoolVar(&DryRunScan, "dry-run", false, "Validate inputs and show what would run without compiling or scanning")
 	scanCmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists")
@@ -133,16 +134,16 @@ func init() {
 	scanCmd.Flags().StringArrayVar(&RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)")
 
 	scanCmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, "YAML passThrough approximations config (OVERRIDE mode, repeatable)")
-	_ = scanCmd.PersistentFlags().MarkHidden("approximations-config")
+	_ = scanCmd.Flags().MarkHidden("approximations-config")
 
 	scanCmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files (repeatable)")
-	_ = scanCmd.PersistentFlags().MarkHidden("dataflow-approximations")
+	_ = scanCmd.Flags().MarkHidden("dataflow-approximations")
 
 	scanCmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-methods-{without,with}-rules.yaml next to the SARIF report")
-	_ = scanCmd.PersistentFlags().MarkHidden("track-external-methods")
+	_ = scanCmd.Flags().MarkHidden("track-external-methods")
 
 	scanCmd.Flags().BoolVar(&DebugFactReachabilitySarif, "debug-fact-reachability-sarif", false, "Generate SARIF with fact reachability info (debug; use with a single rule only)")
-	_ = scanCmd.PersistentFlags().MarkHidden("debug-fact-reachability-sarif")
+	_ = scanCmd.Flags().MarkHidden("debug-fact-reachability-sarif")
 }
 
 // currentScanBuilder returns a builder pre-populated with the user's current scan flags.
@@ -391,7 +392,7 @@ func scan(cmd *cobra.Command) {
 		out.Fatalf("Failed to resolve Java for analyzer: %s", err)
 	}
 
-	var analyzerFail *analyzerError
+	var analyzerFail *analyzer.Error
 	var scanCmdErr *java.JavaCommandError
 	if err := out.RunWithSpinner("Analyzing project", func() error {
 		var scanErr error
@@ -400,7 +401,9 @@ func scan(cmd *cobra.Command) {
 	}); err != nil {
 		out.Fatalf("Native scan has failed: %s", err)
 	}
-	analyzerFail = classifyAnalyzerError(scanCmdErr)
+	if analyzerFail = analyzer.Classify(scanCmdErr); analyzerFail != nil {
+		out.Error(analyzerFail.Message)
+	}
 
 	report, err := validation.ValidateSarifOutput(absSarifReportPath)
 	if err != nil {
@@ -408,7 +411,7 @@ func scan(cmd *cobra.Command) {
 		if analyzerFail == nil {
 			// Analyzer reported success but produced no valid SARIF — treat as failure.
 			out.Error(fmt.Sprintf("There was a problem during the scan step, check the full logs: %s", globals.LogPath))
-			analyzerFail = &analyzerError{exitCode: 1, message: "scan output validation failed"}
+			analyzerFail = &analyzer.Error{ExitCode: 1, Message: "scan output validation failed"}
 		}
 	}
 
@@ -419,7 +422,7 @@ func scan(cmd *cobra.Command) {
 		output.LogInfof("Rule load trace validation failed: %v", err)
 		if analyzerFail == nil {
 			out.Error(fmt.Sprintf("Failed to validate rule load trace output: %s", err))
-			analyzerFail = &analyzerError{exitCode: 1, message: "rule load trace validation failed"}
+			analyzerFail = &analyzer.Error{ExitCode: 1, Message: "rule load trace validation failed"}
 		}
 	}
 
@@ -444,7 +447,7 @@ func scan(cmd *cobra.Command) {
 	}
 
 	if analyzerFail != nil {
-		os.Exit(analyzerFail.exitCode)
+		os.Exit(analyzerFail.ExitCode)
 	}
 }
 
diff --git a/cli/internal/analyzer/exit.go b/cli/internal/analyzer/exit.go
new file mode 100644
index 000000000..6d38e7599
--- /dev/null
+++ b/cli/internal/analyzer/exit.go
@@ -0,0 +1,69 @@
+// Package analyzer holds OpenTaint analyzer domain logic that is independent
+// of the CLI presentation layer. It currently covers exit-code classification
+// for analyzer process failures.
+package analyzer
+
+import (
+	"fmt"
+
+	"github.com/seqra/opentaint/internal/utils/java"
+)
+
+// Analyzer exit codes as seen by the OS (unsigned byte values).
+// These correspond to the Kotlin exitProcess() calls in AbstractAnalyzerRunner:
+//
+//	exitProcess(-1)  → 255  (project configuration error)
+//	exitProcess(-2)  → 254  (analysis timeout)
+//	exitProcess(-3)  → 253  (out of memory)
+//	exitProcess(-4)  → 252  (unhandled exception)
+const (
+	ExitConfigError = 255
+	ExitTimeout     = 254
+	ExitOOM         = 253
+	ExitException   = 252
+)
+
+// Error holds information about an analyzer failure. ExitCode is the process
+// exit code to forward to os.Exit; Message is a human-readable description.
+type Error struct {
+	ExitCode int
+	Message  string
+}
+
+// ExitMessage returns a human-readable description for a known analyzer exit
+// code, or empty string if the code is not recognized.
+func ExitMessage(code int) string {
+	switch code {
+	case ExitConfigError:
+		return "project configuration error"
+	case ExitTimeout:
+		return "analysis timed out — try increasing --timeout or --max-memory"
+	case ExitOOM:
+		return "out of memory — try increasing --max-memory (e.g. --max-memory 16G)"
+	case ExitException:
+		return "unhandled analyzer exception"
+	default:
+		return ""
+	}
+}
+
+// Classify converts a *java.JavaCommandError into an *Error with a formatted
+// message. Returns nil when cmdErr is nil. No I/O is performed — the caller
+// is responsible for presentation and for calling os.Exit(Error.ExitCode).
+func Classify(cmdErr *java.JavaCommandError) *Error {
+	if cmdErr == nil {
+		return nil
+	}
+
+	code := cmdErr.ExitCode
+	if msg := ExitMessage(code); msg != "" {
+		return &Error{
+			ExitCode: code,
+			Message:  fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg),
+		}
+	}
+	return &Error{
+		ExitCode: code,
+		Message:  fmt.Sprintf("Analysis failed with exit code %d", code),
+	}
+}
diff --git a/cli/internal/utils/extract.go b/cli/internal/utils/extract.go
index c2262a133..90421152f 100644
--- a/cli/internal/utils/extract.go
+++ b/cli/internal/utils/extract.go
@@ -56,6 +56,70 @@ func ExtractTar(tr *tar.Reader, basePath, destPath string, isSourceDir bool) err
 	return nil
 }
 
+// ExtractZipPrefix extracts entries whose names begin with prefix from the zip
+// at src into destDir, stripping prefix from each entry's relative path.
+// Directory entries are skipped — parent directories are created on demand.
+// Entries whose stripped path would resolve outside destDir (for example via
+// ".." segments) are rejected, so a crafted archive cannot write elsewhere.
+// Returns an error if the zip cannot be opened, an entry escapes destDir, or
+// any entry fails to write.
+func ExtractZipPrefix(src, prefix, destDir string) error {
+	r, err := zip.OpenReader(src)
+	if err != nil {
+		return fmt.Errorf("failed to open zip: %w", err)
+	}
+	defer func() { _ = r.Close() }()
+
+	for _, f := range r.File {
+		if !strings.HasPrefix(f.Name, prefix) || f.FileInfo().IsDir() {
+			continue
+		}
+		relPath := strings.TrimPrefix(f.Name, prefix)
+		if relPath == "" {
+			continue
+		}
+		destPath := filepath.Join(destDir, relPath)
+		// Zip entry names are untrusted: verify the joined path stays inside destDir.
+		rel, relErr := filepath.Rel(destDir, destPath)
+		if relErr != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+			return fmt.Errorf("zip entry %q escapes destination directory", f.Name)
+		}
+		if err := copyZipEntry(f, destPath); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// copyZipEntry writes the contents of zip entry f to destPath, creating parent
+// directories as needed. A Close error on either handle is returned when no
+// earlier error occurred.
+func copyZipEntry(f *zip.File, destPath string) (err error) {
+	if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
+		return err
+	}
+	src, err := f.Open()
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if cerr := src.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+	dst, err := os.Create(destPath)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if cerr := dst.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+	_, err = io.Copy(dst, src)
+	return err
+}
+
 // ExtractZip extracts the contents of a ZIP file to the specified destination directory.
 func ExtractZip(src, dest string) error {
 	zr, err := zip.OpenReader(src)
diff --git a/cli/internal/utils/java/detection.go b/cli/internal/utils/java/detection.go
index b2cde7b87..736bfb1ca 100644
--- a/cli/internal/utils/java/detection.go
+++ b/cli/internal/utils/java/detection.go
@@ -3,7 +3,9 @@ package java
 import (
 	"os"
 	"os/exec"
+	"path/filepath"
 	"regexp"
+	"runtime"
 	"strconv"
 	"strings"
 
@@ -130,6 +132,17 @@ func validateJavaInstallation(javaPath string) *JavaInstallation {
 	return installation
 }
 
+// DeriveJavacPath returns the javac binary path that sits next to the given
+// java binary. On Windows the binary name is "javac.exe".
+func DeriveJavacPath(javaPath string) string {
+	dir := filepath.Dir(javaPath)
+	name := "javac"
+	if runtime.GOOS == "windows" {
+		name = "javac.exe"
+	}
+	return filepath.Join(dir, name)
+}
+
 func extractVendor(versionOutput string) string {
 	output := strings.ToLower(versionOutput)
 

From 2b66e1fa2d63003dbd16cfaf21dc8fa3e5cfa8c9 Mon Sep 17 00:00:00 2001
From: Aleksandr Misonizhnik <misonijnik@gmail.com>
Date: Wed, 22 Apr 2026 15:05:14 +0300
Subject: [PATCH 6/6] fix: Make code flow limit option visible

---
 cli/cmd/scan.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go
index 9d909e52c..0560753d5 100644
--- a/cli/cmd/scan.go
+++ b/cli/cmd/scan.go
@@ -125,7 +125,6 @@ func init() {
 	scanCmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)")
 	_ = viper.BindPFlag("scan.max_memory", scanCmd.Flags().Lookup("max-memory"))
 	scanCmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)")
-	_ = scanCmd.Flags().MarkHidden("code-flow-limit")
 	_ = viper.BindPFlag("scan.code_flow_limit", scanCmd.Flags().Lookup("code-flow-limit"))
 	scanCmd.Flags().BoolVar(&DryRunScan, "dry-run", false, "Validate inputs and show what would run without compiling or scanning")
 	scanCmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists")