From a906ff4a4043a9ee98fa6e4df8c80ad589fdc778 Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Wed, 22 Apr 2026 12:35:42 +0300 Subject: [PATCH 1/6] Add scan experimental flags --- cli/cmd/analyzer_exit.go | 68 ++++++++ cli/cmd/command_builder.go | 80 ++++++++-- cli/cmd/compile.go | 10 +- cli/cmd/compile_approximations.go | 209 +++++++++++++++++++++++++ cli/cmd/project.go | 5 +- cli/cmd/root.go | 8 + cli/cmd/scan.go | 123 ++++++++++++--- cli/internal/globals/global.go | 2 + cli/internal/utils/java/runner.go | 49 ++++-- cli/internal/utils/java/runner_test.go | 5 +- 10 files changed, 502 insertions(+), 57 deletions(-) create mode 100644 cli/cmd/analyzer_exit.go create mode 100644 cli/cmd/compile_approximations.go diff --git a/cli/cmd/analyzer_exit.go b/cli/cmd/analyzer_exit.go new file mode 100644 index 000000000..8dd5a3cf4 --- /dev/null +++ b/cli/cmd/analyzer_exit.go @@ -0,0 +1,68 @@ +package cmd + +import ( + "fmt" + + "github.com/seqra/opentaint/internal/utils/java" +) + +// Analyzer exit codes as seen by the OS (unsigned byte values). +// These correspond to the Kotlin exitProcess() calls in AbstractAnalyzerRunner: +// +// exitProcess(-1) → 255 (project configuration error) +// exitProcess(-2) → 254 (analysis timeout) +// exitProcess(-3) → 253 (out of memory) +// exitProcess(-4) → 252 (unhandled exception) +const ( + analyzerExitConfigError = 255 + analyzerExitTimeout = 254 + analyzerExitOOM = 253 + analyzerExitException = 252 +) + +// analyzerError holds information about an analyzer failure. +// exitCode is the process exit code to forward to os.Exit. +type analyzerError struct { + exitCode int + message string +} + +// analyzerExitMessage returns a human-readable description for a known +// analyzer exit code, or empty string if the code is not recognized. 
+func analyzerExitMessage(code int) string { + switch code { + case analyzerExitConfigError: + return "project configuration error" + case analyzerExitTimeout: + return "analysis timed out — try increasing --timeout or --max-memory" + case analyzerExitOOM: + return "out of memory — try increasing --max-memory (e.g. --max-memory 16G)" + case analyzerExitException: + return "unhandled analyzer exception" + default: + return "" + } +} + +// classifyAnalyzerError converts a *JavaCommandError into an *analyzerError +// with a human-readable message. Returns nil when cmdErr is nil. +// +// The error message is printed immediately. The caller is responsible for +// eventually calling os.Exit with the returned exit code after performing +// any post-failure work (e.g. printing summaries). +func classifyAnalyzerError(cmdErr *java.JavaCommandError) *analyzerError { + if cmdErr == nil { + return nil + } + + code := cmdErr.ExitCode + if msg := analyzerExitMessage(code); msg != "" { + formatted := fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg) + out.Error(formatted) + return &analyzerError{exitCode: code, message: formatted} + } + + formatted := fmt.Sprintf("Analysis failed with exit code %d", code) + out.Error(formatted) + return &analyzerError{exitCode: code, message: formatted} +} diff --git a/cli/cmd/command_builder.go b/cli/cmd/command_builder.go index 772957de1..aa353a07b 100644 --- a/cli/cmd/command_builder.go +++ b/cli/cmd/command_builder.go @@ -42,21 +42,26 @@ func NewAutobuilderBuilder() *AutobuilderBuilder { type AnalyzerBuilder struct { *BaseCommandBuilder - projectPath string - outputDir string - sarifFileName string - sarifCodeFlowLimit int64 - sarifToolVersion string - sarifToolSemanticVersion string - sarifUriBase string - semgrepCompatibility bool - partialFingerprints bool - ifdsAnalysisTimeout int64 - severities []string - ruleSetPaths []string - ruleLoadTracePath string - jarPath string - maxMemory string + projectPath string + outputDir string 
+ sarifFileName string + sarifCodeFlowLimit int64 + sarifToolVersion string + sarifToolSemanticVersion string + sarifUriBase string + semgrepCompatibility bool + partialFingerprints bool + ifdsAnalysisTimeout int64 + severities []string + ruleSetPaths []string + ruleLoadTracePath string + jarPath string + maxMemory string + ruleIDs []string + approximationsConfig []string + dataflowApproximations []string + trackExternalMethods bool + debugFactReachabilitySarif bool } func (a *AnalyzerBuilder) SetProject(projectPath string) *AnalyzerBuilder { @@ -134,6 +139,31 @@ func (a *AnalyzerBuilder) SetMaxMemory(maxMemory string) *AnalyzerBuilder { return a } +func (a *AnalyzerBuilder) AddRuleID(ruleID string) *AnalyzerBuilder { + a.ruleIDs = append(a.ruleIDs, ruleID) + return a +} + +func (a *AnalyzerBuilder) AddApproximationsConfig(configPath string) *AnalyzerBuilder { + a.approximationsConfig = append(a.approximationsConfig, configPath) + return a +} + +func (a *AnalyzerBuilder) AddDataflowApproximations(approxPath string) *AnalyzerBuilder { + a.dataflowApproximations = append(a.dataflowApproximations, approxPath) + return a +} + +func (a *AnalyzerBuilder) SetTrackExternalMethods(track bool) *AnalyzerBuilder { + a.trackExternalMethods = track + return a +} + +func (a *AnalyzerBuilder) EnableDebugFactReachabilitySarif() *AnalyzerBuilder { + a.debugFactReachabilitySarif = true + return a +} + func (a *AnalyzerBuilder) BuildNativeCommand() []string { // For native execution, create a temporary logs directory tempLogsDir, err := os.MkdirTemp("", "opentaint-*") @@ -203,6 +233,26 @@ func (a *AnalyzerBuilder) BuildNativeCommand() []string { flags = append(flags, "--semgrep-rule-load-trace", a.ruleLoadTracePath) } + for _, ruleID := range a.ruleIDs { + flags = append(flags, "--semgrep-rule-id", ruleID) + } + + for _, configPath := range a.approximationsConfig { + flags = append(flags, "--approximations-config", configPath) + } + + for _, approxPath := range 
a.dataflowApproximations { + flags = append(flags, "--dataflow-approximations", approxPath) + } + + if a.trackExternalMethods { + flags = append(flags, "--track-external-methods") + } + + if a.debugFactReachabilitySarif { + flags = append(flags, "--debug-fact-reachability-sarif") + } + return append(command, flags...) } diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go index 4dc97fbaa..f7de00a88 100644 --- a/cli/cmd/compile.go +++ b/cli/cmd/compile.go @@ -118,6 +118,10 @@ func init() { } func ensureAutobuilderAvailable() (string, error) { + if globals.Config.Autobuilder.JarPath != "" { + return globals.Config.Autobuilder.JarPath, nil + } + autobuilderJarPath, err := utils.GetAutobuilderJarPath(globals.Config.Autobuilder.Version) if err != nil { return "", fmt.Errorf("failed to construct path to the autobuilder: %w", err) @@ -189,11 +193,15 @@ func compileProject(absOutputProjectModelPath, absProjectRoot, autobuilderJarPat return true } // Execute the command using JavaRunner - err = javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded) + cmdErr, err := javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded) if err != nil { output.LogInfof("Native compilation has failed: %s", err) return fmt.Errorf("native compilation has failed: %w", err) } + if cmdErr != nil { + output.LogInfof("Native compilation has failed: %s", cmdErr) + return fmt.Errorf("native compilation has failed: %w", cmdErr) + } return nil } diff --git a/cli/cmd/compile_approximations.go b/cli/cmd/compile_approximations.go new file mode 100644 index 000000000..b09cae861 --- /dev/null +++ b/cli/cmd/compile_approximations.go @@ -0,0 +1,209 @@ +package cmd + +import ( + "archive/zip" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/output" + "github.com/seqra/opentaint/internal/utils/java" + "github.com/seqra/opentaint/internal/utils/project" +) + +// 
compileApproximationsIfNeeded checks whether a --dataflow-approximations directory +// contains .java source files. If so, it compiles them using javac (with the +// analyzer JAR + project dependencies on the classpath) and returns the path to +// the compiled .class output directory. If the directory already contains only +// .class files (or no .java files at all), it is returned as-is. +// +// projectModelDir is the directory containing project.yaml — used to resolve +// project dependencies for the javac classpath (approximation code may reference +// library types like org.apache.pdfbox.pdmodel.PDDocument). +func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, projectModelDir string) (string, error) { + info, err := os.Stat(approxPath) + if err != nil { + return "", fmt.Errorf("approximation path does not exist: %w", err) + } + + // If it's a single file, return as-is (nothing to compile) + if !info.IsDir() { + return approxPath, nil + } + + // Collect .java files in the directory tree + var javaFiles []string + _ = filepath.Walk(approxPath, func(path string, fi os.FileInfo, walkErr error) error { + if walkErr != nil { + return walkErr + } + if !fi.IsDir() && strings.HasSuffix(fi.Name(), ".java") { + javaFiles = append(javaFiles, path) + } + return nil + }) + + if len(javaFiles) == 0 { + // No Java sources — directory may contain .class files or be empty; pass through. + return approxPath, nil + } + + output.LogInfof("Found %d .java file(s) in approximations directory, compiling...", len(javaFiles)) + + // Resolve javac from the managed JDK + javacRunner := java.NewJavaRunner(). + WithSkipVerify(globals.Config.SkipVerify). + WithImageType(java.AdoptiumImageJDK). + TrySystem(). 
+ TrySpecificVersion(globals.DefaultJavaVersion) + + javaPath, err := javacRunner.EnsureJava() + if err != nil { + return "", fmt.Errorf("failed to resolve Java for approximation compilation: %w", err) + } + + javacPath := deriveJavacPath(javaPath) + if _, err := os.Stat(javacPath); err != nil { + return "", fmt.Errorf("javac not found at %s (resolved from java at %s). A JDK (not JRE) is required to compile approximation sources", javacPath, javaPath) + } + + // Extract approximation support classes from the analyzer JAR. + // The JAR bundles utility classes (OpentaintNdUtil, ArgumentTypeContext) + // under "opentaint-dataflow-approximations/" prefix. + extractedDir, err := extractApproxClassesFromJar(analyzerJarPath) + if err != nil { + return "", fmt.Errorf("failed to extract approximation classes from analyzer JAR: %w", err) + } + + // Create temp output directory for compiled .class files + outputDir, err := os.MkdirTemp("", "opentaint-approx-compiled-*") + if err != nil { + _ = os.RemoveAll(extractedDir) + return "", fmt.Errorf("failed to create temp directory for compiled approximations: %w", err) + } + + // Build classpath: + // 1. Analyzer JAR — contains @Approximate, @ApproximateByName annotations + // 2. Extracted approximation utilities — OpentaintNdUtil, ArgumentTypeContext + // 3. Project dependencies — library JARs that approximation code may reference + cpParts := []string{analyzerJarPath, extractedDir} + cpParts = append(cpParts, resolveProjectDependencies(projectModelDir)...) + classpath := strings.Join(cpParts, string(os.PathListSeparator)) + + args := []string{ + "-source", "8", + "-target", "8", + "-cp", classpath, + "-d", outputDir, + } + args = append(args, javaFiles...) + + output.LogDebugf("Running javac: %s %s", javacPath, strings.Join(args, " ")) + + cmd := exec.Command(javacPath, args...) 
+ cmdOutput, cmdErr := cmd.CombinedOutput() + + // Always clean up extracted dependencies + _ = os.RemoveAll(extractedDir) + + if cmdErr != nil { + _ = os.RemoveAll(outputDir) + return "", fmt.Errorf( + "approximation compilation failed:\n%s\njavac exited with: %w", + string(cmdOutput), cmdErr, + ) + } + + output.LogInfof("Approximation compilation succeeded, output: %s", outputDir) + return outputDir, nil +} + +// resolveProjectDependencies reads project.yaml from the project model directory +// and returns absolute paths to the dependency JARs listed there. +func resolveProjectDependencies(projectModelDir string) []string { + if projectModelDir == "" { + return nil + } + config, err := project.LoadConfig(projectModelDir) + if err != nil { + output.LogDebugf("Could not read project config for approximation compilation: %v", err) + return nil + } + var absDeps []string + for _, dep := range config.Dependencies { + absPath := dep + if !filepath.IsAbs(dep) { + absPath = filepath.Join(projectModelDir, dep) + } + if _, err := os.Stat(absPath); err == nil { + absDeps = append(absDeps, absPath) + } + } + output.LogDebugf("Resolved %d project dependencies for approximation classpath", len(absDeps)) + return absDeps +} + +// extractApproxClassesFromJar extracts bundled approximation support classes +// from the analyzer fat JAR. These are stored under "opentaint-dataflow-approximations/" +// prefix and need standard package structure for javac to find them. 
+func extractApproxClassesFromJar(jarPath string) (string, error) { + r, err := zip.OpenReader(jarPath) + if err != nil { + return "", fmt.Errorf("failed to open JAR: %w", err) + } + defer r.Close() + + extractDir, err := os.MkdirTemp("", "opentaint-approx-deps-*") + if err != nil { + return "", err + } + + const prefix = "opentaint-dataflow-approximations/" + for _, f := range r.File { + if !strings.HasPrefix(f.Name, prefix) { + continue + } + if f.FileInfo().IsDir() { + continue + } + relPath := strings.TrimPrefix(f.Name, prefix) + if relPath == "" { + continue + } + destPath := filepath.Join(extractDir, relPath) + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + _ = os.RemoveAll(extractDir) + return "", err + } + src, err := f.Open() + if err != nil { + _ = os.RemoveAll(extractDir) + return "", err + } + dst, err := os.Create(destPath) + if err != nil { + src.Close() + _ = os.RemoveAll(extractDir) + return "", err + } + _, err = io.Copy(dst, src) + src.Close() + dst.Close() + if err != nil { + _ = os.RemoveAll(extractDir) + return "", err + } + } + + return extractDir, nil +} + +// deriveJavacPath returns the path to javac given the path to java. 
+func deriveJavacPath(javaPath string) string { + dir := filepath.Dir(javaPath) + return filepath.Join(dir, "javac") +} diff --git a/cli/cmd/project.go b/cli/cmd/project.go index 216ad36d5..9b7564ad0 100644 --- a/cli/cmd/project.go +++ b/cli/cmd/project.go @@ -164,10 +164,13 @@ func (c *JavaAutobuilderConfig) runAutobuilder() error { return true } - err = javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded) + cmdErr, err := javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded) if err != nil { return fmt.Errorf("native autobuilder execution failed: %w", err) } + if cmdErr != nil { + return fmt.Errorf("native autobuilder execution failed: %w", cmdErr) + } config, err := validation.ValidateProjectModelOutput(c.outputDir) if err != nil { diff --git a/cli/cmd/root.go b/cli/cmd/root.go index fea11a5a8..5ab892d04 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -130,6 +130,14 @@ func init() { rootCmd.PersistentFlags().BoolVar(&globals.Config.SkipVerify, "skip-verify", false, "Skip SHA256 checksum verification of downloaded artifacts") _ = viper.BindPFlag("skip-verify", rootCmd.PersistentFlags().Lookup("skip-verify")) + + rootCmd.PersistentFlags().StringVar(&globals.Config.Analyzer.JarPath, "analyzer-jar", "", "Path to analyzer JAR (dev override, skips download)") + _ = rootCmd.PersistentFlags().MarkHidden("analyzer-jar") + _ = viper.BindPFlag("analyzer.jar_path", rootCmd.PersistentFlags().Lookup("analyzer-jar")) + + rootCmd.PersistentFlags().StringVar(&globals.Config.Autobuilder.JarPath, "autobuilder-jar", "", "Path to autobuilder JAR (dev override, skips download)") + _ = rootCmd.PersistentFlags().MarkHidden("autobuilder-jar") + _ = viper.BindPFlag("autobuilder.jar_path", rootCmd.PersistentFlags().Lookup("autobuilder-jar")) } // initConfig reads in config file and ENV variables if set. 
diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 0be614740..819a5f55a 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -23,15 +23,20 @@ import ( ) var ( - UserProjectPath string - ProjectModelPath string - SarifReportPath string - SemgrepCompatibilitySarif bool - Severity []string - Ruleset []string - DryRunScan bool - Recompile bool - ScanLogFile string + UserProjectPath string + ProjectModelPath string + SarifReportPath string + SemgrepCompatibilitySarif bool + Severity []string + Ruleset []string + DryRunScan bool + Recompile bool + ScanLogFile string + RuleID []string + ApproximationsConfig []string + DataflowApproximations []string + TrackExternalMethods bool + DebugFactReachabilitySarif bool ) type RulesetType struct { @@ -125,6 +130,19 @@ func init() { scanCmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists") scanCmd.Flags().StringVar(&ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") scanCmd.Flags().StringVar(&ScanLogFile, "log-file", "", "Path to the log file (default: /logs/.log)") + scanCmd.Flags().StringArrayVar(&RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") + + scanCmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, "YAML passThrough approximations config (OVERRIDE mode, repeatable)") + _ = scanCmd.Flags().MarkHidden("approximations-config") + + scanCmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files (repeatable)") + _ = scanCmd.Flags().MarkHidden("dataflow-approximations") + + scanCmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-methods-{without,with}-rules.yaml next to the SARIF report") + _ = scanCmd.Flags().MarkHidden("track-external-methods") + + scanCmd.Flags().BoolVar(&DebugFactReachabilitySarif, "debug-fact-reachability-sarif", 
false, "Generate SARIF with fact reachability info (debug; use with a single rule only)") + _ = scanCmd.Flags().MarkHidden("debug-fact-reachability-sarif") } // currentScanBuilder returns a builder pre-populated with the user's current scan flags. @@ -334,6 +352,19 @@ func scan(cmd *cobra.Command) { if maxMemory != "" { nativeBuilder.SetMaxMemory(maxMemory) } + for _, ruleID := range RuleID { + nativeBuilder.AddRuleID(ruleID) + } + for _, approxConfig := range ApproximationsConfig { + absApproxConfig := log.AbsPathOrExit(approxConfig, "approximations-config") + nativeBuilder.AddApproximationsConfig(absApproxConfig) + } + if TrackExternalMethods { + nativeBuilder.SetTrackExternalMethods(true) + } + if DebugFactReachabilitySarif { + nativeBuilder.EnableDebugFactReachabilitySarif() + } analyzerJarPath, err := ensureAnalyzerAvailable() if err != nil { @@ -341,6 +372,16 @@ func scan(cmd *cobra.Command) { } nativeBuilder.SetJarPath(analyzerJarPath) + // Process --dataflow-approximations: auto-compile .java sources if needed + for _, approxPath := range DataflowApproximations { + absApproxPath := log.AbsPathOrExit(approxPath, "dataflow-approximations") + compiledPath, compileErr := compileApproximationsIfNeeded(absApproxPath, analyzerJarPath, absProjectModelPath) + if compileErr != nil { + out.Fatalf("Approximation compilation failed: %s", compileErr) + } + nativeBuilder.AddDataflowApproximations(compiledPath) + } + analyzerJavaRunner := java.NewJavaRunner(). WithSkipVerify(globals.Config.SkipVerify). WithDebugOutput(out.DebugStream("Analyzer")). 
@@ -350,38 +391,68 @@ func scan(cmd *cobra.Command) { out.Fatalf("Failed to resolve Java for analyzer: %s", err) } + var analyzerFail *analyzerError + var scanCmdErr *java.JavaCommandError if err := out.RunWithSpinner("Analyzing project", func() error { - return scanProject(nativeBuilder, analyzerJavaRunner) + var scanErr error + scanCmdErr, scanErr = scanProject(nativeBuilder, analyzerJavaRunner) + return scanErr }); err != nil { out.Fatalf("Native scan has failed: %s", err) } + analyzerFail = classifyAnalyzerError(scanCmdErr) + // Always attempt to print summary information — even when the analyzer + // failed, partial SARIF and rule-load-trace files may have been written. report, err := validation.ValidateSarifOutput(absSarifReportPath) if err != nil { output.LogInfof("Scan output validation failed: %v", err) - out.Fatalf("There was a problem during the scan step, check the full logs: %s", globals.LogPath) + if analyzerFail == nil { + // Analyzer reported success but produced no valid SARIF — treat as failure. 
+ out.Error(fmt.Sprintf("There was a problem during the scan step, check the full logs: %s", globals.LogPath)) + analyzerFail = &analyzerError{exitCode: 1, message: "scan output validation failed"} + } } out.Blank() el, err := validation.ValidateRuleLoadTraceOutput(absSemgrepRuleLoadTracePath) if err != nil { - out.Fatalf("Failed to validate rule load trace output: %s", err) + output.LogInfof("Rule load trace validation failed: %v", err) + if analyzerFail == nil { + out.Error(fmt.Sprintf("Failed to validate rule load trace output: %s", err)) + analyzerFail = &analyzerError{exitCode: 1, message: "rule load trace validation failed"} + } } - ruleLoadTraceSummary := load_trace.CollectRuleLoadTraceSummary(el, nonBuiltinRulesetPaths) - res := load_trace.CollectRulesetLoadErrorsSummary(ruleLoadTraceSummary) - ruleLoadErrorsResult := &res + if el != nil { + ruleLoadTraceSummary := load_trace.CollectRuleLoadTraceSummary(el, nonBuiltinRulesetPaths) + + res := load_trace.CollectRulesetLoadErrorsSummary(ruleLoadTraceSummary) + ruleLoadErrorsResult := &res - sarifSummary := sarif.GenerateSummary(report) - load_trace.PrintRuleStatisticsTree(out, ruleLoadErrorsResult, absSemgrepRuleLoadTracePath, sarifSummary) + var sarifSummary sarif.Summary + if report != nil { + sarifSummary = sarif.GenerateSummary(report) + } + load_trace.PrintRuleStatisticsTree(out, ruleLoadErrorsResult, absSemgrepRuleLoadTracePath, sarifSummary) - load_trace.PrintSyntaxErrorReport(out, ruleLoadTraceSummary) + load_trace.PrintSyntaxErrorReport(out, ruleLoadTraceSummary) + } - // Process the generated SARIF report if it exists - printSarifSummary(report, absSarifReportPath) + if report != nil { + printSarifSummary(report, absSarifReportPath) + } - suggest("To view findings run", utils.NewSummaryCommand(absSarifReportPath).WithShowFindings().Build()) + if SarifReportPath == "" { + utils.RemoveIfExistsOrExit(absSarifReportPath) + } else { + suggest("To view findings run", fmt.Sprintf("opentaint summary 
--show-findings %s", absSarifReportPath)) + } + + if analyzerFail != nil { + os.Exit(analyzerFail.exitCode) + } } func resolveScanConfig(absUserProjectRoot string) scanConfig { @@ -502,6 +573,10 @@ func setupSemgrepRuleLoadTrace() string { } func ensureAnalyzerAvailable() (string, error) { + if globals.Config.Analyzer.JarPath != "" { + return globals.Config.Analyzer.JarPath, nil + } + analyzerJarPath, err := utils.GetAnalyzerJarPath(globals.Config.Analyzer.Version) if err != nil { return "", fmt.Errorf("failed to construct path to the analyzer: %w", err) @@ -516,7 +591,7 @@ func ensureAnalyzerAvailable() (string, error) { return analyzerJarPath, nil } -func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) error { +func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) (*java.JavaCommandError, error) { analyzerCommand := analyzerBuilder.BuildNativeCommand() commandSucceeded := func(err error) bool { @@ -526,8 +601,6 @@ func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) e } return true } - // Execute the command using JavaRunner - err := javaRunner.ExecuteJavaCommand(analyzerCommand, commandSucceeded) - return err + return javaRunner.ExecuteJavaCommand(analyzerCommand, commandSucceeded) } diff --git a/cli/internal/globals/global.go b/cli/internal/globals/global.go index 8af14008f..8bcba3f39 100644 --- a/cli/internal/globals/global.go +++ b/cli/internal/globals/global.go @@ -58,10 +58,12 @@ type Github struct { type Analyzer struct { Version string `mapstructure:"version"` + JarPath string `mapstructure:"jar_path"` } type Autobuilder struct { Version string `mapstructure:"version"` + JarPath string `mapstructure:"jar_path"` } type Rules struct { diff --git a/cli/internal/utils/java/runner.go b/cli/internal/utils/java/runner.go index 1641e301b..a2878250f 100644 --- a/cli/internal/utils/java/runner.go +++ b/cli/internal/utils/java/runner.go @@ -24,6 +24,16 @@ const ( None ) +// JavaCommandError 
is returned when a Java process exits with a non-zero exit code. +// It preserves the exit code so callers can interpret process-specific status values. +type JavaCommandError struct { + ExitCode int +} + +func (e *JavaCommandError) Error() string { + return fmt.Sprintf("java command failed with exit code %d", e.ExitCode) +} + type JavaRunner interface { TrySystem() JavaRunner TrySpecificVersion(version int) JavaRunner @@ -35,7 +45,7 @@ type JavaRunner interface { // Call this before wrapping ExecuteJavaCommand in a spinner to avoid // download progress bars overlapping with spinner output. EnsureJava() (string, error) - ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) error + ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) (*JavaCommandError, error) } type DebugLineWriter interface { @@ -147,9 +157,9 @@ func (j *javaRunner) EnsureJava() (string, error) { return "", fmt.Errorf("all Java resolution attempts failed") } -func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) error { +func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) (*JavaCommandError, error) { if len(args) == 0 { - return fmt.Errorf("no Java command arguments provided") + return nil, fmt.Errorf("no Java command arguments provided") } // If EnsureJava was called, use the pre-resolved path directly @@ -157,6 +167,7 @@ func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(err return j.executeWithJava(j.resolvedJavaPath, Specific, args, commandSucceeded) } + var lastCmdErr *JavaCommandError resolutionStrategies := j.GetJavaResolutions() for i, resolutionStrategy := range resolutionStrategies { javaPath, strategy, err := resolutionStrategy() @@ -165,17 +176,26 @@ func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(err continue } - if err := j.executeWithJava(javaPath, strategy, args, commandSucceeded); err == nil { - return nil + cmdErr, 
execErr := j.executeWithJava(javaPath, strategy, args, commandSucceeded) + if execErr != nil { + output.LogDebugf("Java command setup failed (attempt %d): %v", i+1, execErr) + continue + } + if cmdErr == nil { + return nil, nil } + lastCmdErr = cmdErr output.LogDebugf("Java command failed (attempt %d), trying next resolution", i+1) } - return fmt.Errorf("all Java resolution attempts failed") + if lastCmdErr != nil { + return lastCmdErr, nil + } + return nil, fmt.Errorf("all Java resolution attempts failed") } -func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrategy, args []string, commandSucceeded func(error) bool) error { +func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrategy, args []string, commandSucceeded func(error) bool) (*JavaCommandError, error) { cmdArgs := append([]string{javaPath}, args...) cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...) @@ -190,16 +210,16 @@ func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrateg // Create pipes for stdout and stderr stdoutPipe, err := cmd.StdoutPipe() if err != nil { - return fmt.Errorf("failed to create stdout pipe: %w", err) + return nil, fmt.Errorf("failed to create stdout pipe: %w", err) } stderrPipe, err := cmd.StderrPipe() if err != nil { - return fmt.Errorf("failed to create stderr pipe: %w", err) + return nil, fmt.Errorf("failed to create stderr pipe: %w", err) } if err := cmd.Start(); err != nil { - return fmt.Errorf("failed to start Java command: %w", err) + return nil, fmt.Errorf("failed to start Java command: %w", err) } streamToTerminal := globals.Config.Output.Debug @@ -230,9 +250,10 @@ func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrateg // Wait for the command to finish err = cmd.Wait() - // Log any errors at debug level (caller decides severity) + // Extract exit code from the process error + exitCode := 0 if err != nil { - exitCode := 1 + exitCode = 1 if exitErr, ok := err.(*exec.ExitError); ok { 
exitCode = exitErr.ExitCode() } @@ -240,10 +261,10 @@ func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrateg } if commandSucceeded(err) { - return nil + return nil, nil } - return fmt.Errorf("java command failed") + return &JavaCommandError{ExitCode: exitCode}, nil } func (j *javaRunner) TrySpecificVersion(version int) JavaRunner { diff --git a/cli/internal/utils/java/runner_test.go b/cli/internal/utils/java/runner_test.go index a04b19cde..fa0815d0a 100644 --- a/cli/internal/utils/java/runner_test.go +++ b/cli/internal/utils/java/runner_test.go @@ -332,8 +332,11 @@ func TestJavaRunner_GetJavaResolutions_BothStrategies(t *testing.T) { func TestJavaRunner_ExecuteJavaCommand_NoArgs(t *testing.T) { runner := NewJavaRunner() - err := runner.ExecuteJavaCommand([]string{}, func(error) bool { return true }) + cmdErr, err := runner.ExecuteJavaCommand([]string{}, func(error) bool { return true }) + if cmdErr != nil { + t.Error("Expected no JavaCommandError for missing arguments") + } if err == nil { t.Error("Expected error when no arguments provided") } From 5bf7a094ef39da43b08dac0274676346522553e6 Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Wed, 22 Apr 2026 12:46:54 +0300 Subject: [PATCH 2/6] Add experimental flag --- cli/cmd/root.go | 59 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/cli/cmd/root.go b/cli/cmd/root.go index 5ab892d04..4b15882a1 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -13,10 +13,16 @@ import ( "github.com/seqra/opentaint/internal/utils/log" "github.com/seqra/opentaint/internal/version" "github.com/spf13/cobra" + "github.com/spf13/pflag" "github.com/spf13/viper" ) -var toolVersion bool +const experimentalFlagName = "experimental" + +var ( + toolVersion bool + experimentalMode bool +) // out is the global output printer used by all commands for user-facing output. 
// It is configured in PersistentPreRunE after logging is set up. @@ -34,6 +40,8 @@ var rootCmd = &cobra.Command{ SilenceUsage: true, PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + applyExperimentalFlagVisibility(cmd.Root(), experimentalMode) + if err := log.SetUpLogs(); err != nil { return fmt.Errorf("failed to set up logging: %w", err) } @@ -83,12 +91,15 @@ func Execute() { func init() { cobra.OnInitialize(initConfig) + configureExperimentalFlagVisibility() // Here you will define your flags and configuration settings. // Cobra supports persistent flags, which, if defined here, // will be global for your application. rootCmd.PersistentFlags().StringVar(&globals.ConfigFile, "config", "", "Path to a config file") + rootCmd.PersistentFlags().BoolVar(&experimentalMode, experimentalFlagName, false, "Show experimental and hidden flags") + _ = rootCmd.PersistentFlags().MarkHidden(experimentalFlagName) rootCmd.Flags().BoolVarP(&toolVersion, "version", "v", false, "Print the version information") @@ -190,6 +201,52 @@ func addConfigFields(cmd *cobra.Command, sb *output.SectionBuilder) { } } +func configureExperimentalFlagVisibility() { + defaultHelpFunc := rootCmd.HelpFunc() + defaultUsageFunc := rootCmd.UsageFunc() + + rootCmd.SetHelpFunc(func(cmd *cobra.Command, args []string) { + applyExperimentalFlagVisibility(cmd.Root(), experimentalMode) + defaultHelpFunc(cmd, args) + }) + rootCmd.SetUsageFunc(func(cmd *cobra.Command) error { + applyExperimentalFlagVisibility(cmd.Root(), experimentalMode) + return defaultUsageFunc(cmd) + }) +} + +func applyExperimentalFlagVisibility(root *cobra.Command, enabled bool) { + if !enabled || root == nil { + return + } + + visitCommandTree(root, func(cmd *cobra.Command) { + setFlagSetHidden(cmd.LocalFlags(), false) + setFlagSetHidden(cmd.PersistentFlags(), false) + }) +} + +func visitCommandTree(root *cobra.Command, visit func(*cobra.Command)) { + if root == nil { + return + } + + visit(root) + for _, child := range 
root.Commands() { + visitCommandTree(child, visit) + } +} + +func setFlagSetHidden(flags *pflag.FlagSet, hidden bool) { + if flags == nil { + return + } + + flags.VisitAll(func(flag *pflag.Flag) { + flag.Hidden = hidden + }) +} + // checkForUpdateAsync checks for a newer version in the background, throttled to once per day. func checkForUpdateAsync() { currentVersion := version.GetVersion() From 5499bb10776a7372576c8b68a9795fbb61745075 Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:08:55 +0300 Subject: [PATCH 3/6] Fix defer usage in approximations compilation --- cli/cmd/compile_approximations.go | 80 ++++++++++++++++++------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/cli/cmd/compile_approximations.go b/cli/cmd/compile_approximations.go index b09cae861..8b522d58a 100644 --- a/cli/cmd/compile_approximations.go +++ b/cli/cmd/compile_approximations.go @@ -155,7 +155,7 @@ func extractApproxClassesFromJar(jarPath string) (string, error) { if err != nil { return "", fmt.Errorf("failed to open JAR: %w", err) } - defer r.Close() + defer func() { _ = r.Close() }() extractDir, err := os.MkdirTemp("", "opentaint-approx-deps-*") if err != nil { @@ -164,44 +164,58 @@ func extractApproxClassesFromJar(jarPath string) (string, error) { const prefix = "opentaint-dataflow-approximations/" for _, f := range r.File { - if !strings.HasPrefix(f.Name, prefix) { - continue - } - if f.FileInfo().IsDir() { - continue - } - relPath := strings.TrimPrefix(f.Name, prefix) - if relPath == "" { - continue - } - destPath := filepath.Join(extractDir, relPath) - if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { - _ = os.RemoveAll(extractDir) - return "", err - } - src, err := f.Open() - if err != nil { - _ = os.RemoveAll(extractDir) - return "", err - } - dst, err := os.Create(destPath) - if err != nil { - src.Close() - _ = os.RemoveAll(extractDir) - return "", err - } - _, err = 
io.Copy(dst, src) - src.Close() - dst.Close() - if err != nil { - _ = os.RemoveAll(extractDir) - return "", err + classExtErr := extractApproxClass(f, prefix, extractDir) + if classExtErr != nil { + return "", classExtErr } } return extractDir, nil } +func extractApproxClass(f *zip.File, prefix string, extractDir string) error { + if !strings.HasPrefix(f.Name, prefix) { + return nil + } + if f.FileInfo().IsDir() { + return nil + } + + relPath := strings.TrimPrefix(f.Name, prefix) + if relPath == "" { + return nil + } + + destPath := filepath.Join(extractDir, relPath) + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + _ = os.RemoveAll(extractDir) + return err + } + + src, err := f.Open() + if err != nil { + _ = os.RemoveAll(extractDir) + return err + } + defer func() { _ = src.Close() }() + + dst, err := os.Create(destPath) + if err != nil { + _ = os.RemoveAll(extractDir) + return err + } + defer func() { _ = dst.Close() }() + + _, err = io.Copy(dst, src) + + if err != nil { + _ = os.RemoveAll(extractDir) + return err + } + + return nil +} + // deriveJavacPath returns the path to javac given the path to java. func deriveJavacPath(javaPath string) string { dir := filepath.Dir(javaPath) From 42051e8f7f683e6b475ed51309d8674bbf2cfdbc Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:37:33 +0300 Subject: [PATCH 4/6] Avoid sarif removal --- cli/cmd/scan.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 819a5f55a..21a2f0981 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -402,8 +402,6 @@ func scan(cmd *cobra.Command) { } analyzerFail = classifyAnalyzerError(scanCmdErr) - // Always attempt to print summary information — even when the analyzer - // failed, partial SARIF and rule-load-trace files may have been written. 
report, err := validation.ValidateSarifOutput(absSarifReportPath) if err != nil { output.LogInfof("Scan output validation failed: %v", err) @@ -442,12 +440,7 @@ func scan(cmd *cobra.Command) { if report != nil { printSarifSummary(report, absSarifReportPath) - } - - if SarifReportPath == "" { - utils.RemoveIfExistsOrExit(absSarifReportPath) - } else { - suggest("To view findings run", fmt.Sprintf("opentaint summary --show-findings %s", absSarifReportPath)) + suggest("To view findings run", utils.NewSummaryCommand(absSarifReportPath).WithShowFindings().Build()) } if analyzerFail != nil { From 8b91a34c49305f12f2b1a8e98d2b3be7e5f6a2d3 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 22 Apr 2026 14:30:09 +0300 Subject: [PATCH 5/6] refactor: hide experimental scan flags and extract analyzer domain Fix silent MarkHidden on the scan command: flags registered with scanCmd.Flags() were being hidden via scanCmd.PersistentFlags(), whose Lookup could not find them, so the MarkHidden error was swallowed and --code-flow-limit, --approximations-config, --dataflow-approximations, --track-external-methods, and --debug-fact-reachability-sarif all leaked into the default help output. 
Separate analyzer domain logic from the cmd layer: - internal/analyzer/exit.go owns exit-code constants, ExitMessage, Error type, and Classify (no I/O); scan.go handles presentation - java.DeriveJavacPath moves to internal/utils/java/detection.go and is now Windows-aware - utils.ExtractZipPrefix + copyZipEntry consolidate zip-prefix extraction with deferred cleanup, removing four repeated os.RemoveAll calls - compile_approximations.go is split into collectJavaSources, resolveJavacPath, buildApproxClasspath, runJavac --- cli/cmd/analyzer_exit.go | 68 ---------- cli/cmd/compile_approximations.go | 187 ++++++++++----------------- cli/cmd/scan.go | 23 ++-- cli/internal/analyzer/exit.go | 69 ++++++++++ cli/internal/utils/extract.go | 52 ++++++++ cli/internal/utils/java/detection.go | 13 ++ 6 files changed, 214 insertions(+), 198 deletions(-) delete mode 100644 cli/cmd/analyzer_exit.go create mode 100644 cli/internal/analyzer/exit.go diff --git a/cli/cmd/analyzer_exit.go b/cli/cmd/analyzer_exit.go deleted file mode 100644 index 8dd5a3cf4..000000000 --- a/cli/cmd/analyzer_exit.go +++ /dev/null @@ -1,68 +0,0 @@ -package cmd - -import ( - "fmt" - - "github.com/seqra/opentaint/internal/utils/java" -) - -// Analyzer exit codes as seen by the OS (unsigned byte values). -// These correspond to the Kotlin exitProcess() calls in AbstractAnalyzerRunner: -// -// exitProcess(-1) → 255 (project configuration error) -// exitProcess(-2) → 254 (analysis timeout) -// exitProcess(-3) → 253 (out of memory) -// exitProcess(-4) → 252 (unhandled exception) -const ( - analyzerExitConfigError = 255 - analyzerExitTimeout = 254 - analyzerExitOOM = 253 - analyzerExitException = 252 -) - -// analyzerError holds information about an analyzer failure. -// exitCode is the process exit code to forward to os.Exit. 
-type analyzerError struct { - exitCode int - message string -} - -// analyzerExitMessage returns a human-readable description for a known -// analyzer exit code, or empty string if the code is not recognized. -func analyzerExitMessage(code int) string { - switch code { - case analyzerExitConfigError: - return "project configuration error" - case analyzerExitTimeout: - return "analysis timed out — try increasing --timeout or --max-memory" - case analyzerExitOOM: - return "out of memory — try increasing --max-memory (e.g. --max-memory 16G)" - case analyzerExitException: - return "unhandled analyzer exception" - default: - return "" - } -} - -// classifyAnalyzerError converts a *JavaCommandError into an *analyzerError -// with a human-readable message. Returns nil when cmdErr is nil. -// -// The error message is printed immediately. The caller is responsible for -// eventually calling os.Exit with the returned exit code after performing -// any post-failure work (e.g. printing summaries). 
-func classifyAnalyzerError(cmdErr *java.JavaCommandError) *analyzerError { - if cmdErr == nil { - return nil - } - - code := cmdErr.ExitCode - if msg := analyzerExitMessage(code); msg != "" { - formatted := fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg) - out.Error(formatted) - return &analyzerError{exitCode: code, message: formatted} - } - - formatted := fmt.Sprintf("Analysis failed with exit code %d", code) - out.Error(formatted) - return &analyzerError{exitCode: code, message: formatted} -} diff --git a/cli/cmd/compile_approximations.go b/cli/cmd/compile_approximations.go index 8b522d58a..ebd0ec3b2 100644 --- a/cli/cmd/compile_approximations.go +++ b/cli/cmd/compile_approximations.go @@ -1,9 +1,7 @@ package cmd import ( - "archive/zip" "fmt" - "io" "os" "os/exec" "path/filepath" @@ -11,10 +9,15 @@ import ( "github.com/seqra/opentaint/internal/globals" "github.com/seqra/opentaint/internal/output" + "github.com/seqra/opentaint/internal/utils" "github.com/seqra/opentaint/internal/utils/java" "github.com/seqra/opentaint/internal/utils/project" ) +// approxClassesJarPrefix is the path prefix under which the analyzer fat JAR +// bundles approximation support sources (OpentaintNdUtil, ArgumentTypeContext). +const approxClassesJarPrefix = "opentaint-dataflow-approximations/" + // compileApproximationsIfNeeded checks whether a --dataflow-approximations directory // contains .java source files. 
If so, it compiles them using javac (with the // analyzer JAR + project dependencies on the classpath) and returns the path to @@ -29,15 +32,53 @@ func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, pr if err != nil { return "", fmt.Errorf("approximation path does not exist: %w", err) } - - // If it's a single file, return as-is (nothing to compile) if !info.IsDir() { return approxPath, nil } - // Collect .java files in the directory tree + javaFiles, err := collectJavaSources(approxPath) + if err != nil { + return "", err + } + if len(javaFiles) == 0 { + return approxPath, nil + } + + output.LogInfof("Found %d .java file(s) in approximations directory, compiling...", len(javaFiles)) + + javacPath, err := resolveJavacPath() + if err != nil { + return "", err + } + + extractedDir, err := os.MkdirTemp("", "opentaint-approx-deps-*") + if err != nil { + return "", fmt.Errorf("failed to create temp directory for approximation deps: %w", err) + } + defer func() { _ = os.RemoveAll(extractedDir) }() + + if err := utils.ExtractZipPrefix(analyzerJarPath, approxClassesJarPrefix, extractedDir); err != nil { + return "", fmt.Errorf("failed to extract approximation classes from analyzer JAR: %w", err) + } + + outputDir, err := os.MkdirTemp("", "opentaint-approx-compiled-*") + if err != nil { + return "", fmt.Errorf("failed to create temp directory for compiled approximations: %w", err) + } + + classpath := buildApproxClasspath(analyzerJarPath, extractedDir, projectModelDir) + if err := runJavac(javacPath, classpath, outputDir, javaFiles); err != nil { + _ = os.RemoveAll(outputDir) + return "", err + } + + output.LogInfof("Approximation compilation succeeded, output: %s", outputDir) + return outputDir, nil +} + +func collectJavaSources(root string) ([]string, error) { var javaFiles []string - _ = filepath.Walk(approxPath, func(path string, fi os.FileInfo, walkErr error) error { + err := filepath.Walk(root, func(path string, fi os.FileInfo, walkErr error) 
error { if walkErr != nil { return walkErr } @@ -46,15 +87,13 @@ func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, pr } return nil }) - - if len(javaFiles) == 0 { - // No Java sources — directory may contain .class files or be empty; pass through. - return approxPath, nil + if err != nil { + return nil, fmt.Errorf("failed to walk approximations directory: %w", err) } + return javaFiles, nil +} - output.LogInfof("Found %d .java file(s) in approximations directory, compiling...", len(javaFiles)) - - // Resolve javac from the managed JDK +func resolveJavacPath() (string, error) { javacRunner := java.NewJavaRunner(). WithSkipVerify(globals.Config.SkipVerify). WithImageType(java.AdoptiumImageJDK). @@ -66,34 +105,24 @@ func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, pr return "", fmt.Errorf("failed to resolve Java for approximation compilation: %w", err) } - javacPath := deriveJavacPath(javaPath) + javacPath := java.DeriveJavacPath(javaPath) if _, err := os.Stat(javacPath); err != nil { return "", fmt.Errorf("javac not found at %s (resolved from java at %s). A JDK (not JRE) is required to compile approximation sources", javacPath, javaPath) } + return javacPath, nil +} - // Extract approximation support classes from the analyzer JAR. - // The JAR bundles utility classes (OpentaintNdUtil, ArgumentTypeContext) - // under "opentaint-dataflow-approximations/" prefix. - extractedDir, err := extractApproxClassesFromJar(analyzerJarPath) - if err != nil { - return "", fmt.Errorf("failed to extract approximation classes from analyzer JAR: %w", err) - } - - // Create temp output directory for compiled .class files - outputDir, err := os.MkdirTemp("", "opentaint-approx-compiled-*") - if err != nil { - _ = os.RemoveAll(extractedDir) - return "", fmt.Errorf("failed to create temp directory for compiled approximations: %w", err) - } - - // Build classpath: - // 1. 
Analyzer JAR — contains @Approximate, @ApproximateByName annotations - // 2. Extracted approximation utilities — OpentaintNdUtil, ArgumentTypeContext - // 3. Project dependencies — library JARs that approximation code may reference - cpParts := []string{analyzerJarPath, extractedDir} - cpParts = append(cpParts, resolveProjectDependencies(projectModelDir)...) - classpath := strings.Join(cpParts, string(os.PathListSeparator)) +// buildApproxClasspath assembles the javac classpath for approximation compilation: +// 1. Analyzer JAR — contains @Approximate, @ApproximateByName annotations +// 2. Extracted approximation utilities — OpentaintNdUtil, ArgumentTypeContext +// 3. Project dependencies — library JARs that approximation code may reference +func buildApproxClasspath(analyzerJarPath, extractedDir, projectModelDir string) string { + parts := []string{analyzerJarPath, extractedDir} + parts = append(parts, resolveProjectDependencies(projectModelDir)...) + return strings.Join(parts, string(os.PathListSeparator)) +} +func runJavac(javacPath, classpath, outputDir string, javaFiles []string) error { args := []string{ "-source", "8", "-target", "8", @@ -106,20 +135,13 @@ func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, pr cmd := exec.Command(javacPath, args...) 
cmdOutput, cmdErr := cmd.CombinedOutput() - - // Always clean up extracted dependencies - _ = os.RemoveAll(extractedDir) - if cmdErr != nil { - _ = os.RemoveAll(outputDir) - return "", fmt.Errorf( + return fmt.Errorf( "approximation compilation failed:\n%s\njavac exited with: %w", string(cmdOutput), cmdErr, ) } - - output.LogInfof("Approximation compilation succeeded, output: %s", outputDir) - return outputDir, nil + return nil } // resolveProjectDependencies reads project.yaml from the project model directory @@ -146,78 +168,3 @@ func resolveProjectDependencies(projectModelDir string) []string { output.LogDebugf("Resolved %d project dependencies for approximation classpath", len(absDeps)) return absDeps } - -// extractApproxClassesFromJar extracts bundled approximation support classes -// from the analyzer fat JAR. These are stored under "opentaint-dataflow-approximations/" -// prefix and need standard package structure for javac to find them. -func extractApproxClassesFromJar(jarPath string) (string, error) { - r, err := zip.OpenReader(jarPath) - if err != nil { - return "", fmt.Errorf("failed to open JAR: %w", err) - } - defer func() { _ = r.Close() }() - - extractDir, err := os.MkdirTemp("", "opentaint-approx-deps-*") - if err != nil { - return "", err - } - - const prefix = "opentaint-dataflow-approximations/" - for _, f := range r.File { - classExtErr := extractApproxClass(f, prefix, extractDir) - if classExtErr != nil { - return "", classExtErr - } - } - - return extractDir, nil -} - -func extractApproxClass(f *zip.File, prefix string, extractDir string) error { - if !strings.HasPrefix(f.Name, prefix) { - return nil - } - if f.FileInfo().IsDir() { - return nil - } - - relPath := strings.TrimPrefix(f.Name, prefix) - if relPath == "" { - return nil - } - - destPath := filepath.Join(extractDir, relPath) - if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { - _ = os.RemoveAll(extractDir) - return err - } - - src, err := f.Open() - if err != nil { - 
_ = os.RemoveAll(extractDir) - return err - } - defer func() { _ = src.Close() }() - - dst, err := os.Create(destPath) - if err != nil { - _ = os.RemoveAll(extractDir) - return err - } - defer func() { _ = dst.Close() }() - - _, err = io.Copy(dst, src) - - if err != nil { - _ = os.RemoveAll(extractDir) - return err - } - - return nil -} - -// deriveJavacPath returns the path to javac given the path to java. -func deriveJavacPath(javaPath string) string { - dir := filepath.Dir(javaPath) - return filepath.Join(dir, "javac") -} diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 21a2f0981..9d909e52c 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -6,6 +6,7 @@ import ( "path/filepath" "time" + "github.com/seqra/opentaint/internal/analyzer" "github.com/seqra/opentaint/internal/load_trace" "github.com/seqra/opentaint/internal/sarif" "github.com/seqra/opentaint/internal/validation" @@ -124,7 +125,7 @@ func init() { scanCmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)") _ = viper.BindPFlag("scan.max_memory", scanCmd.Flags().Lookup("max-memory")) scanCmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)") - _ = scanCmd.PersistentFlags().MarkHidden("code-flow-limit") + _ = scanCmd.Flags().MarkHidden("code-flow-limit") _ = viper.BindPFlag("scan.code_flow_limit", scanCmd.Flags().Lookup("code-flow-limit")) scanCmd.Flags().BoolVar(&DryRunScan, "dry-run", false, "Validate inputs and show what would run without compiling or scanning") scanCmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists") @@ -133,16 +134,16 @@ func init() { scanCmd.Flags().StringArrayVar(&RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") scanCmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, "YAML passThrough 
approximations config (OVERRIDE mode, repeatable)") - _ = scanCmd.PersistentFlags().MarkHidden("approximations-config") + _ = scanCmd.Flags().MarkHidden("approximations-config") scanCmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files (repeatable)") - _ = scanCmd.PersistentFlags().MarkHidden("dataflow-approximations") + _ = scanCmd.Flags().MarkHidden("dataflow-approximations") scanCmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-methods-{without,with}-rules.yaml next to the SARIF report") - _ = scanCmd.PersistentFlags().MarkHidden("track-external-methods") + _ = scanCmd.Flags().MarkHidden("track-external-methods") scanCmd.Flags().BoolVar(&DebugFactReachabilitySarif, "debug-fact-reachability-sarif", false, "Generate SARIF with fact reachability info (debug; use with a single rule only)") - _ = scanCmd.PersistentFlags().MarkHidden("debug-fact-reachability-sarif") + _ = scanCmd.Flags().MarkHidden("debug-fact-reachability-sarif") } // currentScanBuilder returns a builder pre-populated with the user's current scan flags. @@ -391,7 +392,7 @@ func scan(cmd *cobra.Command) { out.Fatalf("Failed to resolve Java for analyzer: %s", err) } - var analyzerFail *analyzerError + var analyzerFail *analyzer.Error var scanCmdErr *java.JavaCommandError if err := out.RunWithSpinner("Analyzing project", func() error { var scanErr error @@ -400,7 +401,9 @@ func scan(cmd *cobra.Command) { }); err != nil { out.Fatalf("Native scan has failed: %s", err) } - analyzerFail = classifyAnalyzerError(scanCmdErr) + if analyzerFail = analyzer.Classify(scanCmdErr); analyzerFail != nil { + out.Error(analyzerFail.Message) + } report, err := validation.ValidateSarifOutput(absSarifReportPath) if err != nil { @@ -408,7 +411,7 @@ func scan(cmd *cobra.Command) { if analyzerFail == nil { // Analyzer reported success but produced no valid SARIF — treat as failure. 
out.Error(fmt.Sprintf("There was a problem during the scan step, check the full logs: %s", globals.LogPath)) - analyzerFail = &analyzerError{exitCode: 1, message: "scan output validation failed"} + analyzerFail = &analyzer.Error{ExitCode: 1, Message: "scan output validation failed"} } } @@ -419,7 +422,7 @@ func scan(cmd *cobra.Command) { output.LogInfof("Rule load trace validation failed: %v", err) if analyzerFail == nil { out.Error(fmt.Sprintf("Failed to validate rule load trace output: %s", err)) - analyzerFail = &analyzerError{exitCode: 1, message: "rule load trace validation failed"} + analyzerFail = &analyzer.Error{ExitCode: 1, Message: "rule load trace validation failed"} } } @@ -444,7 +447,7 @@ func scan(cmd *cobra.Command) { } if analyzerFail != nil { - os.Exit(analyzerFail.exitCode) + os.Exit(analyzerFail.ExitCode) } } diff --git a/cli/internal/analyzer/exit.go b/cli/internal/analyzer/exit.go new file mode 100644 index 000000000..6d38e7599 --- /dev/null +++ b/cli/internal/analyzer/exit.go @@ -0,0 +1,69 @@ +// Package analyzer holds OpenTaint analyzer domain logic that is independent +// of the CLI presentation layer. It currently covers exit-code classification +// for analyzer process failures. +package analyzer + +import ( + "fmt" + + "github.com/seqra/opentaint/internal/utils/java" +) + +// Analyzer exit codes as seen by the OS (unsigned byte values). +// These correspond to the Kotlin exitProcess() calls in AbstractAnalyzerRunner: +// +// exitProcess(-1) → 255 (project configuration error) +// exitProcess(-2) → 254 (analysis timeout) +// exitProcess(-3) → 253 (out of memory) +// exitProcess(-4) → 252 (unhandled exception) +const ( + ExitConfigError = 255 + ExitTimeout = 254 + ExitOOM = 253 + ExitException = 252 +) + +// Error holds information about an analyzer failure. ExitCode is the process +// exit code to forward to os.Exit; Message is a human-readable description. 
+type Error struct { + ExitCode int + Message string +} + +// ExitMessage returns a human-readable description for a known analyzer exit +// code, or empty string if the code is not recognized. +func ExitMessage(code int) string { + switch code { + case ExitConfigError: + return "project configuration error" + case ExitTimeout: + return "analysis timed out — try increasing --timeout or --max-memory" + case ExitOOM: + return "out of memory — try increasing --max-memory (e.g. --max-memory 16G)" + case ExitException: + return "unhandled analyzer exception" + default: + return "" + } +} + +// Classify converts a *java.JavaCommandError into an *Error with a formatted +// message. Returns nil when cmdErr is nil. No I/O is performed — the caller +// is responsible for presentation and for calling os.Exit(Error.ExitCode). +func Classify(cmdErr *java.JavaCommandError) *Error { + if cmdErr == nil { + return nil + } + + code := cmdErr.ExitCode + if msg := ExitMessage(code); msg != "" { + return &Error{ + ExitCode: code, + Message: fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg), + } + } + return &Error{ + ExitCode: code, + Message: fmt.Sprintf("Analysis failed with exit code %d", code), + } +} diff --git a/cli/internal/utils/extract.go b/cli/internal/utils/extract.go index c2262a133..90421152f 100644 --- a/cli/internal/utils/extract.go +++ b/cli/internal/utils/extract.go @@ -56,6 +56,58 @@ func ExtractTar(tr *tar.Reader, basePath, destPath string, isSourceDir bool) err return nil } +// ExtractZipPrefix extracts entries whose names begin with prefix from the zip +// at src into destDir, stripping prefix from each entry's relative path. +// Directory entries are skipped — parent directories are created on demand. +// Returns an error if the zip cannot be opened or any entry fails to write. 
+func ExtractZipPrefix(src, prefix, destDir string) error { + r, err := zip.OpenReader(src) + if err != nil { + return fmt.Errorf("failed to open zip: %w", err) + } + defer func() { _ = r.Close() }() + + for _, f := range r.File { + if !strings.HasPrefix(f.Name, prefix) || f.FileInfo().IsDir() { + continue + } + relPath := strings.TrimPrefix(f.Name, prefix) + if relPath == "" { + continue + } + if err := copyZipEntry(f, filepath.Join(destDir, relPath)); err != nil { + return err + } + } + return nil +} + +func copyZipEntry(f *zip.File, destPath string) (err error) { + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + return err + } + src, err := f.Open() + if err != nil { + return err + } + defer func() { + if cerr := src.Close(); cerr != nil && err == nil { + err = cerr + } + }() + dst, err := os.Create(destPath) + if err != nil { + return err + } + defer func() { + if cerr := dst.Close(); cerr != nil && err == nil { + err = cerr + } + }() + _, err = io.Copy(dst, src) + return err +} + // ExtractZip extracts the contents of a ZIP file to the specified destination directory. func ExtractZip(src, dest string) error { zr, err := zip.OpenReader(src) diff --git a/cli/internal/utils/java/detection.go b/cli/internal/utils/java/detection.go index b2cde7b87..736bfb1ca 100644 --- a/cli/internal/utils/java/detection.go +++ b/cli/internal/utils/java/detection.go @@ -3,7 +3,9 @@ package java import ( "os" "os/exec" + "path/filepath" "regexp" + "runtime" "strconv" "strings" @@ -130,6 +132,17 @@ func validateJavaInstallation(javaPath string) *JavaInstallation { return installation } +// DeriveJavacPath returns the javac binary path that sits next to the given +// java binary. On Windows the ".exe" suffix is preserved. 
+func DeriveJavacPath(javaPath string) string { + dir := filepath.Dir(javaPath) + name := "javac" + if runtime.GOOS == "windows" { + name = "javac.exe" + } + return filepath.Join(dir, name) +} + func extractVendor(versionOutput string) string { output := strings.ToLower(versionOutput) From 2b66e1fa2d63003dbd16cfaf21dc8fa3e5cfa8c9 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 22 Apr 2026 15:05:14 +0300 Subject: [PATCH 6/6] fix: Make code flow limit option visible --- cli/cmd/scan.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 9d909e52c..0560753d5 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -125,7 +125,6 @@ func init() { scanCmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)") _ = viper.BindPFlag("scan.max_memory", scanCmd.Flags().Lookup("max-memory")) scanCmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)") - _ = scanCmd.Flags().MarkHidden("code-flow-limit") _ = viper.BindPFlag("scan.code_flow_limit", scanCmd.Flags().Lookup("code-flow-limit")) scanCmd.Flags().BoolVar(&DryRunScan, "dry-run", false, "Validate inputs and show what would run without compiling or scanning") scanCmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists")