diff --git a/cli/cmd/command_builder.go b/cli/cmd/command_builder.go index 772957de..aa353a07 100644 --- a/cli/cmd/command_builder.go +++ b/cli/cmd/command_builder.go @@ -42,21 +42,26 @@ func NewAutobuilderBuilder() *AutobuilderBuilder { type AnalyzerBuilder struct { *BaseCommandBuilder - projectPath string - outputDir string - sarifFileName string - sarifCodeFlowLimit int64 - sarifToolVersion string - sarifToolSemanticVersion string - sarifUriBase string - semgrepCompatibility bool - partialFingerprints bool - ifdsAnalysisTimeout int64 - severities []string - ruleSetPaths []string - ruleLoadTracePath string - jarPath string - maxMemory string + projectPath string + outputDir string + sarifFileName string + sarifCodeFlowLimit int64 + sarifToolVersion string + sarifToolSemanticVersion string + sarifUriBase string + semgrepCompatibility bool + partialFingerprints bool + ifdsAnalysisTimeout int64 + severities []string + ruleSetPaths []string + ruleLoadTracePath string + jarPath string + maxMemory string + ruleIDs []string + approximationsConfig []string + dataflowApproximations []string + trackExternalMethods bool + debugFactReachabilitySarif bool } func (a *AnalyzerBuilder) SetProject(projectPath string) *AnalyzerBuilder { @@ -134,6 +139,31 @@ func (a *AnalyzerBuilder) SetMaxMemory(maxMemory string) *AnalyzerBuilder { return a } +func (a *AnalyzerBuilder) AddRuleID(ruleID string) *AnalyzerBuilder { + a.ruleIDs = append(a.ruleIDs, ruleID) + return a +} + +func (a *AnalyzerBuilder) AddApproximationsConfig(configPath string) *AnalyzerBuilder { + a.approximationsConfig = append(a.approximationsConfig, configPath) + return a +} + +func (a *AnalyzerBuilder) AddDataflowApproximations(approxPath string) *AnalyzerBuilder { + a.dataflowApproximations = append(a.dataflowApproximations, approxPath) + return a +} + +func (a *AnalyzerBuilder) SetTrackExternalMethods(track bool) *AnalyzerBuilder { + a.trackExternalMethods = track + return a +} + +func (a *AnalyzerBuilder) EnableDebugFactReachabilitySarif() *AnalyzerBuilder { + a.debugFactReachabilitySarif = true + return a +} + func (a *AnalyzerBuilder) BuildNativeCommand() []string { // For native execution, create a temporary logs directory tempLogsDir, err := os.MkdirTemp("", "opentaint-*") @@ -203,6 +233,26 @@ func (a *AnalyzerBuilder) BuildNativeCommand() []string { flags = append(flags, "--semgrep-rule-load-trace", a.ruleLoadTracePath) } + for _, ruleID := range a.ruleIDs { + flags = append(flags, "--semgrep-rule-id", ruleID) + } + + for _, configPath := range a.approximationsConfig { + flags = append(flags, "--approximations-config", configPath) + } + + for _, approxPath := range a.dataflowApproximations { + flags = append(flags, "--dataflow-approximations", approxPath) + } + + if a.trackExternalMethods { + flags = append(flags, "--track-external-methods") + } + + if a.debugFactReachabilitySarif { + flags = append(flags, "--debug-fact-reachability-sarif") + } + return append(command, flags...) } diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go index 4dc97fba..f7de00a8 100644 --- a/cli/cmd/compile.go +++ b/cli/cmd/compile.go @@ -118,6 +118,10 @@ func init() { } func ensureAutobuilderAvailable() (string, error) { + if globals.Config.Autobuilder.JarPath != "" { + return globals.Config.Autobuilder.JarPath, nil + } + autobuilderJarPath, err := utils.GetAutobuilderJarPath(globals.Config.Autobuilder.Version) if err != nil { return "", fmt.Errorf("failed to construct path to the autobuilder: %w", err) @@ -189,11 +193,15 @@ func compileProject(absOutputProjectModelPath, absProjectRoot, autobuilderJarPat return true } // Execute the command using JavaRunner - err = javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded) + cmdErr, err := javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded) if err != nil { output.LogInfof("Native compilation has failed: %s", err) return fmt.Errorf("native compilation has failed: %w", err) } + if cmdErr != nil { + output.LogInfof("Native compilation has failed: %s", cmdErr) + return fmt.Errorf("native compilation has failed: %w", cmdErr) + } return nil } diff --git a/cli/cmd/compile_approximations.go b/cli/cmd/compile_approximations.go new file mode 100644 index 00000000..ebd0ec3b --- /dev/null +++ b/cli/cmd/compile_approximations.go @@ -0,0 +1,170 @@ +package cmd + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/output" + "github.com/seqra/opentaint/internal/utils" + "github.com/seqra/opentaint/internal/utils/java" + "github.com/seqra/opentaint/internal/utils/project" +) + +// approxClassesJarPrefix is the path prefix under which the analyzer fat JAR +// bundles approximation support sources (OpentaintNdUtil, ArgumentTypeContext). +const approxClassesJarPrefix = "opentaint-dataflow-approximations/" + +// compileApproximationsIfNeeded checks whether a --dataflow-approximations directory +// contains .java source files. If so, it compiles them using javac (with the +// analyzer JAR + project dependencies on the classpath) and returns the path to +// the compiled .class output directory. If the directory already contains only +// .class files (or no .java files at all), it is returned as-is. +// +// projectModelDir is the directory containing project.yaml — used to resolve +// project dependencies for the javac classpath (approximation code may reference +// library types like org.apache.pdfbox.pdmodel.PDDocument). +func compileApproximationsIfNeeded(approxPath string, analyzerJarPath string, projectModelDir string) (string, error) { + info, err := os.Stat(approxPath) + if err != nil { + return "", fmt.Errorf("approximation path does not exist: %w", err) + } + if !info.IsDir() { + return approxPath, nil + } + + javaFiles, err := collectJavaSources(approxPath) + if err != nil { + return "", err + } + if len(javaFiles) == 0 { + return approxPath, nil + } + + output.LogInfof("Found %d .java file(s) in approximations directory, compiling...", len(javaFiles)) + + javacPath, err := resolveJavacPath() + if err != nil { + return "", err + } + + extractedDir, err := os.MkdirTemp("", "opentaint-approx-deps-*") + if err != nil { + return "", fmt.Errorf("failed to create temp directory for approximation deps: %w", err) + } + defer func() { _ = os.RemoveAll(extractedDir) }() + + if err := utils.ExtractZipPrefix(analyzerJarPath, approxClassesJarPrefix, extractedDir); err != nil { + return "", fmt.Errorf("failed to extract approximation classes from analyzer JAR: %w", err) + } + + outputDir, err := os.MkdirTemp("", "opentaint-approx-compiled-*") + if err != nil { + return "", fmt.Errorf("failed to create temp directory for compiled approximations: %w", err) + } + + classpath := buildApproxClasspath(analyzerJarPath, extractedDir, projectModelDir) + if err := runJavac(javacPath, classpath, outputDir, javaFiles); err != nil { + _ = os.RemoveAll(outputDir) + return "", err + } + + output.LogInfof("Approximation compilation succeeded, output: %s", outputDir) + return outputDir, nil +} + +func collectJavaSources(root string) ([]string, error) { + var javaFiles []string + err := filepath.Walk(root, func(path string, fi os.FileInfo, walkErr error) error { + if walkErr != nil { + return walkErr + } + if !fi.IsDir() && strings.HasSuffix(fi.Name(), ".java") { + javaFiles = append(javaFiles, path) + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("failed to walk approximations directory: %w", err) + } + return javaFiles, nil +} + +func resolveJavacPath() (string, error) { + javacRunner := java.NewJavaRunner(). + WithSkipVerify(globals.Config.SkipVerify). + WithImageType(java.AdoptiumImageJDK). + TrySystem(). + TrySpecificVersion(globals.DefaultJavaVersion) + + javaPath, err := javacRunner.EnsureJava() + if err != nil { + return "", fmt.Errorf("failed to resolve Java for approximation compilation: %w", err) + } + + javacPath := java.DeriveJavacPath(javaPath) + if _, err := os.Stat(javacPath); err != nil { + return "", fmt.Errorf("javac not found at %s (resolved from java at %s). A JDK (not JRE) is required to compile approximation sources", javacPath, javaPath) + } + return javacPath, nil +} + +// buildApproxClasspath assembles the javac classpath for approximation compilation: +// 1. Analyzer JAR — contains @Approximate, @ApproximateByName annotations +// 2. Extracted approximation utilities — OpentaintNdUtil, ArgumentTypeContext +// 3. Project dependencies — library JARs that approximation code may reference +func buildApproxClasspath(analyzerJarPath, extractedDir, projectModelDir string) string { + parts := []string{analyzerJarPath, extractedDir} + parts = append(parts, resolveProjectDependencies(projectModelDir)...) + return strings.Join(parts, string(os.PathListSeparator)) +} + +func runJavac(javacPath, classpath, outputDir string, javaFiles []string) error { + args := []string{ + "-source", "8", + "-target", "8", + "-cp", classpath, + "-d", outputDir, + } + args = append(args, javaFiles...) + + output.LogDebugf("Running javac: %s %s", javacPath, strings.Join(args, " ")) + + cmd := exec.Command(javacPath, args...) + cmdOutput, cmdErr := cmd.CombinedOutput() + if cmdErr != nil { + return fmt.Errorf( + "approximation compilation failed:\n%s\njavac exited with: %w", + string(cmdOutput), cmdErr, + ) + } + return nil +} + +// resolveProjectDependencies reads project.yaml from the project model directory +// and returns absolute paths to the dependency JARs listed there. +func resolveProjectDependencies(projectModelDir string) []string { + if projectModelDir == "" { + return nil + } + config, err := project.LoadConfig(projectModelDir) + if err != nil { + output.LogDebugf("Could not read project config for approximation compilation: %v", err) + return nil + } + var absDeps []string + for _, dep := range config.Dependencies { + absPath := dep + if !filepath.IsAbs(dep) { + absPath = filepath.Join(projectModelDir, dep) + } + if _, err := os.Stat(absPath); err == nil { + absDeps = append(absDeps, absPath) + } + } + output.LogDebugf("Resolved %d project dependencies for approximation classpath", len(absDeps)) + return absDeps +} diff --git a/cli/cmd/project.go b/cli/cmd/project.go index 216ad36d..9b7564ad 100644 --- a/cli/cmd/project.go +++ b/cli/cmd/project.go @@ -164,10 +164,13 @@ func (c *JavaAutobuilderConfig) runAutobuilder() error { return true } - err = javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded) + cmdErr, err := javaRunner.ExecuteJavaCommand(autobuilderCommand, commandSucceeded) if err != nil { return fmt.Errorf("native autobuilder execution failed: %w", err) } + if cmdErr != nil { + return fmt.Errorf("native autobuilder execution failed: %w", cmdErr) + } config, err := validation.ValidateProjectModelOutput(c.outputDir) if err != nil { diff --git a/cli/cmd/root.go b/cli/cmd/root.go index fea11a5a..4b15882a 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -13,10 +13,16 @@ import ( "github.com/seqra/opentaint/internal/utils/log" "github.com/seqra/opentaint/internal/version" "github.com/spf13/cobra" + "github.com/spf13/pflag" "github.com/spf13/viper" ) -var toolVersion bool +const experimentalFlagName = "experimental" + +var ( + toolVersion bool + experimentalMode bool +) // out is the global output printer used by all commands for user-facing output. // It is configured in PersistentPreRunE after logging is set up. @@ -34,6 +40,8 @@ var rootCmd = &cobra.Command{ SilenceUsage: true, PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + applyExperimentalFlagVisibility(cmd.Root(), experimentalMode) + if err := log.SetUpLogs(); err != nil { return fmt.Errorf("failed to set up logging: %w", err) } @@ -83,12 +91,15 @@ func Execute() { func init() { cobra.OnInitialize(initConfig) + configureExperimentalFlagVisibility() // Here you will define your flags and configuration settings. // Cobra supports persistent flags, which, if defined here, // will be global for your application. rootCmd.PersistentFlags().StringVar(&globals.ConfigFile, "config", "", "Path to a config file") + rootCmd.PersistentFlags().BoolVar(&experimentalMode, experimentalFlagName, false, "Show experimental and hidden flags") + _ = rootCmd.PersistentFlags().MarkHidden(experimentalFlagName) rootCmd.Flags().BoolVarP(&toolVersion, "version", "v", false, "Print the version information") @@ -130,6 +141,14 @@ func init() { rootCmd.PersistentFlags().BoolVar(&globals.Config.SkipVerify, "skip-verify", false, "Skip SHA256 checksum verification of downloaded artifacts") _ = viper.BindPFlag("skip-verify", rootCmd.PersistentFlags().Lookup("skip-verify")) + + rootCmd.PersistentFlags().StringVar(&globals.Config.Analyzer.JarPath, "analyzer-jar", "", "Path to analyzer JAR (dev override, skips download)") + _ = rootCmd.PersistentFlags().MarkHidden("analyzer-jar") + _ = viper.BindPFlag("analyzer.jar_path", rootCmd.PersistentFlags().Lookup("analyzer-jar")) + + rootCmd.PersistentFlags().StringVar(&globals.Config.Autobuilder.JarPath, "autobuilder-jar", "", "Path to autobuilder JAR (dev override, skips download)") + _ = rootCmd.PersistentFlags().MarkHidden("autobuilder-jar") + _ = viper.BindPFlag("autobuilder.jar_path", rootCmd.PersistentFlags().Lookup("autobuilder-jar")) } // initConfig reads in config file and ENV variables if set. @@ -182,6 +201,52 @@ func addConfigFields(cmd *cobra.Command, sb *output.SectionBuilder) { } } +func configureExperimentalFlagVisibility() { + defaultHelpFunc := rootCmd.HelpFunc() + defaultUsageFunc := rootCmd.UsageFunc() + + rootCmd.SetHelpFunc(func(cmd *cobra.Command, args []string) { + applyExperimentalFlagVisibility(cmd.Root(), experimentalMode) + defaultHelpFunc(cmd, args) + }) + rootCmd.SetUsageFunc(func(cmd *cobra.Command) error { + applyExperimentalFlagVisibility(cmd.Root(), experimentalMode) + return defaultUsageFunc(cmd) + }) +} + +func applyExperimentalFlagVisibility(root *cobra.Command, enabled bool) { + if !enabled || root == nil { + return + } + + visitCommandTree(root, func(cmd *cobra.Command) { + setFlagSetHidden(cmd.LocalFlags(), false) + setFlagSetHidden(cmd.PersistentFlags(), false) + }) +} + +func visitCommandTree(root *cobra.Command, visit func(*cobra.Command)) { + if root == nil { + return + } + + visit(root) + for _, child := range root.Commands() { + visitCommandTree(child, visit) + } +} + +func setFlagSetHidden(flags *pflag.FlagSet, hidden bool) { + if flags == nil { + return + } + + flags.VisitAll(func(flag *pflag.Flag) { + flag.Hidden = hidden + }) +} + // checkForUpdateAsync checks for a newer version in the background, throttled to once per day. func checkForUpdateAsync() { currentVersion := version.GetVersion() diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 0be61474..0560753d 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -6,6 +6,7 @@ import ( "path/filepath" "time" + "github.com/seqra/opentaint/internal/analyzer" "github.com/seqra/opentaint/internal/load_trace" "github.com/seqra/opentaint/internal/sarif" "github.com/seqra/opentaint/internal/validation" @@ -23,15 +24,20 @@ import ( ) var ( - UserProjectPath string - ProjectModelPath string - SarifReportPath string - SemgrepCompatibilitySarif bool - Severity []string - Ruleset []string - DryRunScan bool - Recompile bool - ScanLogFile string + UserProjectPath string + ProjectModelPath string + SarifReportPath string + SemgrepCompatibilitySarif bool + Severity []string + Ruleset []string + DryRunScan bool + Recompile bool + ScanLogFile string + RuleID []string + ApproximationsConfig []string + DataflowApproximations []string + TrackExternalMethods bool + DebugFactReachabilitySarif bool ) type RulesetType struct { @@ -119,12 +125,24 @@ func init() { scanCmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)") _ = viper.BindPFlag("scan.max_memory", scanCmd.Flags().Lookup("max-memory")) scanCmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)") - _ = scanCmd.PersistentFlags().MarkHidden("code-flow-limit") _ = viper.BindPFlag("scan.code_flow_limit", scanCmd.Flags().Lookup("code-flow-limit")) scanCmd.Flags().BoolVar(&DryRunScan, "dry-run", false, "Validate inputs and show what would run without compiling or scanning") scanCmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists") scanCmd.Flags().StringVar(&ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") scanCmd.Flags().StringVar(&ScanLogFile, "log-file", "", "Path to the log file (default: /logs/.log)") + scanCmd.Flags().StringArrayVar(&RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") + + scanCmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, "YAML passThrough approximations config (OVERRIDE mode, repeatable)") + _ = scanCmd.Flags().MarkHidden("approximations-config") + + scanCmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files (repeatable)") + _ = scanCmd.Flags().MarkHidden("dataflow-approximations") + + scanCmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-methods-{without,with}-rules.yaml next to the SARIF report") + _ = scanCmd.Flags().MarkHidden("track-external-methods") + + scanCmd.Flags().BoolVar(&DebugFactReachabilitySarif, "debug-fact-reachability-sarif", false, "Generate SARIF with fact reachability info (debug; use with a single rule only)") + _ = scanCmd.Flags().MarkHidden("debug-fact-reachability-sarif") } // currentScanBuilder returns a builder pre-populated with the user's current scan flags. @@ -334,6 +352,19 @@ func scan(cmd *cobra.Command) { if maxMemory != "" { nativeBuilder.SetMaxMemory(maxMemory) } + for _, ruleID := range RuleID { + nativeBuilder.AddRuleID(ruleID) + } + for _, approxConfig := range ApproximationsConfig { + absApproxConfig := log.AbsPathOrExit(approxConfig, "approximations-config") + nativeBuilder.AddApproximationsConfig(absApproxConfig) + } + if TrackExternalMethods { + nativeBuilder.SetTrackExternalMethods(true) + } + if DebugFactReachabilitySarif { + nativeBuilder.EnableDebugFactReachabilitySarif() + } analyzerJarPath, err := ensureAnalyzerAvailable() if err != nil { @@ -341,6 +372,16 @@ func scan(cmd *cobra.Command) { } nativeBuilder.SetJarPath(analyzerJarPath) + // Process --dataflow-approximations: auto-compile .java sources if needed + for _, approxPath := range DataflowApproximations { + absApproxPath := log.AbsPathOrExit(approxPath, "dataflow-approximations") + compiledPath, compileErr := compileApproximationsIfNeeded(absApproxPath, analyzerJarPath, absProjectModelPath) + if compileErr != nil { + out.Fatalf("Approximation compilation failed: %s", compileErr) + } + nativeBuilder.AddDataflowApproximations(compiledPath) + } + analyzerJavaRunner := java.NewJavaRunner(). WithSkipVerify(globals.Config.SkipVerify). WithDebugOutput(out.DebugStream("Analyzer")). @@ -350,38 +391,63 @@ func scan(cmd *cobra.Command) { out.Fatalf("Failed to resolve Java for analyzer: %s", err) } + var analyzerFail *analyzer.Error + var scanCmdErr *java.JavaCommandError if err := out.RunWithSpinner("Analyzing project", func() error { - return scanProject(nativeBuilder, analyzerJavaRunner) + var scanErr error + scanCmdErr, scanErr = scanProject(nativeBuilder, analyzerJavaRunner) + return scanErr }); err != nil { out.Fatalf("Native scan has failed: %s", err) } + if analyzerFail = analyzer.Classify(scanCmdErr); analyzerFail != nil { + out.Error(analyzerFail.Message) + } report, err := validation.ValidateSarifOutput(absSarifReportPath) if err != nil { output.LogInfof("Scan output validation failed: %v", err) - out.Fatalf("There was a problem during the scan step, check the full logs: %s", globals.LogPath) + if analyzerFail == nil { + // Analyzer reported success but produced no valid SARIF — treat as failure. + out.Error(fmt.Sprintf("There was a problem during the scan step, check the full logs: %s", globals.LogPath)) + analyzerFail = &analyzer.Error{ExitCode: 1, Message: "scan output validation failed"} + } } out.Blank() el, err := validation.ValidateRuleLoadTraceOutput(absSemgrepRuleLoadTracePath) if err != nil { - out.Fatalf("Failed to validate rule load trace output: %s", err) + output.LogInfof("Rule load trace validation failed: %v", err) + if analyzerFail == nil { + out.Error(fmt.Sprintf("Failed to validate rule load trace output: %s", err)) + analyzerFail = &analyzer.Error{ExitCode: 1, Message: "rule load trace validation failed"} + } } - ruleLoadTraceSummary := load_trace.CollectRuleLoadTraceSummary(el, nonBuiltinRulesetPaths) - res := load_trace.CollectRulesetLoadErrorsSummary(ruleLoadTraceSummary) - ruleLoadErrorsResult := &res + if el != nil { + ruleLoadTraceSummary := load_trace.CollectRuleLoadTraceSummary(el, nonBuiltinRulesetPaths) - sarifSummary := sarif.GenerateSummary(report) - load_trace.PrintRuleStatisticsTree(out, ruleLoadErrorsResult, absSemgrepRuleLoadTracePath, sarifSummary) + res := load_trace.CollectRulesetLoadErrorsSummary(ruleLoadTraceSummary) + ruleLoadErrorsResult := &res + + var sarifSummary sarif.Summary + if report != nil { + sarifSummary = sarif.GenerateSummary(report) + } + load_trace.PrintRuleStatisticsTree(out, ruleLoadErrorsResult, absSemgrepRuleLoadTracePath, sarifSummary) - load_trace.PrintSyntaxErrorReport(out, ruleLoadTraceSummary) + load_trace.PrintSyntaxErrorReport(out, ruleLoadTraceSummary) + } - // Process the generated SARIF report if it exists - printSarifSummary(report, absSarifReportPath) + if report != nil { + printSarifSummary(report, absSarifReportPath) + suggest("To view findings run", utils.NewSummaryCommand(absSarifReportPath).WithShowFindings().Build()) + } - suggest("To view findings run", utils.NewSummaryCommand(absSarifReportPath).WithShowFindings().Build()) + if analyzerFail != nil { + os.Exit(analyzerFail.ExitCode) + } } func resolveScanConfig(absUserProjectRoot string) scanConfig { @@ -502,6 +568,10 @@ func setupSemgrepRuleLoadTrace() string { } func ensureAnalyzerAvailable() (string, error) { + if globals.Config.Analyzer.JarPath != "" { + return globals.Config.Analyzer.JarPath, nil + } + analyzerJarPath, err := utils.GetAnalyzerJarPath(globals.Config.Analyzer.Version) if err != nil { return "", fmt.Errorf("failed to construct path to the analyzer: %w", err) @@ -516,7 +586,7 @@ func ensureAnalyzerAvailable() (string, error) { return analyzerJarPath, nil } -func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) error { +func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) (*java.JavaCommandError, error) { analyzerCommand := analyzerBuilder.BuildNativeCommand() commandSucceeded := func(err error) bool { @@ -526,8 +596,6 @@ func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) e } return true } - // Execute the command using JavaRunner - err := javaRunner.ExecuteJavaCommand(analyzerCommand, commandSucceeded) - return err + return javaRunner.ExecuteJavaCommand(analyzerCommand, commandSucceeded) } diff --git a/cli/internal/analyzer/exit.go b/cli/internal/analyzer/exit.go new file mode 100644 index 00000000..6d38e759 --- /dev/null +++ b/cli/internal/analyzer/exit.go @@ -0,0 +1,69 @@ +// Package analyzer holds OpenTaint analyzer domain logic that is independent +// of the CLI presentation layer. It currently covers exit-code classification +// for analyzer process failures. +package analyzer + +import ( + "fmt" + + "github.com/seqra/opentaint/internal/utils/java" +) + +// Analyzer exit codes as seen by the OS (unsigned byte values). +// These correspond to the Kotlin exitProcess() calls in AbstractAnalyzerRunner: +// +// exitProcess(-1) → 255 (project configuration error) +// exitProcess(-2) → 254 (analysis timeout) +// exitProcess(-3) → 253 (out of memory) +// exitProcess(-4) → 252 (unhandled exception) +const ( + ExitConfigError = 255 + ExitTimeout = 254 + ExitOOM = 253 + ExitException = 252 +) + +// Error holds information about an analyzer failure. ExitCode is the process +// exit code to forward to os.Exit; Message is a human-readable description. +type Error struct { + ExitCode int + Message string +} + +// ExitMessage returns a human-readable description for a known analyzer exit +// code, or empty string if the code is not recognized. +func ExitMessage(code int) string { + switch code { + case ExitConfigError: + return "project configuration error" + case ExitTimeout: + return "analysis timed out — try increasing --timeout or --max-memory" + case ExitOOM: + return "out of memory — try increasing --max-memory (e.g. --max-memory 16G)" + case ExitException: + return "unhandled analyzer exception" + default: + return "" + } +} + +// Classify converts a *java.JavaCommandError into an *Error with a formatted +// message. Returns nil when cmdErr is nil. No I/O is performed — the caller +// is responsible for presentation and for calling os.Exit(Error.ExitCode). +func Classify(cmdErr *java.JavaCommandError) *Error { + if cmdErr == nil { + return nil + } + + code := cmdErr.ExitCode + if msg := ExitMessage(code); msg != "" { + return &Error{ + ExitCode: code, + Message: fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg), + } + } + return &Error{ + ExitCode: code, + Message: fmt.Sprintf("Analysis failed with exit code %d", code), + } +} diff --git a/cli/internal/globals/global.go b/cli/internal/globals/global.go index 8af14008..8bcba3f3 100644 --- a/cli/internal/globals/global.go +++ b/cli/internal/globals/global.go @@ -58,10 +58,12 @@ type Github struct { type Analyzer struct { Version string `mapstructure:"version"` + JarPath string `mapstructure:"jar_path"` } type Autobuilder struct { Version string `mapstructure:"version"` + JarPath string `mapstructure:"jar_path"` } type Rules struct { diff --git a/cli/internal/utils/extract.go b/cli/internal/utils/extract.go index c2262a13..90421152 100644 --- a/cli/internal/utils/extract.go +++ b/cli/internal/utils/extract.go @@ -56,6 +56,58 @@ func ExtractTar(tr *tar.Reader, basePath, destPath string, isSourceDir bool) err return nil } +// ExtractZipPrefix extracts entries whose names begin with prefix from the zip +// at src into destDir, stripping prefix from each entry's relative path. +// Directory entries are skipped — parent directories are created on demand. +// Returns an error if the zip cannot be opened or any entry fails to write. +func ExtractZipPrefix(src, prefix, destDir string) error { + r, err := zip.OpenReader(src) + if err != nil { + return fmt.Errorf("failed to open zip: %w", err) + } + defer func() { _ = r.Close() }() + + for _, f := range r.File { + if !strings.HasPrefix(f.Name, prefix) || f.FileInfo().IsDir() { + continue + } + relPath := strings.TrimPrefix(f.Name, prefix) + if relPath == "" { + continue + } + if err := copyZipEntry(f, filepath.Join(destDir, relPath)); err != nil { + return err + } + } + return nil +} + +func copyZipEntry(f *zip.File, destPath string) (err error) { + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + return err + } + src, err := f.Open() + if err != nil { + return err + } + defer func() { + if cerr := src.Close(); cerr != nil && err == nil { + err = cerr + } + }() + dst, err := os.Create(destPath) + if err != nil { + return err + } + defer func() { + if cerr := dst.Close(); cerr != nil && err == nil { + err = cerr + } + }() + _, err = io.Copy(dst, src) + return err +} + // ExtractZip extracts the contents of a ZIP file to the specified destination directory. func ExtractZip(src, dest string) error { zr, err := zip.OpenReader(src) diff --git a/cli/internal/utils/java/detection.go b/cli/internal/utils/java/detection.go index b2cde7b8..736bfb1c 100644 --- a/cli/internal/utils/java/detection.go +++ b/cli/internal/utils/java/detection.go @@ -3,7 +3,9 @@ package java import ( "os" "os/exec" + "path/filepath" "regexp" + "runtime" "strconv" "strings" @@ -130,6 +132,17 @@ func validateJavaInstallation(javaPath string) *JavaInstallation { return installation } +// DeriveJavacPath returns the javac binary path that sits next to the given +// java binary. On Windows the ".exe" suffix is preserved. +func DeriveJavacPath(javaPath string) string { + dir := filepath.Dir(javaPath) + name := "javac" + if runtime.GOOS == "windows" { + name = "javac.exe" + } + return filepath.Join(dir, name) +} + func extractVendor(versionOutput string) string { output := strings.ToLower(versionOutput) diff --git a/cli/internal/utils/java/runner.go b/cli/internal/utils/java/runner.go index 1641e301..a2878250 100644 --- a/cli/internal/utils/java/runner.go +++ b/cli/internal/utils/java/runner.go @@ -24,6 +24,16 @@ const ( None ) +// JavaCommandError is returned when a Java process exits with a non-zero exit code. +// It preserves the exit code so callers can interpret process-specific status values. +type JavaCommandError struct { + ExitCode int +} + +func (e *JavaCommandError) Error() string { + return fmt.Sprintf("java command failed with exit code %d", e.ExitCode) +} + type JavaRunner interface { TrySystem() JavaRunner TrySpecificVersion(version int) JavaRunner @@ -35,7 +45,7 @@ type JavaRunner interface { // Call this before wrapping ExecuteJavaCommand in a spinner to avoid // download progress bars overlapping with spinner output. EnsureJava() (string, error) - ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) error + ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) (*JavaCommandError, error) } type DebugLineWriter interface { @@ -147,9 +157,9 @@ func (j *javaRunner) EnsureJava() (string, error) { return "", fmt.Errorf("all Java resolution attempts failed") } -func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) error { +func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(error) bool) (*JavaCommandError, error) { if len(args) == 0 { - return fmt.Errorf("no Java command arguments provided") + return nil, fmt.Errorf("no Java command arguments provided") } // If EnsureJava was called, use the pre-resolved path directly @@ -157,6 +167,7 @@ func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(err return j.executeWithJava(j.resolvedJavaPath, Specific, args, commandSucceeded) } + var lastCmdErr *JavaCommandError resolutionStrategies := j.GetJavaResolutions() for i, resolutionStrategy := range resolutionStrategies { javaPath, strategy, err := resolutionStrategy() @@ -165,17 +176,26 @@ func (j *javaRunner) ExecuteJavaCommand(args []string, commandSucceeded func(err continue } - if err := j.executeWithJava(javaPath, strategy, args, commandSucceeded); err == nil { - return nil + cmdErr, execErr := j.executeWithJava(javaPath, strategy, args, commandSucceeded) + if execErr != nil { + output.LogDebugf("Java command setup failed (attempt %d): %v", i+1, execErr) + continue + } + if cmdErr == nil { + return nil, nil } + lastCmdErr = cmdErr output.LogDebugf("Java command failed (attempt %d), trying next resolution", i+1) } - return fmt.Errorf("all Java resolution attempts failed") + if lastCmdErr != nil { + return lastCmdErr, nil + } + return nil, fmt.Errorf("all Java resolution attempts failed") } -func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrategy, args []string, commandSucceeded func(error) bool) error { +func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrategy, args []string, commandSucceeded func(error) bool) (*JavaCommandError, error) { cmdArgs := append([]string{javaPath}, args...) cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...) @@ -190,16 +210,16 @@ func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrateg // Create pipes for stdout and stderr stdoutPipe, err := cmd.StdoutPipe() if err != nil { - return fmt.Errorf("failed to create stdout pipe: %w", err) + return nil, fmt.Errorf("failed to create stdout pipe: %w", err) } stderrPipe, err := cmd.StderrPipe() if err != nil { - return fmt.Errorf("failed to create stderr pipe: %w", err) + return nil, fmt.Errorf("failed to create stderr pipe: %w", err) } if err := cmd.Start(); err != nil { - return fmt.Errorf("failed to start Java command: %w", err) + return nil, fmt.Errorf("failed to start Java command: %w", err) } streamToTerminal := globals.Config.Output.Debug @@ -230,9 +250,10 @@ func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrateg // Wait for the command to finish err = cmd.Wait() - // Log any errors at debug level (caller decides severity) + // Extract exit code from the process error + exitCode := 0 if err != nil { - exitCode := 1 + exitCode = 1 if exitErr, ok := err.(*exec.ExitError); ok { exitCode = exitErr.ExitCode() } @@ -240,10 +261,10 @@ func (j *javaRunner) executeWithJava(javaPath string, strategy ResolutionStrateg } if commandSucceeded(err) { - return nil + return nil, nil } - return fmt.Errorf("java command failed") + return &JavaCommandError{ExitCode: exitCode}, nil } func (j *javaRunner) TrySpecificVersion(version int) JavaRunner { diff --git a/cli/internal/utils/java/runner_test.go b/cli/internal/utils/java/runner_test.go index a04b19cd..fa0815d0 100644 --- a/cli/internal/utils/java/runner_test.go +++ b/cli/internal/utils/java/runner_test.go @@ -332,8 +332,11 @@ func TestJavaRunner_GetJavaResolutions_BothStrategies(t *testing.T) { func TestJavaRunner_ExecuteJavaCommand_NoArgs(t *testing.T) { runner := NewJavaRunner() - err := runner.ExecuteJavaCommand([]string{}, func(error) bool { return true }) + cmdErr, err := runner.ExecuteJavaCommand([]string{}, func(error) bool { return true }) + if cmdErr != nil { + t.Error("Expected no JavaCommandError for missing arguments") + } if err == nil { t.Error("Expected error when no arguments provided") }