From 674b595efcf9dc340ce398d4fa111851ae7868a9 Mon Sep 17 00:00:00 2001
From: shubhangi-raj-meesho <shubhangi.raj@meesho.com>
Date: Wed, 18 Feb 2026 14:26:42 +0530
Subject: [PATCH 1/5] Added automated model checks to the model upload
 workflow, including validation for logger and print statements in Python
 backend models, and ensuring warmup configuration is present in config.pbtxt.

---
 horizon/internal/externalcall/gcs_client.go   |  34 +++
 horizon/internal/predator/handler/model.go    |  10 +-
 .../predator/handler/predator_constants.go    |  13 +-
 .../predator/handler/predator_upload.go       | 279 +++++++++++++++++-
 4 files changed, 331 insertions(+), 5 deletions(-)

diff --git a/horizon/internal/externalcall/gcs_client.go b/horizon/internal/externalcall/gcs_client.go
index 08a38ef5..2eed96a3 100644
--- a/horizon/internal/externalcall/gcs_client.go
+++ b/horizon/internal/externalcall/gcs_client.go
@@ -33,6 +33,7 @@ type GCSClientInterface interface {
 	GetFolderInfo(bucket, folderPrefix string) (*GCSFolderInfo, error)
 	ListFoldersWithTimestamp(bucket, prefix string) ([]GCSFolderInfo, error)
 	FindFileWithSuffix(bucket, folderPath, suffix string) (bool, string, error)
+	ListFilesWithSuffix(bucket, folderPath, suffix string) ([]string, error)
 }
 
 const (
@@ -771,6 +772,39 @@ func (g *GCSClient) FindFileWithSuffix(bucket, folderPath, suffix string) (bool,
 	return true, foundFile, nil
 }
 
+// ListFilesWithSuffix returns all object paths (full GCS object keys) ending with the given
+// suffix under folderPath. Unlike FindFileWithSuffix which returns only the first match,
+// this method collects every matching file path. Directory markers are skipped.
+func (g *GCSClient) ListFilesWithSuffix(bucket, folderPath, suffix string) ([]string, error) {
+	if g.client == nil {
+		return nil, fmt.Errorf("GCS client not initialized properly")
+	}
+
+	if !strings.HasSuffix(folderPath, "/") {
+		folderPath += "/"
+	}
+
+	log.Info().Msgf("Listing files with suffix '%s' in GCS bucket %s with prefix %s", suffix, bucket, folderPath)
+
+	var files []string
+	err := g.forEachObject(bucket, folderPath, func(attrs *storage.ObjectAttrs) error {
+		// Skip directory markers
+		if strings.HasSuffix(attrs.Name, "/") {
+			return nil
+		}
+		if strings.HasSuffix(attrs.Name, suffix) {
+			files = append(files, attrs.Name)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to list files with suffix '%s': %w", suffix, err)
+	}
+
+	log.Info().Msgf("Found %d files with suffix '%s' under %s/%s", len(files), suffix, bucket, folderPath)
+	return files, nil
+}
+
 // forEachObject iterates over all objects with the given prefix and calls the visitor for each.
 func (g *GCSClient) forEachObject(bucket, prefix string, visitor ObjectVisitor) error {
 	if g.client == nil {
diff --git a/horizon/internal/predator/handler/model.go b/horizon/internal/predator/handler/model.go
index c05cb329..a3ef6a1f 100644
--- a/horizon/internal/predator/handler/model.go
+++ b/horizon/internal/predator/handler/model.go
@@ -44,7 +44,7 @@ type IOField struct {
 }
 
 type ConfigMapping struct {
-	ServiceDeployableID uint `json:"service_deployable_id"`
+	ServiceDeployableID uint   `json:"service_deployable_id"`
 	SourceModelName     string `json:"source_model_name,omitempty"`
 }
 
@@ -270,3 +270,11 @@ type TritonInputTensor struct {
 type TritonOutputTensor struct {
 	Name string `json:"name"`
 }
+
+// fileViolationInfo groups all detected violations for a single Python file.
+// Used by validateNoLoggerOrPrintStatements and buildBalancedViolationSummary
+// to produce balanced, capped error messages.
+type fileViolationInfo struct {
+	fileName string
+	details  []string
+}
diff --git a/horizon/internal/predator/handler/predator_constants.go b/horizon/internal/predator/handler/predator_constants.go
index 4d304dbb..b8830838 100644
--- a/horizon/internal/predator/handler/predator_constants.go
+++ b/horizon/internal/predator/handler/predator_constants.go
@@ -52,7 +52,7 @@ const (
 	successDeleteRequestMsg           = "Model deletion request raised successfully"
 	fieldModelSourcePath              = "model_source_path"
 	fieldMetaData                     = "meta_data"
-	fieldDiscoveryConfigID           = "discovery_config_id"
+	fieldDiscoveryConfigID            = "discovery_config_id"
 	fieldConfigMapping                = "config_mapping"
 	errReadConfigFileFormat           = "failed to read config.pbtxt: %v"
 	errUnmarshalProtoFormat           = "failed to unmarshal proto text: %v"
@@ -63,7 +63,7 @@ const (
 	errMaxBatchSizeMissing            = "max_batch_size is missing or zero in config"
 	errBackendMissing                 = "backend is missing in config"
 	errNoInputDefinitions             = "no input definitions found in config"
-	errNoOutputDefinitions           = "no output definitions found in config"
+	errNoOutputDefinitions            = "no output definitions found in config"
 	errInstanceGroupMissing           = "instance group is missing in config"
 	errInvalidRequestIDFormat         = "invalid group ID format"
 	errFailedToFetchRequest           = "failed to fetch request for group id %s"
@@ -112,4 +112,13 @@ const (
 	predatorInferMethod                        = "inference.GRPCInferenceService/ModelInfer"
 	deployableTagDelimiter                     = "_"
 	scaleupTag                                 = "scaleup"
+	errWarmupConfigMissing                     = "model_warmup configuration is missing in config.pbtxt for non-ensemble model; warmup is required to pre-load models and avoid cold-start latency"
+	errLoggerOrPrintStatementsFound            = "model validation rejected: Python model files contain logger/print statements outside of initialize() and finalize() functions; these must be removed or commented out before deployment to prevent excessive logging in production"
+	pythonBackend                              = "python"
+	ensembleBackend                            = "ensemble"
+	ensemblePlatform                           = "ensemble"
+	pyFileSuffix                               = ".py"
+	allowedFuncInitialize                      = "initialize"
+	allowedFuncFinalize                        = "finalize"
+	maxDisplayedViolationsForPythonModel       = 5
 )
diff --git a/horizon/internal/predator/handler/predator_upload.go b/horizon/internal/predator/handler/predator_upload.go
index 9015f352..a8af26b2 100644
--- a/horizon/internal/predator/handler/predator_upload.go
+++ b/horizon/internal/predator/handler/predator_upload.go
@@ -144,7 +144,11 @@ func (p *Predator) validateUploadPrerequisites(bucket, destPath string, isPartia
 	return nil
 }
 
-// validateSourceModel validates the source model structure and configuration
+// validateSourceModel validates the source model structure and configuration.
+// For full uploads it also validates:
+//   - Model configuration (config.pbtxt is valid and parseable) and warmup configuration is present for non-ensemble models
+//   - Complete model structure (version 1/ folder exists with files)
+//   - Python backend models do not contain logger/print statements outside initialize()/finalize()
 func (p *Predator) validateSourceModel(gcsPath string, isPartial bool) error {
 	srcBucket, srcPath := extractGCSPath(gcsPath)
 	if srcBucket == "" || srcPath == "" {
@@ -159,6 +163,14 @@ func (p *Predator) validateSourceModel(gcsPath string, isPartial bool) error {
 		if err := p.validateCompleteModelStructure(srcBucket, srcPath); err != nil {
 			return fmt.Errorf("complete model structure validation failed: %w", err)
 		}
+
+		// Validate that Python backend models don't contain logger/print statements
+		// outside of initialize() and finalize() functions. This check only applies to
+		// full uploads because partial uploads only sync config.pbtxt — the Python
+		// model files in the source aren't modified or uploaded during partial syncs.
+		if err := p.validateNoLoggerOrPrintStatements(gcsPath); err != nil {
+			return fmt.Errorf("logger/print statement validation failed: %w", err)
+		}
 	}
 
 	return nil
@@ -422,7 +434,9 @@ func (p *Predator) syncPartialFiles(gcsPath, destBucket, destPath, modelName str
 	return nil
 }
 
-// validateModelConfiguration validates the model configuration
+// validateModelConfiguration validates the model configuration, including:
+// 1. Parsing config.pbtxt as a valid protobuf ModelConfig
+// 2. Checking that non-ensemble models have a warmup configuration defined
 func (p *Predator) validateModelConfiguration(gcsPath string) error {
 	log.Info().Msgf("Validating model configuration for GCS path: %s", gcsPath)
 
@@ -443,6 +457,19 @@ func (p *Predator) validateModelConfiguration(gcsPath string) error {
 	}
 
 	log.Info().Msgf("Parsed model config - Name: %s, Backend: %s", modelConfig.Name, modelConfig.Backend)
+
+	// Validate warmup configuration for non-ensemble models.
+	// Ensemble models are excluded because they orchestrate sub-models and don't
+	// hold weights that need warming up.
+	if !isEnsembleModel(&modelConfig) {
+		if len(modelConfig.GetModelWarmup()) == 0 {
+			return fmt.Errorf(errWarmupConfigMissing)
+		}
+		log.Info().Msg("Warmup configuration validation passed for non-ensemble model")
+	} else {
+		log.Info().Msg("Skipping warmup validation for ensemble model")
+	}
+
 	return nil
 }
 
@@ -528,3 +555,251 @@ func (p *Predator) replaceModelNameInConfigPreservingFormat(data []byte, destMod
 
 	return []byte(strings.Join(lines, "\n"))
 }
+
+// validateNoLoggerOrPrintStatements checks that Python backend models do not contain
+// uncommented logger.info, logger.debug, or print statements outside of initialize()
+// and finalize() functions.
+//
+// Workflow:
+//  1. Reads and parses config.pbtxt to determine the model backend type.
+//  2. If the backend is NOT "python", the check is skipped (non-Python backends
+//     like tensorrt or onnxruntime don't contain .py files).
+//  3. If the backend IS "python", enumerates all .py files in the model source
+//     directory using ListFilesWithSuffix.
+//  4. For each .py file, reads its content and scans for logger/print statements.
+//  5. Returns an error listing violations summary if any violations are found.
+func (p *Predator) validateNoLoggerOrPrintStatements(gcsPath string) error {
+	srcBucket, srcPath := extractGCSPath(gcsPath)
+	if srcBucket == "" || srcPath == "" {
+		return fmt.Errorf("invalid GCS path format: %s", gcsPath)
+	}
+
+	// Read and parse config.pbtxt to determine backend type.
+	configPath := path.Join(srcPath, configFile)
+	configData, err := p.GcsClient.ReadFile(srcBucket, configPath)
+	if err != nil {
+		return fmt.Errorf("failed to read config.pbtxt for logger/print check: %w", err)
+	}
+
+	var modelConfig ModelConfig
+	if err := prototext.Unmarshal(configData, &modelConfig); err != nil {
+		return fmt.Errorf("failed to parse config.pbtxt for logger/print check: %w", err)
+	}
+
+	if !isPythonBackendModel(&modelConfig) {
+		log.Info().Msg("Skipping logger/print statement check for non-Python backend model")
+		return nil
+	}
+
+	log.Info().Msg("Python backend model detected, scanning .py files for logger/print statements")
+
+	// Enumerate all .py files in the model source directory.
+	pyFiles, err := p.GcsClient.ListFilesWithSuffix(srcBucket, srcPath, pyFileSuffix)
+	if err != nil {
+		return fmt.Errorf("failed to list Python files for logger/print check: %w", err)
+	}
+
+	if len(pyFiles) == 0 {
+		log.Info().Msg("No Python files found in model directory, skipping logger/print check")
+		return nil
+	}
+
+	// Collect per-file violations so we can produce a balanced error message
+	// that caps the total shown violations at maxDisplayedViolations.
+	var perFileViolations []fileViolationInfo
+	totalViolations := 0
+
+	for _, pyFile := range pyFiles {
+		content, err := p.GcsClient.ReadFile(srcBucket, pyFile)
+		if err != nil {
+			return fmt.Errorf("failed to read Python file %s for logger/print check: %w", pyFile, err)
+		}
+
+		found, details := hasPythonLoggerOrPrintStatements(content)
+		if found {
+			perFileViolations = append(perFileViolations, fileViolationInfo{
+				fileName: path.Base(pyFile),
+				details:  details,
+			})
+			totalViolations += len(details)
+		}
+	}
+
+	if totalViolations == 0 {
+		log.Info().Msg("Logger/print statement validation passed for Python backend model")
+		return nil
+	}
+
+	log.Warn().Msgf("Found %d logger/print statement violation(s) across %d Python model file(s)",
+		totalViolations, len(perFileViolations))
+
+	return fmt.Errorf("%s; %s",
+		errLoggerOrPrintStatementsFound,
+		buildBalancedViolationSummary(perFileViolations, totalViolations))
+}
+
+// buildBalancedViolationSummary produces a human-readable error string that shows
+// at most maxDisplayedViolations individual violation details, distributed fairly
+// across all files using round-robin allocation. Each file entry includes its
+// total violation count, and files whose violations exceed their allocated share
+// get an "...and N more" suffix.
+//
+// Round-robin ensures fair distribution: each file gets one slot per round until
+// all slots are consumed. Files with fewer violations than their fair share
+// naturally release unused slots to files with more violations.
+func buildBalancedViolationSummary(perFileViolations []fileViolationInfo, totalViolations int) string {
+	const maxDisplayedViolations = maxDisplayedViolationsForPythonModel
+
+	numFiles := len(perFileViolations)
+
+	slots := make([]int, numFiles)
+	remaining := maxDisplayedViolations
+	if totalViolations < remaining {
+		remaining = totalViolations
+	}
+
+	for remaining > 0 {
+		advanced := false
+		for i := range perFileViolations {
+			if remaining <= 0 {
+				break
+			}
+			if slots[i] < len(perFileViolations[i].details) {
+				slots[i]++
+				remaining--
+				advanced = true
+			}
+		}
+		if !advanced {
+			break
+		}
+	}
+
+	// Build a per-file summary: shown violations + "...and N more" if truncated.
+	var summaries []string
+	for i, fv := range perFileViolations {
+		var parts []string
+		for j := 0; j < slots[i]; j++ {
+			parts = append(parts, fv.details[j])
+		}
+		extra := len(fv.details) - slots[i]
+		if extra > 0 {
+			parts = append(parts, fmt.Sprintf("...and %d more", extra))
+		}
+
+		label := "violations"
+		if len(fv.details) == 1 {
+			label = "violation"
+		}
+		summaries = append(summaries, fmt.Sprintf("%s (%d %s): %s",
+			fv.fileName, len(fv.details), label, strings.Join(parts, ", ")))
+	}
+
+	return strings.Join(summaries, "; ")
+}
+
+// hasPythonLoggerOrPrintStatements scans Python source code for uncommented
+// logger.info, logger.debug, or print() statements that are NOT inside
+// initialize() or finalize() functions.
+//
+// The logic tracks which Python function scope each line belongs to by:
+//  1. Detecting "def funcname(" patterns and recording the function name + indentation.
+//  2. When a subsequent non-empty line appears at the same or lesser indentation,
+//     considering the function scope exited (Python's indentation-based scoping).
+//  3. Skipping any logger/print matches found inside initialize() or finalize().
+//
+// Lines that are entirely comments (start with '#' after stripping whitespace) are
+// skipped.
+// Returns:
+//   - found: true if any violations were detected
+//   - details: human-readable descriptions of each violation (line number + content)
+func hasPythonLoggerOrPrintStatements(content []byte) (found bool, details []string) {
+	lines := strings.Split(string(content), "\n")
+
+	// Pre-compile regex patterns once per invocation (not per line).
+	// funcDefPattern matches Python function definitions: "def funcname("
+	funcDefPattern := regexp.MustCompile(`^def\s+(\w+)\s*\(`)
+	// loggerPattern matches logger.info(...) and logger.debug(...) calls, case-insensitive.
+	loggerPattern := regexp.MustCompile(`(?i)logger\.(info|debug)\s*\(`)
+	// printPattern matches print(...) calls with a word boundary to avoid matching
+	// identifiers like "fingerprint(" or "blueprint(".
+	printPattern := regexp.MustCompile(`\bprint\s*\(`)
+
+	// Track which Python function we're currently inside to skip initialize/finalize.
+	var currentFunction string
+	var functionIndent int
+
+	for lineNum, line := range lines {
+		strippedLine := strings.TrimSpace(line)
+
+		// Skip empty lines — they don't affect scope tracking or contain statements.
+		if strippedLine == "" {
+			continue
+		}
+
+		// Skip lines that are entirely comments (e.g., "# logger.info(...)").
+		if strings.HasPrefix(strippedLine, "#") {
+			continue
+		}
+
+		// Calculate indentation level (number of leading space/tab characters).
+		lineIndent := len(line) - len(strings.TrimLeft(line, " \t"))
+
+		// Detect function definitions: "def funcname("
+		if funcMatch := funcDefPattern.FindStringSubmatch(strippedLine); len(funcMatch) > 1 {
+			currentFunction = funcMatch[1]
+			functionIndent = lineIndent
+			continue
+		}
+
+		// If the current line's indentation is at or below the function definition's level,
+		// we've exited that function's scope (Python's indentation-based scoping).
+		if currentFunction != "" && lineIndent <= functionIndent && strippedLine != "" {
+			currentFunction = ""
+		}
+
+		// Skip statements inside initialize() and finalize() — these only execute during
+		// model load/unload, not during inference, so logging there is acceptable.
+		if currentFunction == allowedFuncInitialize || currentFunction == allowedFuncFinalize {
+			continue
+		}
+
+		// Check for logger.info() or logger.debug() calls (case-insensitive).
+		if loggerPattern.MatchString(line) {
+			details = append(details, fmt.Sprintf("line %d: %s", lineNum+1, strippedLine))
+			found = true
+		}
+
+		// Check for print() calls.
+		if printPattern.MatchString(line) {
+			details = append(details, fmt.Sprintf("line %d: %s", lineNum+1, strippedLine))
+			found = true
+		}
+	}
+
+	return found, details
+}
+
+// isEnsembleModel checks if the model configuration represents a Triton ensemble model.
+// Ensemble models are identified by:
+//   - backend field set to "ensemble", OR
+//   - platform field set to "ensemble", OR
+//   - ensemble_scheduling section being present in the config
+func isEnsembleModel(config *ModelConfig) bool {
+	if config.GetBackend() == ensembleBackend {
+		return true
+	}
+	if config.GetPlatform() == ensemblePlatform {
+		return true
+	}
+	// Also check if ensemble_scheduling is defined, which implies an ensemble model
+	if config.GetEnsembleScheduling() != nil {
+		return true
+	}
+	return false
+}
+
+// isPythonBackendModel checks if the model config specifies "python" as its backend.
+func isPythonBackendModel(config *ModelConfig) bool {
+	return config.GetBackend() == pythonBackend
+}

From acf4ba460643014180752fffa19c17d86a64a136 Mon Sep 17 00:00:00 2001
From: shubhangi-raj-meesho <shubhangi.raj@meesho.com>
Date: Wed, 18 Feb 2026 18:06:26 +0530
Subject: [PATCH 2/5] Resolved static check lint error

---
 horizon/internal/predator/handler/predator_upload.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/horizon/internal/predator/handler/predator_upload.go b/horizon/internal/predator/handler/predator_upload.go
index a8af26b2..01f9c4cd 100644
--- a/horizon/internal/predator/handler/predator_upload.go
+++ b/horizon/internal/predator/handler/predator_upload.go
@@ -2,6 +2,7 @@ package handler
 
 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"net/http"
 	"path"
@@ -463,7 +464,7 @@ func (p *Predator) validateModelConfiguration(gcsPath string) error {
 	// hold weights that need warming up.
 	if !isEnsembleModel(&modelConfig) {
 		if len(modelConfig.GetModelWarmup()) == 0 {
-			return fmt.Errorf(errWarmupConfigMissing)
+			return errors.New(errWarmupConfigMissing)
 		}
 		log.Info().Msg("Warmup configuration validation passed for non-ensemble model")
 	} else {

From 95986c443e4975f25a9bfecab5cdca2813381c55 Mon Sep 17 00:00:00 2001
From: shubhangi-raj-meesho <shubhangi.raj@meesho.com>
Date: Wed, 18 Feb 2026 18:22:47 +0530
Subject: [PATCH 3/5] Added unit tests for the new added code changes,

---
 .../predator/handler/predator_upload_test.go  | 631 ++++++++++++++++++
 1 file changed, 631 insertions(+)
 create mode 100644 horizon/internal/predator/handler/predator_upload_test.go

diff --git a/horizon/internal/predator/handler/predator_upload_test.go b/horizon/internal/predator/handler/predator_upload_test.go
new file mode 100644
index 00000000..6639faca
--- /dev/null
+++ b/horizon/internal/predator/handler/predator_upload_test.go
@@ -0,0 +1,631 @@
+package handler
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/Meesho/BharatMLStack/horizon/internal/externalcall"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// mockGCSClient implements externalcall.GCSClientInterface for unit tests.
+// Only methods needed by the functions under test carry real logic;
+// the rest return nil/zero to satisfy the interface.
+type mockGCSClient struct {
+	readFile            func(bucket, objectPath string) ([]byte, error)
+	listFilesWithSuffix func(bucket, folderPath, suffix string) ([]string, error)
+}
+
+func (m *mockGCSClient) ReadFile(bucket, objectPath string) ([]byte, error) {
+	if m.readFile != nil {
+		return m.readFile(bucket, objectPath)
+	}
+	return nil, fmt.Errorf("ReadFile not mocked")
+}
+
+func (m *mockGCSClient) ListFilesWithSuffix(bucket, folderPath, suffix string) ([]string, error) {
+	if m.listFilesWithSuffix != nil {
+		return m.listFilesWithSuffix(bucket, folderPath, suffix)
+	}
+	return nil, fmt.Errorf("ListFilesWithSuffix not mocked")
+}
+
+func (m *mockGCSClient) TransferFolder(_, _, _, _, _, _ string) error          { return nil }
+func (m *mockGCSClient) TransferAndDeleteFolder(_, _, _, _, _, _ string) error { return nil }
+func (m *mockGCSClient) TransferFolderWithSplitSources(_, _, _, _, _, _, _, _ string) error {
+	return nil
+}
+func (m *mockGCSClient) DeleteFolder(_, _, _ string) error           { return nil }
+func (m *mockGCSClient) ListFolders(_, _ string) ([]string, error)   { return nil, nil }
+func (m *mockGCSClient) UploadFile(_, _ string, _ []byte) error      { return nil }
+func (m *mockGCSClient) CheckFileExists(_, _ string) (bool, error)   { return false, nil }
+func (m *mockGCSClient) CheckFolderExists(_, _ string) (bool, error) { return false, nil }
+func (m *mockGCSClient) UploadFolderFromLocal(_, _, _ string) error  { return nil }
+func (m *mockGCSClient) GetFolderInfo(_, _ string) (*externalcall.GCSFolderInfo, error) {
+	return nil, nil
+}
+func (m *mockGCSClient) ListFoldersWithTimestamp(_, _ string) ([]externalcall.GCSFolderInfo, error) {
+	return nil, nil
+}
+func (m *mockGCSClient) FindFileWithSuffix(_, _, _ string) (bool, string, error) {
+	return false, "", nil
+}
+
+// Tests for isEnsembleModel
+func TestIsEnsembleModel(t *testing.T) {
+	tests := []struct {
+		name   string
+		config *ModelConfig
+		want   bool
+	}{
+		{
+			name:   "backend is ensemble",
+			config: &ModelConfig{Backend: "ensemble"},
+			want:   true,
+		},
+		{
+			name:   "platform is ensemble",
+			config: &ModelConfig{Platform: "ensemble"},
+			want:   true,
+		},
+		{
+			name: "ensemble_scheduling is set",
+			config: &ModelConfig{
+				Backend: "python",
+				SchedulingChoice: &ModelConfig_EnsembleScheduling{
+					EnsembleScheduling: &ModelEnsembling{},
+				},
+			},
+			want: true,
+		},
+		{
+			name:   "python backend - not ensemble",
+			config: &ModelConfig{Backend: "python"},
+			want:   false,
+		},
+		{
+			name:   "onnxruntime backend - not ensemble",
+			config: &ModelConfig{Backend: "onnxruntime"},
+			want:   false,
+		},
+		{
+			name:   "empty config - not ensemble",
+			config: &ModelConfig{},
+			want:   false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := isEnsembleModel(tt.config)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+// Tests for isPythonBackendModel
+func TestIsPythonBackendModel(t *testing.T) {
+	tests := []struct {
+		name   string
+		config *ModelConfig
+		want   bool
+	}{
+		{
+			name:   "python backend",
+			config: &ModelConfig{Backend: "python"},
+			want:   true,
+		},
+		{
+			name:   "onnxruntime backend",
+			config: &ModelConfig{Backend: "onnxruntime"},
+			want:   false,
+		},
+		{
+			name:   "tensorrt_llm backend",
+			config: &ModelConfig{Backend: "tensorrt_llm"},
+			want:   false,
+		},
+		{
+			name:   "ensemble backend",
+			config: &ModelConfig{Backend: "ensemble"},
+			want:   false,
+		},
+		{
+			name:   "empty backend",
+			config: &ModelConfig{},
+			want:   false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := isPythonBackendModel(tt.config)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+// Tests for hasPythonLoggerOrPrintStatements
+func TestHasPythonLoggerOrPrintStatements(t *testing.T) {
+	tests := []struct {
+		name        string
+		content     string
+		wantFound   bool
+		wantDetails int // expected number of violations
+	}{
+		{
+			name:        "clean file with no violations",
+			content:     "import numpy as np\n\ndef execute(requests):\n    result = np.array([1, 2])\n    return result\n",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name:        "logger.info outside any function",
+			content:     "import triton\nlogger.info('loading model')\n",
+			wantFound:   true,
+			wantDetails: 1,
+		},
+		{
+			name:        "print statement outside any function",
+			content:     "import triton\nprint('hello')\n",
+			wantFound:   true,
+			wantDetails: 1,
+		},
+		{
+			name:        "logger.debug inside execute function",
+			content:     "def execute(requests):\n    logger.debug('processing')\n    return []\n",
+			wantFound:   true,
+			wantDetails: 1,
+		},
+		{
+			name:        "logger.info inside initialize is allowed",
+			content:     "def initialize(self, args):\n    logger.info('model loaded')\n    self.model = load()\n",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name:        "print inside finalize is allowed",
+			content:     "def finalize(self):\n    print('cleanup done')\n",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name: "logger in initialize allowed but in execute flagged",
+			content: `def initialize(self, args):
+    logger.info('init ok')
+
+def execute(self, requests):
+    logger.info('processing request')
+    return []
+`,
+			wantFound:   true,
+			wantDetails: 1,
+		},
+		{
+			name:        "commented out logger.info is skipped",
+			content:     "def execute(requests):\n    # logger.info('debug')\n    return []\n",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name:        "commented out print is skipped",
+			content:     "# print('debug')\ndef execute(requests):\n    return []\n",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name:        "fingerprint should not match print pattern",
+			content:     "def execute(requests):\n    fp = fingerprint(data)\n    return fp\n",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name:        "blueprint should not match print pattern",
+			content:     "def execute(requests):\n    bp = blueprint(config)\n    return bp\n",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name: "multiple violations across functions",
+			content: `import os
+
+def helper():
+    print('helper debug')
+    logger.info('helper info')
+
+def execute(requests):
+    print('exec debug')
+    return []
+`,
+			wantFound:   true,
+			wantDetails: 3,
+		},
+		{
+			name:        "logger.INFO case insensitive match",
+			content:     "def execute(requests):\n    LOGGER.INFO('upper case')\n    return []\n",
+			wantFound:   true,
+			wantDetails: 1,
+		},
+		{
+			name: "scope exit resets after initialize",
+			content: `def initialize(self, args):
+    logger.info('init ok')
+
+logger.info('module level - should flag')
+`,
+			wantFound:   true,
+			wantDetails: 1,
+		},
+		{
+			name:        "empty file",
+			content:     "",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			found, details := hasPythonLoggerOrPrintStatements([]byte(tt.content))
+			assert.Equal(t, tt.wantFound, found)
+			assert.Equal(t, tt.wantDetails, len(details), "violation details: %v", details)
+		})
+	}
+}
+
+// Tests for buildBalancedViolationSummary
+func TestBuildBalancedViolationSummary(t *testing.T) {
+	tests := []struct {
+		name            string
+		violations      []fileViolationInfo
+		totalViolations int
+		wantContains    []string
+		wantNotContain  []string
+	}{
+		{
+			name: "single file single violation",
+			violations: []fileViolationInfo{
+				{fileName: "model.py", details: []string{"line 5: print('x')"}},
+			},
+			totalViolations: 1,
+			wantContains:    []string{"model.py (1 violation)", "line 5: print('x')"},
+		},
+		{
+			name: "single file multiple violations within cap",
+			violations: []fileViolationInfo{
+				{fileName: "model.py", details: []string{
+					"line 5: print('a')",
+					"line 10: print('b')",
+					"line 15: print('c')",
+				}},
+			},
+			totalViolations: 3,
+			wantContains:    []string{"model.py (3 violations)", "line 5", "line 10", "line 15"},
+		},
+		{
+			name: "single file exceeds cap shows truncation",
+			violations: []fileViolationInfo{
+				{fileName: "model.py", details: []string{
+					"line 1: print('a')",
+					"line 2: print('b')",
+					"line 3: print('c')",
+					"line 4: print('d')",
+					"line 5: print('e')",
+					"line 6: print('f')",
+					"line 7: print('g')",
+				}},
+			},
+			totalViolations: 7,
+			wantContains:    []string{"model.py (7 violations)", "...and 2 more"},
+		},
+		{
+			name: "two files with round-robin distribution",
+			violations: []fileViolationInfo{
+				{fileName: "a.py", details: []string{"line 1: print('a1')", "line 2: print('a2')", "line 3: print('a3')"}},
+				{fileName: "b.py", details: []string{"line 10: print('b1')", "line 20: print('b2')", "line 30: print('b3')"}},
+			},
+			totalViolations: 6,
+			wantContains:    []string{"a.py", "b.py"},
+		},
+		{
+			name: "uses singular 'violation' for single-violation file",
+			violations: []fileViolationInfo{
+				{fileName: "single.py", details: []string{"line 1: print('x')"}},
+			},
+			totalViolations: 1,
+			wantContains:    []string{"1 violation)"},
+			wantNotContain:  []string{"1 violations)"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := buildBalancedViolationSummary(tt.violations, tt.totalViolations)
+			for _, want := range tt.wantContains {
+				assert.Contains(t, got, want)
+			}
+			for _, notWant := range tt.wantNotContain {
+				assert.NotContains(t, got, notWant)
+			}
+		})
+	}
+}
+
+// ---------------------------------------------------------------------------
+// Tests for validateModelConfiguration (warmup check via GCS mock)
+// ---------------------------------------------------------------------------
+
+func TestValidateModelConfiguration_WarmupCheck(t *testing.T) {
+	pythonWithWarmup := `name: "my_model"
+backend: "python"
+max_batch_size: 8
+model_warmup {
+  name: "warmup_request"
+  batch_size: 1
+  inputs {
+    key: "input"
+    value: {
+      data_type: TYPE_FP32
+      dims: [1, 3]
+      zero_data: true
+    }
+  }
+}
+`
+
+	pythonWithoutWarmup := `name: "my_model"
+backend: "python"
+max_batch_size: 8
+`
+
+	ensembleConfig := `name: "ensemble_model"
+platform: "ensemble"
+max_batch_size: 8
+ensemble_scheduling {
+  step {
+    model_name: "preprocess"
+    model_version: 1
+  }
+}
+`
+
+	ensembleBackendConfig := `name: "ensemble_model"
+backend: "ensemble"
+max_batch_size: 8
+`
+
+	tests := []struct {
+		name       string
+		configData string
+		wantErr    bool
+		errContain string
+	}{
+		{
+			name:       "python model with warmup passes",
+			configData: pythonWithWarmup,
+			wantErr:    false,
+		},
+		{
+			name:       "python model without warmup fails",
+			configData: pythonWithoutWarmup,
+			wantErr:    true,
+			errContain: "model_warmup configuration is missing",
+		},
+		{
+			name:       "ensemble model skips warmup check",
+			configData: ensembleConfig,
+			wantErr:    false,
+		},
+		{
+			name:       "ensemble backend model skips warmup check",
+			configData: ensembleBackendConfig,
+			wantErr:    false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gcs := &mockGCSClient{
+				readFile: func(bucket, objectPath string) ([]byte, error) {
+					return []byte(tt.configData), nil
+				},
+			}
+
+			p := &Predator{GcsClient: gcs}
+			err := p.validateModelConfiguration("gs://test-bucket/models/my_model")
+
+			if tt.wantErr {
+				require.Error(t, err)
+				assert.Contains(t, err.Error(), tt.errContain)
+			} else {
+				require.NoError(t, err)
+			}
+		})
+	}
+}
+
+func TestValidateModelConfiguration_InvalidGCSPath(t *testing.T) {
+	p := &Predator{}
+	err := p.validateModelConfiguration("invalid-path")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "invalid GCS path format")
+}
+
+func TestValidateModelConfiguration_ReadFileError(t *testing.T) {
+	gcs := &mockGCSClient{
+		readFile: func(_, _ string) ([]byte, error) {
+			return nil, fmt.Errorf("bucket not found")
+		},
+	}
+	p := &Predator{GcsClient: gcs}
+	err := p.validateModelConfiguration("gs://test-bucket/models/my_model")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "failed to read config.pbtxt")
+}
+
+func TestValidateModelConfiguration_InvalidProto(t *testing.T) {
+	gcs := &mockGCSClient{
+		readFile: func(_, _ string) ([]byte, error) {
+			return []byte("this is not valid proto text {{{"), nil
+		},
+	}
+	p := &Predator{GcsClient: gcs}
+	err := p.validateModelConfiguration("gs://test-bucket/models/my_model")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "failed to parse config.pbtxt")
+}
+
+// Tests for validateNoLoggerOrPrintStatements (integration with GCS mock)
+func TestValidateNoLoggerOrPrintStatements_NonPythonBackend(t *testing.T) {
+	configData := `name: "onnx_model"
+backend: "onnxruntime"
+max_batch_size: 8
+`
+	gcs := &mockGCSClient{
+		readFile: func(_, objectPath string) ([]byte, error) {
+			if strings.HasSuffix(objectPath, "config.pbtxt") {
+				return []byte(configData), nil
+			}
+			return nil, fmt.Errorf("unexpected read: %s", objectPath)
+		},
+	}
+
+	p := &Predator{GcsClient: gcs}
+	err := p.validateNoLoggerOrPrintStatements("gs://test-bucket/models/onnx_model")
+	require.NoError(t, err)
+}
+
+func TestValidateNoLoggerOrPrintStatements_PythonBackendClean(t *testing.T) {
+	configData := `name: "py_model"
+backend: "python"
+max_batch_size: 8
+`
+	cleanPython := `import numpy as np
+
+def initialize(self, args):
+    logger.info('init')
+
+def execute(self, requests):
+    result = np.array([1, 2, 3])
+    return result
+
+def finalize(self):
+    print('done')
+`
+
+	gcs := &mockGCSClient{
+		readFile: func(_, objectPath string) ([]byte, error) {
+			if strings.HasSuffix(objectPath, "config.pbtxt") {
+				return []byte(configData), nil
+			}
+			return []byte(cleanPython), nil
+		},
+		listFilesWithSuffix: func(_, _, _ string) ([]string, error) {
+			return []string{"models/py_model/1/model.py"}, nil
+		},
+	}
+
+	p := &Predator{GcsClient: gcs}
+	err := p.validateNoLoggerOrPrintStatements("gs://test-bucket/models/py_model")
+	require.NoError(t, err)
+}
+
+func TestValidateNoLoggerOrPrintStatements_PythonBackendWithViolations(t *testing.T) {
+	configData := `name: "py_model"
+backend: "python"
+max_batch_size: 8
+`
+	dirtyPython := `import numpy as np
+
+def execute(self, requests):
+    logger.info('processing')
+    print('debug output')
+    return []
+`
+
+	gcs := &mockGCSClient{
+		readFile: func(_, objectPath string) ([]byte, error) {
+			if strings.HasSuffix(objectPath, "config.pbtxt") {
+				return []byte(configData), nil
+			}
+			return []byte(dirtyPython), nil
+		},
+		listFilesWithSuffix: func(_, _, _ string) ([]string, error) {
+			return []string{"models/py_model/1/model.py"}, nil
+		},
+	}
+
+	p := &Predator{GcsClient: gcs}
+	err := p.validateNoLoggerOrPrintStatements("gs://test-bucket/models/py_model")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "logger/print statements")
+	assert.Contains(t, err.Error(), "model.py")
+}
+
+func TestValidateNoLoggerOrPrintStatements_NoPythonFiles(t *testing.T) {
+	configData := `name: "py_model"
+backend: "python"
+max_batch_size: 8
+`
+	gcs := &mockGCSClient{
+		readFile: func(_, objectPath string) ([]byte, error) {
+			if strings.HasSuffix(objectPath, "config.pbtxt") {
+				return []byte(configData), nil
+			}
+			return nil, fmt.Errorf("not found")
+		},
+		listFilesWithSuffix: func(_, _, _ string) ([]string, error) {
+			return []string{}, nil
+		},
+	}
+
+	p := &Predator{GcsClient: gcs}
+	err := p.validateNoLoggerOrPrintStatements("gs://test-bucket/models/py_model")
+	require.NoError(t, err)
+}
+
+func TestValidateNoLoggerOrPrintStatements_InvalidGCSPath(t *testing.T) {
+	p := &Predator{}
+	err := p.validateNoLoggerOrPrintStatements("invalid-path")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "invalid GCS path format")
+}
+
+func TestValidateNoLoggerOrPrintStatements_MultipleFiles(t *testing.T) {
+	configData := `name: "py_model"
+backend: "python"
+max_batch_size: 8
+`
+	fileContents := map[string]string{
+		"models/py_model/config.pbtxt": configData,
+		"models/py_model/1/model.py": `def execute(self, requests):
+    print('debug in model')
+    return []
+`,
+		"models/py_model/1/utils.py": `def helper():
+    logger.info('helper log')
+    return True
+`,
+	}
+
+	gcs := &mockGCSClient{
+		readFile: func(_, objectPath string) ([]byte, error) {
+			if content, ok := fileContents[objectPath]; ok {
+				return []byte(content), nil
+			}
+			return nil, fmt.Errorf("file not found: %s", objectPath)
+		},
+		listFilesWithSuffix: func(_, _, _ string) ([]string, error) {
+			return []string{"models/py_model/1/model.py", "models/py_model/1/utils.py"}, nil
+		},
+	}
+
+	p := &Predator{GcsClient: gcs}
+	err := p.validateNoLoggerOrPrintStatements("gs://test-bucket/models/py_model")
+	require.Error(t, err)
+	errMsg := err.Error()
+	assert.Contains(t, errMsg, "model.py")
+	assert.Contains(t, errMsg, "utils.py")
+}

From a7e4692f78bcb88d631311fe8d7cc001f0b449ef Mon Sep 17 00:00:00 2001
From: shubhangi-raj-meesho <shubhangi.raj@meesho.com>
Date: Thu, 19 Feb 2026 16:46:39 +0530
Subject: [PATCH 4/5] resolved coderabbit and other review comments

---
 horizon/internal/predator/handler/model.go    |   6 +
 .../predator/handler/predator_upload.go       | 114 +++++++++++-------
 .../predator/handler/predator_upload_test.go  | 110 +++++++++++++++++
 3 files changed, 184 insertions(+), 46 deletions(-)

diff --git a/horizon/internal/predator/handler/model.go b/horizon/internal/predator/handler/model.go
index a3ef6a1f..5d750423 100644
--- a/horizon/internal/predator/handler/model.go
+++ b/horizon/internal/predator/handler/model.go
@@ -278,3 +278,9 @@ type fileViolationInfo struct {
 	fileName string
 	details  []string
 }
+
+// funcScopeEntry represents a Python function scope on the tracking stack.
+type funcScopeEntry struct {
+	name   string
+	indent int
+}
diff --git a/horizon/internal/predator/handler/predator_upload.go b/horizon/internal/predator/handler/predator_upload.go
index 01f9c4cd..d02da174 100644
--- a/horizon/internal/predator/handler/predator_upload.go
+++ b/horizon/internal/predator/handler/predator_upload.go
@@ -165,10 +165,6 @@ func (p *Predator) validateSourceModel(gcsPath string, isPartial bool) error {
 			return fmt.Errorf("complete model structure validation failed: %w", err)
 		}
 
-		// Validate that Python backend models don't contain logger/print statements
-		// outside of initialize() and finalize() functions. This check only applies to
-		// full uploads because partial uploads only sync config.pbtxt — the Python
-		// model files in the source aren't modified or uploaded during partial syncs.
 		if err := p.validateNoLoggerOrPrintStatements(gcsPath); err != nil {
 			return fmt.Errorf("logger/print statement validation failed: %w", err)
 		}
@@ -459,9 +455,6 @@ func (p *Predator) validateModelConfiguration(gcsPath string) error {
 
 	log.Info().Msgf("Parsed model config - Name: %s, Backend: %s", modelConfig.Name, modelConfig.Backend)
 
-	// Validate warmup configuration for non-ensemble models.
-	// Ensemble models are excluded because they orchestrate sub-models and don't
-	// hold weights that need warming up.
 	if !isEnsembleModel(&modelConfig) {
 		if len(modelConfig.GetModelWarmup()) == 0 {
 			return errors.New(errWarmupConfigMissing)
@@ -605,8 +598,6 @@ func (p *Predator) validateNoLoggerOrPrintStatements(gcsPath string) error {
 		return nil
 	}
 
-	// Collect per-file violations so we can produce a balanced error message
-	// that caps the total shown violations at maxDisplayedViolations.
 	var perFileViolations []fileViolationInfo
 	totalViolations := 0
 
@@ -703,76 +694,61 @@ func buildBalancedViolationSummary(perFileViolations []fileViolationInfo, totalV
 // logger.info, logger.debug, or print() statements that are NOT inside
 // initialize() or finalize() functions.
 //
-// The logic tracks which Python function scope each line belongs to by:
-//  1. Detecting "def funcname(" patterns and recording the function name + indentation.
-//  2. When a subsequent non-empty line appears at the same or lesser indentation,
-//     considering the function scope exited (Python's indentation-based scoping).
-//  3. Skipping any logger/print matches found inside initialize() or finalize().
+// The logic tracks nested Python function scopes using a stack of (name, indent)
+// entries so that nested defs inside initialize/finalize are correctly treated as
+// allowed:
+//  1. On each non-empty, non-comment line, pop stack entries whose indentation is
+//     >= the current line's indentation (dedent = scope exit).
+//  2. If the line is a "def funcname(" pattern, push the new scope onto the stack.
+//  3. If any entry on the stack is initialize or finalize, skip the line.
+//
+// Inline comments are stripped before pattern matching to avoid false positives on
+// trailing comments like `x = f()  # logger.info(...)`.
 //
-// Lines that are entirely comments (start with '#' after stripping whitespace) are
-// skipped.
 // Returns:
 //   - found: true if any violations were detected
 //   - details: human-readable descriptions of each violation (line number + content)
 func hasPythonLoggerOrPrintStatements(content []byte) (found bool, details []string) {
 	lines := strings.Split(string(content), "\n")
 
-	// Pre-compile regex patterns once per invocation (not per line).
-	// funcDefPattern matches Python function definitions: "def funcname("
 	funcDefPattern := regexp.MustCompile(`^def\s+(\w+)\s*\(`)
-	// loggerPattern matches logger.info(...) and logger.debug(...) calls, case-insensitive.
 	loggerPattern := regexp.MustCompile(`(?i)logger\.(info|debug)\s*\(`)
-	// printPattern matches print(...) calls with a word boundary to avoid matching
-	// identifiers like "fingerprint(" or "blueprint(".
 	printPattern := regexp.MustCompile(`\bprint\s*\(`)
 
-	// Track which Python function we're currently inside to skip initialize/finalize.
-	var currentFunction string
-	var functionIndent int
+	var functionStack []funcScopeEntry
 
 	for lineNum, line := range lines {
 		strippedLine := strings.TrimSpace(line)
 
-		// Skip empty lines — they don't affect scope tracking or contain statements.
 		if strippedLine == "" {
 			continue
 		}
 
-		// Skip lines that are entirely comments (e.g., "# logger.info(...)").
 		if strings.HasPrefix(strippedLine, "#") {
 			continue
 		}
 
-		// Calculate indentation level (number of leading space/tab characters).
 		lineIndent := len(line) - len(strings.TrimLeft(line, " \t"))
 
-		// Detect function definitions: "def funcname("
-		if funcMatch := funcDefPattern.FindStringSubmatch(strippedLine); len(funcMatch) > 1 {
-			currentFunction = funcMatch[1]
-			functionIndent = lineIndent
-			continue
+		for len(functionStack) > 0 && functionStack[len(functionStack)-1].indent >= lineIndent {
+			functionStack = functionStack[:len(functionStack)-1]
 		}
 
-		// If the current line's indentation is at or below the function definition's level,
-		// we've exited that function's scope (Python's indentation-based scoping).
-		if currentFunction != "" && lineIndent <= functionIndent && strippedLine != "" {
-			currentFunction = ""
+		if funcMatch := funcDefPattern.FindStringSubmatch(strippedLine); len(funcMatch) > 1 {
+			functionStack = append(functionStack, funcScopeEntry{name: funcMatch[1], indent: lineIndent})
+			continue
 		}
 
-		// Skip statements inside initialize() and finalize() — these only execute during
-		// model load/unload, not during inference, so logging there is acceptable.
-		if currentFunction == allowedFuncInitialize || currentFunction == allowedFuncFinalize {
+		if isInsideAllowedFunction(functionStack) {
 			continue
 		}
 
-		// Check for logger.info() or logger.debug() calls (case-insensitive).
-		if loggerPattern.MatchString(line) {
+		codePortion := stripInlineComment(line)
+
+		if loggerPattern.MatchString(codePortion) {
 			details = append(details, fmt.Sprintf("line %d: %s", lineNum+1, strippedLine))
 			found = true
-		}
-
-		// Check for print() calls.
-		if printPattern.MatchString(line) {
+		} else if printPattern.MatchString(codePortion) {
 			details = append(details, fmt.Sprintf("line %d: %s", lineNum+1, strippedLine))
 			found = true
 		}
@@ -781,6 +757,52 @@ func hasPythonLoggerOrPrintStatements(content []byte) (found bool, details []str
 	return found, details
 }
 
+// isInsideAllowedFunction returns true if any entry on the function scope stack
+// is initialize() or finalize().
+func isInsideAllowedFunction(stack []funcScopeEntry) bool {
+	for _, entry := range stack {
+		if entry.name == allowedFuncInitialize || entry.name == allowedFuncFinalize {
+			return true
+		}
+	}
+	return false
+}
+
+// stripInlineComment removes a trailing Python inline comment (# ...) from a
+// line while preserving '#' characters that appear inside string literals.
+// Uses simple single/double quote state tracking with backslash-escape awareness.
+func stripInlineComment(line string) string {
+	inSingle := false
+	inDouble := false
+	escaped := false
+	for i := 0; i < len(line); i++ {
+		ch := line[i]
+		if escaped {
+			escaped = false
+			continue
+		}
+		if ch == '\\' {
+			escaped = true
+			continue
+		}
+		switch ch {
+		case '\'':
+			if !inDouble {
+				inSingle = !inSingle
+			}
+		case '"':
+			if !inSingle {
+				inDouble = !inDouble
+			}
+		case '#':
+			if !inSingle && !inDouble {
+				return line[:i]
+			}
+		}
+	}
+	return line
+}
+
 // isEnsembleModel checks if the model configuration represents a Triton ensemble model.
 // Ensemble models are identified by:
 //   - backend field set to "ensemble", OR
@@ -793,7 +815,7 @@ func isEnsembleModel(config *ModelConfig) bool {
 	if config.GetPlatform() == ensemblePlatform {
 		return true
 	}
-	// Also check if ensemble_scheduling is defined, which implies an ensemble model
+
 	if config.GetEnsembleScheduling() != nil {
 		return true
 	}
diff --git a/horizon/internal/predator/handler/predator_upload_test.go b/horizon/internal/predator/handler/predator_upload_test.go
index 6639faca..1cf36efe 100644
--- a/horizon/internal/predator/handler/predator_upload_test.go
+++ b/horizon/internal/predator/handler/predator_upload_test.go
@@ -264,6 +264,62 @@ logger.info('module level - should flag')
 			wantFound:   false,
 			wantDetails: 0,
 		},
+		{
+			name: "nested def inside initialize is allowed",
+			content: `def initialize(self, args):
+    def _load_model():
+        logger.info('loading weights')
+        print('progress')
+    _load_model()
+`,
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name: "nested def inside finalize is allowed",
+			content: `def finalize(self):
+    def _cleanup():
+        logger.info('releasing resources')
+    _cleanup()
+`,
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name: "nested def inside execute is still flagged",
+			content: `def execute(self, requests):
+    def _process():
+        logger.info('processing batch')
+    _process()
+    return []
+`,
+			wantFound:   true,
+			wantDetails: 1,
+		},
+		{
+			name:        "inline comment with logger is not flagged",
+			content:     "def execute(requests):\n    x = compute()  # logger.info('debug')\n    return x\n",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name:        "inline comment with print is not flagged",
+			content:     "def execute(requests):\n    x = compute()  # print('debug')\n    return x\n",
+			wantFound:   false,
+			wantDetails: 0,
+		},
+		{
+			name:        "hash inside string literal is not treated as comment",
+			content:     "def execute(requests):\n    print('value is # 42')\n    return []\n",
+			wantFound:   true,
+			wantDetails: 1,
+		},
+		{
+			name:        "line matching both logger and print reports once",
+			content:     "def execute(requests):\n    print(logger.info('x'))\n    return []\n",
+			wantFound:   true,
+			wantDetails: 1,
+		},
 	}
 
 	for _, tt := range tests {
@@ -629,3 +685,57 @@ max_batch_size: 8
 	assert.Contains(t, errMsg, "model.py")
 	assert.Contains(t, errMsg, "utils.py")
 }
+
+func TestStripInlineComment(t *testing.T) {
+	tests := []struct {
+		name string
+		line string
+		want string
+	}{
+		{name: "no comment", line: "x = 1", want: "x = 1"},
+		{name: "trailing comment", line: "x = 1  # set x", want: "x = 1  "},
+		{name: "hash in single quotes", line: "x = 'a#b'", want: "x = 'a#b'"},
+		{name: "hash in double quotes", line: `x = "a#b"`, want: `x = "a#b"`},
+		{name: "hash after quoted string", line: `x = "a#b"  # comment`, want: `x = "a#b"  `},
+		{name: "escaped quote", line: `x = 'it\'s'  # note`, want: `x = 'it\'s'  `},
+		{name: "full line comment", line: "# comment", want: ""},
+		{name: "empty line", line: "", want: ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := stripInlineComment(tt.line)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+func TestIsInsideAllowedFunction(t *testing.T) {
+	tests := []struct {
+		name  string
+		stack []funcScopeEntry
+		want  bool
+	}{
+		{name: "empty stack", stack: nil, want: false},
+		{name: "non-allowed function", stack: []funcScopeEntry{{name: "execute", indent: 0}}, want: false},
+		{name: "initialize on stack", stack: []funcScopeEntry{{name: "initialize", indent: 0}}, want: true},
+		{name: "finalize on stack", stack: []funcScopeEntry{{name: "finalize", indent: 0}}, want: true},
+		{
+			name:  "nested inside initialize",
+			stack: []funcScopeEntry{{name: "initialize", indent: 0}, {name: "helper", indent: 4}},
+			want:  true,
+		},
+		{
+			name:  "nested inside non-allowed",
+			stack: []funcScopeEntry{{name: "execute", indent: 0}, {name: "helper", indent: 4}},
+			want:  false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := isInsideAllowedFunction(tt.stack)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}

From 90a2214972446ddbcf04c8d8c5a4783aa7437c5d Mon Sep 17 00:00:00 2001
From: shubhangi-raj-meesho <shubhangi.raj@meesho.com>
Date: Mon, 23 Feb 2026 16:11:47 +0530
Subject: [PATCH 5/5] removed extra comments

---
 .../predator/handler/predator_upload.go       | 36 +------------------
 1 file changed, 1 insertion(+), 35 deletions(-)

diff --git a/horizon/internal/predator/handler/predator_upload.go b/horizon/internal/predator/handler/predator_upload.go
index d02da174..5e48c29b 100644
--- a/horizon/internal/predator/handler/predator_upload.go
+++ b/horizon/internal/predator/handler/predator_upload.go
@@ -553,15 +553,6 @@ func (p *Predator) replaceModelNameInConfigPreservingFormat(data []byte, destMod
 // validateNoLoggerOrPrintStatements checks that Python backend models do not contain
 // uncommented logger.info, logger.debug, or print statements outside of initialize()
 // and finalize() functions.
-//
-// Workflow:
-//  1. Reads and parses config.pbtxt to determine the model backend type.
-//  2. If the backend is NOT "python", the check is skipped (non-Python backends
-//     like tensorrt or onnxruntime don't contain .py files).
-//  3. If the backend IS "python", enumerates all .py files in the model source
-//     directory using ListFilesWithSuffix.
-//  4. For each .py file, reads its content and scans for logger/print statements.
-//  5. Returns an error listing violations summary if any violations are found.
 func (p *Predator) validateNoLoggerOrPrintStatements(gcsPath string) error {
 	srcBucket, srcPath := extractGCSPath(gcsPath)
 	if srcBucket == "" || srcPath == "" {
@@ -633,12 +624,7 @@ func (p *Predator) validateNoLoggerOrPrintStatements(gcsPath string) error {
 // buildBalancedViolationSummary produces a human-readable error string that shows
 // at most maxDisplayedViolations individual violation details, distributed fairly
 // across all files using round-robin allocation. Each file entry includes its
-// total violation count, and files whose violations exceed their allocated share
-// get an "...and N more" suffix.
-//
-// Round-robin ensures fair distribution: each file gets one slot per round until
-// all slots are consumed. Files with fewer violations than their fair share
-// naturally release unused slots to files with more violations.
+// total violation count.
 func buildBalancedViolationSummary(perFileViolations []fileViolationInfo, totalViolations int) string {
 	const maxDisplayedViolations = maxDisplayedViolationsForPythonModel
 
@@ -693,21 +679,6 @@ func buildBalancedViolationSummary(perFileViolations []fileViolationInfo, totalV
 // hasPythonLoggerOrPrintStatements scans Python source code for uncommented
 // logger.info, logger.debug, or print() statements that are NOT inside
 // initialize() or finalize() functions.
-//
-// The logic tracks nested Python function scopes using a stack of (name, indent)
-// entries so that nested defs inside initialize/finalize are correctly treated as
-// allowed:
-//  1. On each non-empty, non-comment line, pop stack entries whose indentation is
-//     >= the current line's indentation (dedent = scope exit).
-//  2. If the line is a "def funcname(" pattern, push the new scope onto the stack.
-//  3. If any entry on the stack is initialize or finalize, skip the line.
-//
-// Inline comments are stripped before pattern matching to avoid false positives on
-// trailing comments like `x = f()  # logger.info(...)`.
-//
-// Returns:
-//   - found: true if any violations were detected
-//   - details: human-readable descriptions of each violation (line number + content)
 func hasPythonLoggerOrPrintStatements(content []byte) (found bool, details []string) {
 	lines := strings.Split(string(content), "\n")
 
@@ -770,7 +741,6 @@ func isInsideAllowedFunction(stack []funcScopeEntry) bool {
 
 // stripInlineComment removes a trailing Python inline comment (# ...) from a
 // line while preserving '#' characters that appear inside string literals.
-// Uses simple single/double quote state tracking with backslash-escape awareness.
 func stripInlineComment(line string) string {
 	inSingle := false
 	inDouble := false
@@ -804,10 +774,6 @@ func stripInlineComment(line string) string {
 }
 
 // isEnsembleModel checks if the model configuration represents a Triton ensemble model.
-// Ensemble models are identified by:
-//   - backend field set to "ensemble", OR
-//   - platform field set to "ensemble", OR
-//   - ensemble_scheduling section being present in the config
 func isEnsembleModel(config *ModelConfig) bool {
 	if config.GetBackend() == ensembleBackend {
 		return true