diff --git a/backend/plugins/argocd/models/migrationscripts/20260331_add_repo_url_to_sync_operations.go b/backend/plugins/argocd/models/migrationscripts/20260331_add_repo_url_to_sync_operations.go new file mode 100644 index 00000000000..5acdc543b9c --- /dev/null +++ b/backend/plugins/argocd/models/migrationscripts/20260331_add_repo_url_to_sync_operations.go @@ -0,0 +1,54 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package migrationscripts + +import ( + "github.com/apache/incubator-devlake/core/context" + "github.com/apache/incubator-devlake/core/errors" + "github.com/apache/incubator-devlake/core/plugin" +) + +var _ plugin.MigrationScript = (*addRepoURLToSyncOperations)(nil) + +type addRepoURLToSyncOperations struct{} + +// addRepoURLSyncOpArchived is a snapshot of ArgocdSyncOperation used solely +// for this migration so the live model can evolve independently. 
+type addRepoURLSyncOpArchived struct { + ConnectionId uint64 `gorm:"primaryKey"` + ApplicationName string `gorm:"primaryKey;type:varchar(255)"` + DeploymentId int64 `gorm:"primaryKey"` + RepoURL string `gorm:"type:varchar(500)"` +} + +func (addRepoURLSyncOpArchived) TableName() string { + return "_tool_argocd_sync_operations" +} + +func (m *addRepoURLToSyncOperations) Up(basicRes context.BasicRes) errors.Error { + db := basicRes.GetDal() + return db.AutoMigrate(&addRepoURLSyncOpArchived{}) +} + +func (*addRepoURLToSyncOperations) Version() uint64 { + return 20260331000000 +} + +func (*addRepoURLToSyncOperations) Name() string { + return "argocd add repo_url to sync operations" +} diff --git a/backend/plugins/argocd/models/migrationscripts/register.go b/backend/plugins/argocd/models/migrationscripts/register.go index 2c70d811548..9ed9242062c 100644 --- a/backend/plugins/argocd/models/migrationscripts/register.go +++ b/backend/plugins/argocd/models/migrationscripts/register.go @@ -25,5 +25,6 @@ func All() []plugin.MigrationScript { return []plugin.MigrationScript{ new(addInitTables), new(addImageSupportArtifacts), + new(addRepoURLToSyncOperations), } } diff --git a/backend/plugins/argocd/models/sync_operation.go b/backend/plugins/argocd/models/sync_operation.go index 77cc095447d..ea8838cb108 100644 --- a/backend/plugins/argocd/models/sync_operation.go +++ b/backend/plugins/argocd/models/sync_operation.go @@ -28,6 +28,7 @@ type ArgocdSyncOperation struct { ApplicationName string `gorm:"primaryKey;type:varchar(255)"` DeploymentId int64 `gorm:"primaryKey"` // History ID from ArgoCD Revision string `gorm:"type:varchar(255)"` // Git SHA + RepoURL string `gorm:"type:varchar(500)"` // Git repo URL resolved from source/sources at extraction time Kind string `gorm:"type:varchar(100)"` // Kubernetes resource kind: Deployment, ReplicaSet, Rollout, StatefulSet, DaemonSet, etc. 
StartedAt *time.Time FinishedAt *time.Time diff --git a/backend/plugins/argocd/tasks/application_extractor.go b/backend/plugins/argocd/tasks/application_extractor.go index 3a0568609a5..1c789999c79 100644 --- a/backend/plugins/argocd/tasks/application_extractor.go +++ b/backend/plugins/argocd/tasks/application_extractor.go @@ -38,6 +38,13 @@ var ExtractApplicationsMeta = plugin.SubTaskMeta{ ProductTables: []string{models.ArgocdApplication{}.TableName()}, } +type ArgocdApiApplicationSource struct { + RepoURL string `json:"repoURL"` + Path string `json:"path"` + TargetRevision string `json:"targetRevision"` + Chart string `json:"chart"` +} + type ArgocdApiApplication struct { Metadata struct { Name string `json:"name"` @@ -46,11 +53,9 @@ type ArgocdApiApplication struct { } `json:"metadata"` Spec struct { Project string `json:"project"` - Source struct { - RepoURL string `json:"repoURL"` - Path string `json:"path"` - TargetRevision string `json:"targetRevision"` - } `json:"source"` + // Single-source apps use Source; multi-source apps use Sources. + Source ArgocdApiApplicationSource `json:"source"` + Sources []ArgocdApiApplicationSource `json:"sources"` Destination struct { Server string `json:"server"` Namespace string `json:"namespace"` @@ -88,13 +93,31 @@ func ExtractApplications(taskCtx plugin.SubTaskContext) errors.Error { return nil, errors.Default.Wrap(err, "error unmarshaling application") } + // Resolve the primary source. Multi-source apps populate spec.sources[] + // instead of spec.source; we prefer the first git-hosted source so that + // cicd_deployment_commits.repo_url is a browsable repository URL rather + // than a Helm chart registry address. + primarySource := apiApp.Spec.Source + if primarySource.RepoURL == "" && len(apiApp.Spec.Sources) > 0 { + for _, src := range apiApp.Spec.Sources { + if isGitHostedURL(src.RepoURL) { + primarySource = src + break + } + } + // Fallback: use the first source if none matched the git-host heuristic. 
+ if primarySource.RepoURL == "" { + primarySource = apiApp.Spec.Sources[0] + } + } + application := &models.ArgocdApplication{ Name: apiApp.Metadata.Name, Namespace: apiApp.Metadata.Namespace, Project: apiApp.Spec.Project, - RepoURL: apiApp.Spec.Source.RepoURL, - Path: apiApp.Spec.Source.Path, - TargetRevision: apiApp.Spec.Source.TargetRevision, + RepoURL: primarySource.RepoURL, + Path: primarySource.Path, + TargetRevision: primarySource.TargetRevision, DestServer: apiApp.Spec.Destination.Server, DestNamespace: apiApp.Spec.Destination.Namespace, SyncStatus: apiApp.Status.Sync.Status, diff --git a/backend/plugins/argocd/tasks/sync_operation_convertor.go b/backend/plugins/argocd/tasks/sync_operation_convertor.go index 9b34d03397d..e577b7af185 100644 --- a/backend/plugins/argocd/tasks/sync_operation_convertor.go +++ b/backend/plugins/argocd/tasks/sync_operation_convertor.go @@ -137,8 +137,14 @@ func ConvertSyncOperations(taskCtx plugin.SubTaskContext) errors.Error { results = append(results, deployment) if syncOp.Revision != "" { + // Priority: repo_url resolved at extraction time (always present for + // multi-source apps) → application-level repo_url → deployment name + // as a last-resort non-empty placeholder. repoUrl := deployment.Name - if application != nil && application.RepoURL != "" { + switch { + case syncOp.RepoURL != "": + repoUrl = syncOp.RepoURL + case application != nil && application.RepoURL != "": repoUrl = application.RepoURL } diff --git a/backend/plugins/argocd/tasks/sync_operation_extractor.go b/backend/plugins/argocd/tasks/sync_operation_extractor.go index ed5ce8e9511..ae90584dc02 100644 --- a/backend/plugins/argocd/tasks/sync_operation_extractor.go +++ b/backend/plugins/argocd/tasks/sync_operation_extractor.go @@ -43,15 +43,23 @@ var ExtractSyncOperationsMeta = plugin.SubTaskMeta{ ProductTables: []string{models.ArgocdSyncOperation{}.TableName()}, } +// ArgocdApiSyncSource represents a single source in a multi-source ArgoCD application. 
type ArgocdApiSyncSource struct {
	// RepoURL is the address of the source as reported by ArgoCD. It may be a
	// git repository or a Helm chart repository / registry.
	RepoURL string `json:"repoURL"`
	// Chart is the Helm chart name. ArgoCD only sets it for sources pulled from
	// a Helm repository, so a non-empty value marks the source as a chart.
	Chart string `json:"chart"`
}

// chartSchemePrefixes lists lower-case URL schemes that address Helm chart
// storage (object buckets, OCI registries) and never a git repository.
var chartSchemePrefixes = []string{"gs://", "oci://", "s3://"}

// knownGitHosts lists lower-case substrings that identify well-known git
// hosting services; the trailing-dot entries match self-hosted instances whose
// host name contains the product name (e.g. "gitea.internal.corp").
var knownGitHosts = []string{
	"github.com",
	"gitlab.com",
	"bitbucket.org",
	"dev.azure.com",
	"ssh.dev.azure.com",
	"gitea.",
	"forgejo.",
}

// hasChartScheme reports whether repoURL starts with one of
// chartSchemePrefixes, ignoring case and surrounding whitespace.
func hasChartScheme(repoURL string) bool {
	lower := strings.ToLower(strings.TrimSpace(repoURL))
	for _, pfx := range chartSchemePrefixes {
		if strings.HasPrefix(lower, pfx) {
			return true
		}
	}
	return false
}

// isChartSource reports whether src refers to a Helm chart rather than a git
// repository: either ArgoCD named a chart for it, or its URL uses a
// chart-only scheme.
func isChartSource(src ArgocdApiSyncSource) bool {
	return src.Chart != "" || hasChartScheme(src.RepoURL)
}

// resolveMultiSourceRevision picks the git commit SHA from a multi-source ArgoCD
// application's revisions slice. Multi-source apps store one revision per
// source, index-aligned with sources: Helm chart sources carry a chart version
// while git sources carry a 40-hex commit SHA.
//
// The first SHA-shaped revision whose source is a non-chart URL accepted by
// isGitHostedURL wins. If there is none, the first SHA-shaped revision of any
// source is returned, which covers self-hosted git servers the host heuristic
// does not know. An empty string is returned when no revision looks like a
// commit SHA.
//
// Single-source apps already populate the top-level "revision" field, so this
// function is only needed when that field is empty.
func resolveMultiSourceRevision(revisions []string, sources []ArgocdApiSyncSource) string {
	if len(revisions) == 0 {
		return ""
	}

	// Pass 1: prefer a revision whose corresponding source is a git hosting service.
	for i, rev := range revisions {
		if i >= len(sources) {
			// Revisions without a matching source cannot be classified here.
			break
		}
		src := sources[i]
		if !isChartSource(src) && isGitHostedURL(src.RepoURL) && isCommitSHA(rev) {
			return rev
		}
	}

	// Pass 2: accept any revision that looks like a full commit SHA regardless of
	// source type (covers self-hosted Gitea / Forgejo / etc.).
	for _, rev := range revisions {
		if isCommitSHA(rev) {
			return rev
		}
	}

	return ""
}

// isGitHostedURL reports whether repoURL looks like a git repository URL.
//
// Matching is case-insensitive and ignores surrounding whitespace. URLs with a
// chart-only scheme (gs://, oci://, s3://) are rejected first, because registry
// hosts such as "oci://registry.gitlab.com/..." contain the name of a git
// hosting service without being a repository. Otherwise the URL is accepted
// when it contains one of knownGitHosts or ends in ".git".
func isGitHostedURL(repoURL string) bool {
	lower := strings.ToLower(strings.TrimSpace(repoURL))
	if lower == "" {
		return false
	}
	if hasChartScheme(lower) {
		return false
	}
	for _, host := range knownGitHosts {
		if strings.Contains(lower, host) {
			return true
		}
	}
	// A .git suffix is a strong signal even for unknown hosts.
	return strings.HasSuffix(lower, ".git")
}

// resolveGitRepoURL returns the best git repository URL from a sync operation's
// source metadata.
//
// A non-empty singleSourceURL (single-source app) is returned unchanged. For
// multi-source apps, sources that are Helm charts (Chart set, or a chart-only
// URL scheme) are ignored; among the rest, the first URL accepted by
// isGitHostedURL is preferred, and otherwise the first non-empty URL is used so
// that self-hosted servers with custom domains are still picked up. An empty
// string is returned when every source is a chart or has no URL, leaving the
// choice of a fallback to the caller.
func resolveGitRepoURL(singleSourceURL string, sources []ArgocdApiSyncSource) string {
	// Single-source app: use the URL directly.
	if singleSourceURL != "" {
		return singleSourceURL
	}

	fallback := ""
	for _, src := range sources {
		if src.RepoURL == "" || isChartSource(src) {
			continue
		}
		if isGitHostedURL(src.RepoURL) {
			return src.RepoURL
		}
		// Remember the first unrecognised non-chart URL, but keep scanning in
		// case a later source is on a known git host.
		if fallback == "" {
			fallback = src.RepoURL
		}
	}
	return fallback
}

// isCommitSHA reports whether s is exactly 40 hexadecimal digits (upper or
// lower case), the textual form of a Git SHA-1 commit id.
func isCommitSHA(s string) bool {
	if len(s) != 40 {
		return false
	}
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c >= '0' && c <= '9', c >= 'a' && c <= 'f', c >= 'A' && c <= 'F':
			// hex digit, keep scanning
		default:
			return false
		}
	}
	return true
}
+ revisions := []string{"2.6.2", "5dd95b4efd7e9b668c361bbddb8d7f1e56c32ac1"} + sources := []ArgocdApiSyncSource{ + {RepoURL: "gs://charts-example-net/infra/stable", Chart: "generic-service"}, + {RepoURL: "https://github.com/example/my-repo"}, + } + got := resolveMultiSourceRevision(revisions, sources) + assert.Equal(t, "5dd95b4efd7e9b668c361bbddb8d7f1e56c32ac1", got) +} + +func TestResolveMultiSourceRevision_GitLabSourceWins(t *testing.T) { + revisions := []string{"1.0.0", "aabbccdd11223344aabbccdd11223344aabbccdd"} + sources := []ArgocdApiSyncSource{ + {RepoURL: "oci://registry.example.com/charts", Chart: "app"}, + {RepoURL: "https://gitlab.com/example/config"}, + } + got := resolveMultiSourceRevision(revisions, sources) + assert.Equal(t, "aabbccdd11223344aabbccdd11223344aabbccdd", got) +} + +func TestResolveMultiSourceRevision_FallbackToAnySHA(t *testing.T) { + // Neither source matches a known git hosting service (no github/gitlab/gitea/etc. + // prefix). The function should still return the 40-hex SHA via the fallback + // pass that accepts any commit-SHA-shaped revision regardless of source type. + revisions := []string{"1.2.3", "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"} + sources := []ArgocdApiSyncSource{ + {RepoURL: "gs://bucket/charts"}, + {RepoURL: "https://git.acme-corp.internal/team/config"}, + } + got := resolveMultiSourceRevision(revisions, sources) + assert.Equal(t, "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef", got) +} + +func TestResolveMultiSourceRevision_EmptyRevisions(t *testing.T) { + assert.Equal(t, "", resolveMultiSourceRevision(nil, nil)) + assert.Equal(t, "", resolveMultiSourceRevision([]string{}, []ArgocdApiSyncSource{})) +} + +func TestResolveMultiSourceRevision_AllSemver(t *testing.T) { + // All revisions are semver tags; nothing looks like a commit SHA. 
+ revisions := []string{"1.0.0", "2.3.4"} + sources := []ArgocdApiSyncSource{ + {RepoURL: "oci://registry.example.com/charts"}, + {RepoURL: "oci://registry.example.com/other"}, + } + assert.Equal(t, "", resolveMultiSourceRevision(revisions, sources)) +} + +func TestResolveMultiSourceRevision_SingleGitSHA(t *testing.T) { + // Single-source multi-source edge case. + revisions := []string{"abcdef1234567890abcdef1234567890abcdef12"} + sources := []ArgocdApiSyncSource{{RepoURL: "https://github.com/example/repo"}} + got := resolveMultiSourceRevision(revisions, sources) + assert.Equal(t, "abcdef1234567890abcdef1234567890abcdef12", got) +} + +// ── isCommitSHA ─────────────────────────────────────────────────────────────── + +func TestIsCommitSHA(t *testing.T) { + assert.True(t, isCommitSHA("5dd95b4efd7e9b668c361bbddb8d7f1e56c32ac1")) + assert.True(t, isCommitSHA("AABBCCDD11223344AABBCCDD11223344AABBCCDD")) + assert.False(t, isCommitSHA("2.6.2")) + assert.False(t, isCommitSHA("")) + assert.False(t, isCommitSHA("5dd95b4efd7e9b668c361bbddb8d7f1e56c32ac")) // 39 chars + assert.False(t, isCommitSHA("5dd95b4efd7e9b668c361bbddb8d7f1e56c32ac12")) // 41 chars +} + +// ── resolveGitRepoURL ───────────────────────────────────────────────────────── + +func TestResolveGitRepoURL_SingleSource(t *testing.T) { + // Single-source app: singleSourceURL is used directly, sources ignored. + got := resolveGitRepoURL("https://github.com/example/my-app", nil) + assert.Equal(t, "https://github.com/example/my-app", got) +} + +func TestResolveGitRepoURL_MultiSourceGitHubWins(t *testing.T) { + // Multi-source pattern: GCS chart + GitHub values ref. 
+ sources := []ArgocdApiSyncSource{ + {RepoURL: "gs://charts-example-net/infra/stable", Chart: "generic-service"}, + {RepoURL: "https://github.com/example/my-app"}, + } + got := resolveGitRepoURL("", sources) + assert.Equal(t, "https://github.com/example/my-app", got) +} + +func TestResolveGitRepoURL_MultiSourceOCIChart(t *testing.T) { + // OCI chart + GitLab values repo. + sources := []ArgocdApiSyncSource{ + {RepoURL: "oci://registry.example.com/charts", Chart: "app"}, + {RepoURL: "https://gitlab.com/org/config"}, + } + got := resolveGitRepoURL("", sources) + assert.Equal(t, "https://gitlab.com/org/config", got) +} + +func TestResolveGitRepoURL_FallbackNonChartURL(t *testing.T) { + // No known git host but a non-chart HTTPS URL is still better than nothing. + sources := []ArgocdApiSyncSource{ + {RepoURL: "gs://bucket/charts"}, + {RepoURL: "https://git.acme-corp.internal/team/config"}, + } + got := resolveGitRepoURL("", sources) + assert.Equal(t, "https://git.acme-corp.internal/team/config", got) +} + +func TestResolveGitRepoURL_AllChartSources(t *testing.T) { + // All sources are chart registries — returns empty string. 
+ sources := []ArgocdApiSyncSource{ + {RepoURL: "gs://charts-example-net/infra/stable"}, + {RepoURL: "oci://registry.example.com/charts"}, + } + got := resolveGitRepoURL("", sources) + assert.Equal(t, "", got) +} + +func TestResolveGitRepoURL_EmptySources(t *testing.T) { + assert.Equal(t, "", resolveGitRepoURL("", nil)) + assert.Equal(t, "", resolveGitRepoURL("", []ArgocdApiSyncSource{})) +} + +// ── isGitHostedURL ──────────────────────────────────────────────────────────── + +func TestIsGitHostedURL(t *testing.T) { + assert.True(t, isGitHostedURL("https://github.com/org/repo")) + assert.True(t, isGitHostedURL("git@github.com:org/repo.git")) + assert.True(t, isGitHostedURL("https://gitlab.com/org/repo")) + assert.True(t, isGitHostedURL("https://bitbucket.org/org/repo")) + assert.True(t, isGitHostedURL("https://dev.azure.com/org/proj/_git/repo")) + assert.True(t, isGitHostedURL("https://gitea.internal.corp/team/config")) + assert.True(t, isGitHostedURL("https://example.com/repo.git")) + + assert.False(t, isGitHostedURL("gs://charts-example-net/infra/stable")) + assert.False(t, isGitHostedURL("oci://registry.example.com/charts")) + assert.False(t, isGitHostedURL("s3://my-bucket/charts")) + assert.False(t, isGitHostedURL("")) +}