diff --git a/backend/helpers/pluginhelper/api/graphql_collector.go b/backend/helpers/pluginhelper/api/graphql_collector.go index fdd77909784..28a5b2b9d86 100644 --- a/backend/helpers/pluginhelper/api/graphql_collector.go +++ b/backend/helpers/pluginhelper/api/graphql_collector.go @@ -276,14 +276,18 @@ func (collector *GraphqlCollector) fetchAsync(reqData *GraphqlRequestData, handl } if len(dataErrors) > 0 { if !collector.args.IgnoreQueryErrors { + hasNonIgnorableDataErrors := false for _, dataError := range dataErrors { - if strings.Contains(dataError.Error(), "Could not resolve to an Issue") { - logger.Warn(nil, "Issue may have been transferred.") - } else { - collector.checkError(errors.Default.Wrap(dataError, `graphql query got error`)) + if isIgnorableGraphqlQueryError(dataError) { + logger.Warn(nil, "Issue may have been transferred or deleted.") + continue } + hasNonIgnorableDataErrors = true + collector.checkError(errors.Default.Wrap(dataError, `graphql query got error`)) + } + if hasNonIgnorableDataErrors { + return } - return } // else: error will deal by ResponseParserWithDataErrors } @@ -344,4 +348,8 @@ func (collector *GraphqlCollector) HasError() bool { return len(collector.workerErrors) > 0 } +func isIgnorableGraphqlQueryError(err error) bool { + return err != nil && strings.Contains(err.Error(), "Could not resolve to an Issue") +} + var _ plugin.SubTask = (*GraphqlCollector)(nil) diff --git a/backend/helpers/pluginhelper/api/graphql_collector_test.go b/backend/helpers/pluginhelper/api/graphql_collector_test.go new file mode 100644 index 00000000000..fcab4dce098 --- /dev/null +++ b/backend/helpers/pluginhelper/api/graphql_collector_test.go @@ -0,0 +1,31 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package api + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsIgnorableGraphqlQueryError(t *testing.T) { + assert.True(t, isIgnorableGraphqlQueryError(errors.New("Could not resolve to an Issue with the number of 17."))) + assert.False(t, isIgnorableGraphqlQueryError(errors.New("some other graphql error"))) + assert.False(t, isIgnorableGraphqlQueryError(nil)) +} diff --git a/backend/plugins/github_graphql/tasks/issue_collector.go b/backend/plugins/github_graphql/tasks/issue_collector.go index f4db1d5b6c5..c9a59f23c33 100644 --- a/backend/plugins/github_graphql/tasks/issue_collector.go +++ b/backend/plugins/github_graphql/tasks/issue_collector.go @@ -20,11 +20,14 @@ package tasks import ( "encoding/json" "reflect" + "sort" "strings" "time" "github.com/apache/incubator-devlake/core/dal" "github.com/apache/incubator-devlake/core/errors" + "github.com/apache/incubator-devlake/core/log" + "github.com/apache/incubator-devlake/core/models/common" "github.com/apache/incubator-devlake/core/plugin" "github.com/apache/incubator-devlake/core/utils" "github.com/apache/incubator-devlake/helpers/pluginhelper/api" @@ -49,7 +52,8 @@ type GraphqlQueryIssueWrapper struct { } type GraphqlQueryIssueDetailWrapper struct { - RateLimit struct { + requestedIssues map[int]missingGithubIssueRef + RateLimit struct { Cost int } Repository struct { @@ -84,6 +88,13 @@ type GraphqlQueryIssue struct { } `graphql:"labels(first: 100)"` } +type missingGithubIssueRef struct { + ConnectionId uint64 + GithubId int + Number int + RawDataOrigin common.RawDataOrigin +} + var CollectIssuesMeta = plugin.SubTaskMeta{ Name: "Collect Issues", EntryPoint: CollectIssues, @@ -175,12 +186,19 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error { ownerName := strings.Split(data.Options.Name, "/") inputIssues := reqData.Input.([]interface{}) outputIssues := []map[string]interface{}{} + query.requestedIssues = make(map[int]missingGithubIssueRef, len(inputIssues)) for _, i := range inputIssues { inputIssue := i.(*models.GithubIssue) outputIssues = append(outputIssues, map[string]interface{}{ `number`: graphql.Int(inputIssue.Number), }) issueUpdatedAt[inputIssue.Number] = inputIssue.GithubUpdatedAt + query.requestedIssues[inputIssue.Number] = missingGithubIssueRef{ + ConnectionId: inputIssue.ConnectionId, + GithubId: inputIssue.GithubId, + Number: inputIssue.Number, + RawDataOrigin: inputIssue.RawDataOrigin, + } } variables := map[string]interface{}{ "issue": outputIssues, @@ -193,10 +211,17 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error { query := queryWrapper.(*GraphqlQueryIssueDetailWrapper) issues := query.Repository.Issues for _, rawL := range issues { + if rawL.DatabaseId == 0 || rawL.Number == 0 { + continue + } if rawL.UpdatedAt.After(issueUpdatedAt[rawL.Number]) { messages = append(messages, errors.Must1(json.Marshal(rawL))) } } + missingIssues := findMissingGithubIssues(query.requestedIssues, issues) + if len(missingIssues) > 0 { + err = cleanupMissingGithubIssues(db, taskCtx.GetLogger(), missingIssues) + } return }, }) @@ -206,3 +231,95 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error { return apiCollector.Execute() } + +func findMissingGithubIssues(requestedIssues map[int]missingGithubIssueRef, resolvedIssues []GraphqlQueryIssue) []missingGithubIssueRef { + if len(requestedIssues) == 0 { + return nil + } + + resolvedNumbers := make(map[int]struct{}, len(resolvedIssues)) + for _, issue := range resolvedIssues { + if issue.DatabaseId == 0 || issue.Number == 0 { + continue + } + resolvedNumbers[issue.Number] = struct{}{} + } + + missingIssues := make([]missingGithubIssueRef, 0) + for number, issue := range requestedIssues { + if _, ok := resolvedNumbers[number]; ok { + continue + } + missingIssues = append(missingIssues, issue) + } + sort.Slice(missingIssues, func(i, j int) bool { + return missingIssues[i].Number < missingIssues[j].Number + }) + return missingIssues +} + +func cleanupMissingGithubIssues(db dal.Dal, logger log.Logger, issues []missingGithubIssueRef) errors.Error { + var allErrors []error + for _, issue := range issues { + logger.Warn(nil, "GitHub issue #%d no longer resolves from the source API, deleting stale local data", issue.Number) + err := cleanupMissingGithubIssue(db, issue) + if err != nil { + allErrors = append(allErrors, err) + } + } + return errors.Default.Combine(allErrors) +} + +func cleanupMissingGithubIssue(db dal.Dal, issue missingGithubIssueRef) errors.Error { + deleteByIssueId := func(model any, table string) errors.Error { + err := db.Delete(model, dal.From(table), dal.Where("connection_id = ? AND issue_id = ?", issue.ConnectionId, issue.GithubId)) + if err != nil { + return errors.Default.Wrap(err, "failed to delete stale github issue data from "+table) + } + return nil + } + + err := deleteByIssueId(&models.GithubIssueComment{}, models.GithubIssueComment{}.TableName()) + if err != nil { + return err + } + err = deleteByIssueId(&models.GithubIssueEvent{}, models.GithubIssueEvent{}.TableName()) + if err != nil { + return err + } + err = deleteByIssueId(&models.GithubIssueLabel{}, models.GithubIssueLabel{}.TableName()) + if err != nil { + return err + } + err = deleteByIssueId(&models.GithubIssueAssignee{}, models.GithubIssueAssignee{}.TableName()) + if err != nil { + return err + } + err = db.Delete( + &models.GithubPrIssue{}, + dal.From(models.GithubPrIssue{}.TableName()), + dal.Where("connection_id = ? AND issue_id = ?", issue.ConnectionId, issue.GithubId), + ) + if err != nil { + return errors.Default.Wrap(err, "failed to delete stale github pull request issue links") + } + err = db.Delete( + &models.GithubIssue{}, + dal.From(models.GithubIssue{}.TableName()), + dal.Where("connection_id = ? AND github_id = ?", issue.ConnectionId, issue.GithubId), + ) + if err != nil { + return errors.Default.Wrap(err, "failed to delete stale github issue") + } + if issue.RawDataOrigin.RawDataTable != "" && issue.RawDataOrigin.RawDataId != 0 { + err = db.Delete( + &api.RawData{}, + dal.From(issue.RawDataOrigin.RawDataTable), + dal.Where("id = ?", issue.RawDataOrigin.RawDataId), + ) + if err != nil { + return errors.Default.Wrap(err, "failed to delete stale raw github issue") + } + } + return nil +} diff --git a/backend/plugins/github_graphql/tasks/issue_collector_test.go b/backend/plugins/github_graphql/tasks/issue_collector_test.go new file mode 100644 index 00000000000..a2ddd75bdd8 --- /dev/null +++ b/backend/plugins/github_graphql/tasks/issue_collector_test.go @@ -0,0 +1,74 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tasks + +import ( + "testing" + + "github.com/apache/incubator-devlake/core/models/common" + "github.com/stretchr/testify/assert" +) + +func TestFindMissingGithubIssues(t *testing.T) { + requestedIssues := map[int]missingGithubIssueRef{ + 17: { + ConnectionId: 1, + GithubId: 1700, + Number: 17, + RawDataOrigin: common.RawDataOrigin{ + RawDataTable: "_raw_github_graphql_issues", + RawDataId: 10, + }, + }, + 18: { + ConnectionId: 1, + GithubId: 1800, + Number: 18, + }, + } + + resolvedIssues := []GraphqlQueryIssue{ + {DatabaseId: 1800, Number: 18}, + } + + missingIssues := findMissingGithubIssues(requestedIssues, resolvedIssues) + + if assert.Len(t, missingIssues, 1) { + assert.Equal(t, 17, missingIssues[0].Number) + assert.Equal(t, 1700, missingIssues[0].GithubId) + assert.Equal(t, uint64(10), missingIssues[0].RawDataOrigin.RawDataId) + } +} + +func TestFindMissingGithubIssuesSkipsZeroValueResponses(t *testing.T) { + requestedIssues := map[int]missingGithubIssueRef{ + 17: {Number: 17}, + 18: {Number: 18}, + } + + resolvedIssues := []GraphqlQueryIssue{ + {}, + {DatabaseId: 1800, Number: 18}, + } + + missingIssues := findMissingGithubIssues(requestedIssues, resolvedIssues) + + if assert.Len(t, missingIssues, 1) { + assert.Equal(t, 17, missingIssues[0].Number) + } +}