diff --git a/Common/OJS.Common/Extensions/EnumerableExtensions.cs b/Common/OJS.Common/Extensions/EnumerableExtensions.cs index 6796f2cf9b..a3b0f29b2b 100644 --- a/Common/OJS.Common/Extensions/EnumerableExtensions.cs +++ b/Common/OJS.Common/Extensions/EnumerableExtensions.cs @@ -5,7 +5,14 @@ namespace OJS.Common.Extensions; public static class EnumerableExtensions { - public static T? MaxOrDefault(this IEnumerable enumerable) - => enumerable.DefaultIfEmpty() - .Max(); + public static IEnumerable> InBatches(this IEnumerable queryable, int size) + { + var current = queryable.AsQueryable(); + while (current.Any()) + { + var batch = current.Take(size); + yield return batch; + current = current.Skip(size); + } + } } \ No newline at end of file diff --git a/Data/OJS.Data.Models/Submissions/ArchivedSubmission.cs b/Data/OJS.Data.Models/Submissions/ArchivedSubmission.cs new file mode 100644 index 0000000000..938b484671 --- /dev/null +++ b/Data/OJS.Data.Models/Submissions/ArchivedSubmission.cs @@ -0,0 +1,77 @@ +namespace OJS.Data.Models.Submissions +{ + using System; + using System.ComponentModel.DataAnnotations; + using System.ComponentModel.DataAnnotations.Schema; + using AutoMapper; + using OJS.Data.Validation; + using OJS.Services.Infrastructure.Models.Mapping; + using OJS.Workers.Common.Models; + + [Table("Submissions")] + public class ArchivedSubmission : IMapExplicitly, IEquatable + { + [Key] + [DatabaseGenerated(DatabaseGeneratedOption.None)] + public int Id { get; set; } + + public int ParticipantId { get; set; } + + public int ProblemId { get; set; } + + public int? SubmissionTypeId { get; set; } + + public byte[] Content { get; set; } = []; + + public string? FileExtension { get; set; } + + public byte[]? SolutionSkeleton { get; set; } + + public DateTime? StartedExecutionOn { get; set; } + + public DateTime? CompletedExecutionOn { get; set; } + + [StringLength(ConstraintConstants.IpAddressMaxLength)] + [Column(TypeName = "varchar")] + public string? IpAddress { get; set; } + + [StringLength(ConstraintConstants.Submission.WorkerNameMaxLength)] + public string? WorkerName { get; set; } + + public ExceptionType? ExceptionType { get; set; } + + public bool Processed { get; set; } + + public int Points { get; set; } + + public string? ProcessingComment { get; set; } + + public string? TestRunsCache { get; set; } + + public DateTime CreatedOn { get; set; } + + public DateTime? ModifiedOn { get; set; } + + public bool IsHardDeletedFromMainDatabase { get; set; } + + [NotMapped] + public bool IsBinaryFile => !string.IsNullOrWhiteSpace(this.FileExtension); + + [NotMapped] + public string ContentAsString + => this.IsBinaryFile ? string.Empty : this.Content.ToString(); + + public override bool Equals(object? obj) + => obj is ArchivedSubmission other && this.Equals(other); + + public bool Equals(ArchivedSubmission? other) + => other != null && this.Id == other.Id; + + public override int GetHashCode() + => this.Id.GetHashCode(); + + public void RegisterMappings(IProfileExpression configuration) + => configuration.CreateMap() + .ForMember(d => d.IsHardDeletedFromMainDatabase, opt => opt.MapFrom(s => false)); + } +} diff --git a/Data/OJS.Data/ArchivesDbContext.cs b/Data/OJS.Data/ArchivesDbContext.cs new file mode 100644 index 0000000000..0f2eeca0d0 --- /dev/null +++ b/Data/OJS.Data/ArchivesDbContext.cs @@ -0,0 +1,9 @@ +namespace OJS.Data; + +using Microsoft.EntityFrameworkCore; +using OJS.Data.Models.Submissions; + +public class ArchivesDbContext(DbContextOptions options) : DbContext(options) +{ + public DbSet Submissions { get; set; } = null!; +} \ No newline at end of file diff --git a/Servers/Administration/OJS.Servers.Administration/Extensions/ServiceCollectionExtensions.cs b/Servers/Administration/OJS.Servers.Administration/Extensions/ServiceCollectionExtensions.cs index 7b8b5a43e8..58659ec825 100644 --- a/Servers/Administration/OJS.Servers.Administration/Extensions/ServiceCollectionExtensions.cs +++ b/Servers/Administration/OJS.Servers.Administration/Extensions/ServiceCollectionExtensions.cs @@ -40,12 +40,14 @@ public static void ConfigureServices( .AddHttpClients(configuration) .AddTransient(typeof(IDataService<>), typeof(AdministrationDataService<>)) .AddTransient>() + .AddTransient() .AddTransient() .AddHangfireServer(configuration, AppName, [AdministrationQueueName]) .AddMessageQueue(configuration) .ConfigureGlobalDateFormat() .ConfigureCorsPolicy(configuration) .AddIdentityDatabase(configuration) + .AddArchivesDatabase(configuration) .AddResiliencePipelines() .AddMemoryCache() .AddDistributedCaching(configuration) diff --git a/Servers/Administration/OJS.Servers.Administration/Program.cs b/Servers/Administration/OJS.Servers.Administration/Program.cs index b8d31d0eb1..7047eb0870 100644 --- a/Servers/Administration/OJS.Servers.Administration/Program.cs +++ b/Servers/Administration/OJS.Servers.Administration/Program.cs @@ -1,6 +1,7 @@ namespace OJS.Servers.Administration; using Microsoft.AspNetCore.Builder; +using OJS.Data; using OJS.Servers.Administration.Extensions; using OJS.Servers.Infrastructure.Extensions; @@ -16,6 +17,8 @@ public static void Main(string[] args) var app = builder.Build(); + app.MigrateDatabase(builder.Configuration); + app.ConfigureWebApplication(builder.Configuration); app.Run(); } diff --git a/Servers/Infrastructure/OJS.Servers.Infrastructure/Extensions/ServiceCollectionExtensions.cs b/Servers/Infrastructure/OJS.Servers.Infrastructure/Extensions/ServiceCollectionExtensions.cs index 3eef815831..f7e2bce160 100644 --- a/Servers/Infrastructure/OJS.Servers.Infrastructure/Extensions/ServiceCollectionExtensions.cs +++ b/Servers/Infrastructure/OJS.Servers.Infrastructure/Extensions/ServiceCollectionExtensions.cs @@ -54,6 +54,8 @@ namespace OJS.Servers.Infrastructure.Extensions using System.Security.Claims; using System.Text.Json; using System.Threading.Tasks; + using Microsoft.Data.SqlClient; + using OJS.Data; using OpenAI; using RabbitMQ.Client; using static OJS.Common.GlobalConstants; @@ -199,6 +201,35 @@ public static IServiceCollection AddHangfireServer( return services; } + /// + /// Adds the archives database context to the service collection. + /// + /// The service collection. + /// The configuration. + public static IServiceCollection AddArchivesDatabase( + this IServiceCollection services, + IConfiguration configuration) + { + var defaultConnectionString = configuration.GetConnectionString(DefaultDbConnectionName); + + // Modify the connection string to use a different database name for archives + var builder = new SqlConnectionStringBuilder(defaultConnectionString); + builder.InitialCatalog = $"{builder.InitialCatalog}Archives"; + var connectionString = builder.ConnectionString; + + services + .AddDbContext(options => + { + options.UseSqlServer(connectionString); + }); + + services + .AddHealthChecks() + .AddSqlServer(connectionString, name: "archives-db"); + + return services; + } + public static IServiceCollection AddSwaggerDocs( this IServiceCollection services, string name, diff --git a/Servers/Infrastructure/OJS.Servers.Infrastructure/OJS.Servers.Infrastructure.csproj b/Servers/Infrastructure/OJS.Servers.Infrastructure/OJS.Servers.Infrastructure.csproj index 4f5affd8be..8d4f857d7e 100644 --- a/Servers/Infrastructure/OJS.Servers.Infrastructure/OJS.Servers.Infrastructure.csproj +++ b/Servers/Infrastructure/OJS.Servers.Infrastructure/OJS.Servers.Infrastructure.csproj @@ -4,6 +4,7 @@ + diff --git a/Services/Administration/OJS.Services.Administration.Business/Implementations/ArchivedSubmissionsBusinessService.cs b/Services/Administration/OJS.Services.Administration.Business/Implementations/ArchivedSubmissionsBusinessService.cs new file mode 100644 index 0000000000..98ab023526 --- /dev/null +++ b/Services/Administration/OJS.Services.Administration.Business/Implementations/ArchivedSubmissionsBusinessService.cs @@ -0,0 +1,116 @@ +namespace OJS.Services.Administration.Business.Implementations; + +using System.Linq; +using System.Threading.Tasks; +using DocumentFormat.OpenXml.Vml; +using OJS.Common; +using OJS.Data.Models.Submissions; +using OJS.Services.Administration.Data; +using OJS.Services.Common; +using OJS.Services.Common.Data; +using OJS.Services.Infrastructure; +using OJS.Services.Infrastructure.Extensions; + +public class ArchivedSubmissionsBusinessService : IArchivedSubmissionsBusinessService +{ + private readonly ISubmissionsDataService submissionsData; + private readonly IArchivesDataService archivesData; + private readonly IDatesService dates; + + public ArchivedSubmissionsBusinessService( + ISubmissionsDataService submissionsData, + IArchivesDataService archivesData, + IDatesService dates) + { + this.submissionsData = submissionsData; + this.archivesData = archivesData; + this.dates = dates; + } + + public async Task ArchiveOldSubmissionsDailyBatch(int limit, int maxSubBatchSize) + { + var leftoverSubmissionsFromBatchSplitting = limit % maxSubBatchSize; + var numberOfIterations = limit / maxSubBatchSize; + if(leftoverSubmissionsFromBatchSplitting > 0) + { + numberOfIterations++; + } + + var archived = 0; + + for (var i = 0; i < numberOfIterations; i++) + { + var curBatchSize = maxSubBatchSize; + var isLastIteration = i == (numberOfIterations - 1); + if(leftoverSubmissionsFromBatchSplitting > 0 && isLastIteration) + { + curBatchSize = leftoverSubmissionsFromBatchSplitting; + } + + var allSubmissionsForArchive = this + .GetSubmissionsForArchiving() + .OrderBy(x => x.Id) + .InBatches(GlobalConstants.BatchOperationsChunkSize, curBatchSize); + + foreach (var submissionsForArchiveBatch in allSubmissionsForArchive) + { + var submissionsForArchives = submissionsForArchiveBatch + .MapCollection() + .ToList(); + + if(submissionsForArchives.Count == 0) + { + break; + } + + archived += await this.archivesData.AddMany(submissionsForArchives); + await this.archivesData.SaveChanges(); + } + + await this.submissionsData.HardDeleteArchived(curBatchSize); + } + + return archived; + } + + public async Task ArchiveOldSubmissionsWithLimit(int limit) + { + var archived = 0; + var allSubmissionsForArchive = this + .GetSubmissionsForArchiving() + .OrderBy(x => x.Id) + .InBatches(GlobalConstants.BatchOperationsChunkSize, limit); + + foreach (var submissionsForArchiveBatch in allSubmissionsForArchive) + { + var submissionsForArchives = submissionsForArchiveBatch + .MapCollection() + .ToList(); + + if(submissionsForArchives.Count == 0) + { + break; + } + + archived += await this.archivesData.AddMany(submissionsForArchives); + await this.archivesData.SaveChanges(); + } + + return archived; + } + + public async Task HardDeleteArchivedByLimit(int limit) + => await this.submissionsData.HardDeleteArchived(limit); + + private IQueryable GetSubmissionsForArchiving() + { + var now = this.dates.GetUtcNow(); + var bestSubmissionCutoffDate = now.AddYears(-GlobalConstants.BestSubmissionEligibleForArchiveAgeInYears); + var nonBestSubmissionCutoffDate = now.AddYears(-GlobalConstants.NonBestSubmissionEligibleForArchiveAgeInYears); + + return this.submissionsData + .GetAllCreatedBeforeDateAndNonBestCreatedBeforeDate( + bestSubmissionCutoffDate, + nonBestSubmissionCutoffDate); + } +} \ No newline at end of file diff --git a/Services/Administration/OJS.Services.Administration.Business/Implementations/RecurringBackgroundJobsBusinessService.cs b/Services/Administration/OJS.Services.Administration.Business/Implementations/RecurringBackgroundJobsBusinessService.cs index 015f5fcee6..24bacecc38 100644 --- a/Services/Administration/OJS.Services.Administration.Business/Implementations/RecurringBackgroundJobsBusinessService.cs +++ b/Services/Administration/OJS.Services.Administration.Business/Implementations/RecurringBackgroundJobsBusinessService.cs @@ -15,6 +15,7 @@ public class RecurringBackgroundJobsBusinessService : IRecurringBackgroundJobsBu private readonly ISubmissionsForProcessingBusinessService submissionsForProcessing; private readonly IParticipantsBusinessService participantsBusinessService; private readonly IParticipantScoresBusinessService participantScoresBusiness; + private readonly IArchivedSubmissionsBusinessService archivedSubmissionsBusiness; private readonly IBusControl bus; private readonly ILogger logger; @@ -22,12 +23,14 @@ public RecurringBackgroundJobsBusinessService( ISubmissionsForProcessingBusinessService submissionsForProcessing, IParticipantsBusinessService participantsBusinessService, IParticipantScoresBusinessService participantScoresBusiness, + IArchivedSubmissionsBusinessService archivedSubmissionsBusiness, IBusControl bus, ILogger logger) { this.submissionsForProcessing = submissionsForProcessing; this.participantsBusinessService = participantsBusinessService; this.participantScoresBusiness = participantScoresBusiness; + this.archivedSubmissionsBusiness = archivedSubmissionsBusiness; this.bus = bus; this.logger = logger; } @@ -77,5 +80,27 @@ public async Task NormalizeAllPointsThatExceedAllowedLimit() return "Successfully normalized all points that exceed allowed limit"; } + + public async Task ArchiveOldSubmissionsDailyBatch() + { + const int archiveDailyBatchLimit = 500_000; + const int archiveMaxSubBatchSize = 10_000; + + var archivedCount = await this.archivedSubmissionsBusiness.ArchiveOldSubmissionsDailyBatch( + archiveDailyBatchLimit, + archiveMaxSubBatchSize); + + return $"Successfully archived {archivedCount} submissions."; + } + + public async Task HardDeleteArchivedSubmissions() + { + const int archiveSingleBatchLimit = 25_000; + + var hardDeletedCount = await this.archivedSubmissionsBusiness.HardDeleteArchivedByLimit( + archiveSingleBatchLimit); + + return $"Successfully hard deleted {hardDeletedCount} archived submissions."; + } } } \ No newline at end of file diff --git a/Services/Administration/OJS.Services.Administration.Data/IParticipantScoresDataService.cs b/Services/Administration/OJS.Services.Administration.Data/IParticipantScoresDataService.cs index dd2ce1fa24..224613e3d7 100644 --- a/Services/Administration/OJS.Services.Administration.Data/IParticipantScoresDataService.cs +++ b/Services/Administration/OJS.Services.Administration.Data/IParticipantScoresDataService.cs @@ -29,5 +29,7 @@ Task UpdateBySubmissionAndPoints( int submissionPoints, Participant participant, bool shouldSaveChanges = true); + + Task RemoveSubmissionIdsBySubmissionIds(IEnumerable submissionIds); } } \ No newline at end of file diff --git a/Services/Administration/OJS.Services.Administration.Data/ISubmissionsDataService.cs b/Services/Administration/OJS.Services.Administration.Data/ISubmissionsDataService.cs index 81a90c46c6..2b812fd552 100644 --- a/Services/Administration/OJS.Services.Administration.Data/ISubmissionsDataService.cs +++ b/Services/Administration/OJS.Services.Administration.Data/ISubmissionsDataService.cs @@ -40,5 +40,7 @@ IQueryable GetAllCreatedBeforeDateAndNonBestCreatedBeforeDate( Task RemoveTestRunsCacheByProblem(int problemId); Task> GetIdsByProblemId(int problemId); + + Task HardDeleteArchived(int deleteCountLimit = 0); } } \ No newline at end of file diff --git a/Services/Administration/OJS.Services.Administration.Data/Implementations/ParticipantScoresDataService.cs b/Services/Administration/OJS.Services.Administration.Data/Implementations/ParticipantScoresDataService.cs index a0b9c0bb27..48cea2a290 100644 --- a/Services/Administration/OJS.Services.Administration.Data/Implementations/ParticipantScoresDataService.cs +++ b/Services/Administration/OJS.Services.Administration.Data/Implementations/ParticipantScoresDataService.cs @@ -8,6 +8,7 @@ namespace OJS.Services.Administration.Data.Implementations using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; + using OJS.Common; public class ParticipantScoresDataService : AdministrationDataService, IParticipantScoresDataService { @@ -140,6 +141,16 @@ public async Task UpdateBySubmissionAndPoints( } } + public async Task RemoveSubmissionIdsBySubmissionIds(IEnumerable submissionIds) => + await this + .Update( + ps => submissionIds.Cast().Contains(ps.SubmissionId), + ps => new ParticipantScore + { + SubmissionId = null + }, + batchSize: GlobalConstants.BatchOperationsChunkSize); + private static void UpdateTotalScoreSnapshot( Participant participant, int previousPoints, diff --git a/Services/Administration/OJS.Services.Administration.Data/Implementations/SubmissionsDataService.cs b/Services/Administration/OJS.Services.Administration.Data/Implementations/SubmissionsDataService.cs index f0a7e49a47..34150d8f46 100644 --- a/Services/Administration/OJS.Services.Administration.Data/Implementations/SubmissionsDataService.cs +++ b/Services/Administration/OJS.Services.Administration.Data/Implementations/SubmissionsDataService.cs @@ -8,17 +8,38 @@ using OJS.Services.Infrastructure.Extensions; using System; using System.Collections.Generic; + using System.Data; using System.Linq; using System.Linq.Expressions; using System.Threading.Tasks; + using OJS.Common; + using OJS.Common.Extensions; + using OJS.Services.Common.Data; using static OJS.Common.GlobalConstants.Roles; public class SubmissionsDataService : AdministrationDataService, ISubmissionsDataService { private readonly IDatesService datesService; - public SubmissionsDataService(OjsDbContext submissions, IDatesService datesService) + private readonly IArchivesDataService archivesDataService; + private readonly ITestRunsDataService testRunsDataService; + private readonly IParticipantScoresDataService participantScoresData; + private readonly ITransactionsProvider transactions; + + public SubmissionsDataService( + OjsDbContext submissions, + IDatesService datesService, + IArchivesDataService archivesDataService, + ITestRunsDataService testRunsDataService, + IParticipantScoresDataService participantScoresData, + ITransactionsProvider transactions) : base(submissions) - => this.datesService = datesService; + { + this.datesService = datesService; + this.archivesDataService = archivesDataService; + this.testRunsDataService = testRunsDataService; + this.participantScoresData = participantScoresData; + this.transactions = transactions; + } public Submission? GetBestForParticipantByProblem(int participantId, int problemId) => this.GetAllByProblemAndParticipant(problemId, participantId) @@ -99,6 +120,68 @@ public async Task> GetIdsByProblemId(int problemId) .Select(s => s.Id) .ToListAsync(); + /// + /// Deletes archived submissions from OnlineJudgeSystem Db and marks them as Hard Deleted in OnlineJudgeSystemArchives. + /// + /// Specifies a limit to the number of submissions deleted, if omitted or 0 is passed, delete all available, without limits. + /// + public async Task HardDeleteArchived(int deleteCountLimit = 0) + { + var hardDeletedCount = 0; + + var submissionBatches = deleteCountLimit > 0 ? + this.archivesDataService + .GetAllNotHardDeletedFromMainDatabase() + .Distinct() + .OrderBy(x => x.Id) + .InSelfModifyingBatches(GlobalConstants.BatchOperationsChunkSize, deleteCountLimit) : + this.archivesDataService + .GetAllNotHardDeletedFromMainDatabase() + .Distinct() + .OrderBy(x => x.Id) + .InSelfModifyingBatches(GlobalConstants.BatchOperationsChunkSize); + + foreach (var submissionIdsBatch in submissionBatches) + { + var archivedIds = submissionIdsBatch + .Select(s => s.Id) + .ToHashSet(); + + if(archivedIds.Count == 0) + { + break; + } + + var idsSet = await this.GetQuery() + .Where(s => archivedIds.Contains(s.Id)) + .Select(x => x.Id) + .ToListAsync(); + + if(idsSet.Count > 0) + { + await this.transactions.ExecuteInTransaction(async () => + { + await this.participantScoresData.RemoveSubmissionIdsBySubmissionIds(idsSet); + + await this.testRunsDataService.Delete( + tr => idsSet.Contains(tr.SubmissionId), + batchSize: GlobalConstants.BatchOperationsChunkSize); + + await this.GetQuery(s => idsSet.Contains(s.Id)).DeleteFromQueryAsync(); + }, IsolationLevel.ReadCommitted); + } + + foreach (var archivedIdsBatch in archivedIds.InBatches(GlobalConstants.BatchOperationsChunkSize / 10)) + { + await this.archivesDataService.MarkAsHardDeletedFromMainDatabase(archivedIdsBatch); + } + + hardDeletedCount += idsSet.Count; + } + + return hardDeletedCount; + } + protected override Expression> GetUserFilter(UserInfoModel user) => submission => user.IsAdmin || submission.Problem.ProblemGroup.Contest.Category!.LecturersInContestCategories.Any(cc => cc.LecturerId == user.Id) || diff --git a/Services/Common/OJS.Services.Common.Data/IArchivesDataService.cs b/Services/Common/OJS.Services.Common.Data/IArchivesDataService.cs new file mode 100644 index 0000000000..47df9acd9f --- /dev/null +++ b/Services/Common/OJS.Services.Common.Data/IArchivesDataService.cs @@ -0,0 +1,29 @@ +namespace OJS.Services.Common.Data +{ + using System.Collections.Generic; + using System.Linq; + using System.Threading.Tasks; + using OJS.Data.Models.Submissions; + + public interface IArchivesDataService + { + /// + /// Gets archived submissions that are not hard-deleted from the main database. + /// + /// Queryable of archived submissions. + IQueryable GetAllNotHardDeletedFromMainDatabase(); + + /// + /// Marks archived submissions as hard-deleted from the main database. + /// + /// The IDs of submissions to mark. + Task MarkAsHardDeletedFromMainDatabase(IEnumerable submissionIds); + + Task AddMany(IEnumerable entities); + + /// + /// Saves all changes made in this context to the database. + /// + Task SaveChanges(); + } +} \ No newline at end of file diff --git a/Services/Common/OJS.Services.Common.Data/IDataService.cs b/Services/Common/OJS.Services.Common.Data/IDataService.cs index bef18ac3cc..ea184bb5e3 100644 --- a/Services/Common/OJS.Services.Common.Data/IDataService.cs +++ b/Services/Common/OJS.Services.Common.Data/IDataService.cs @@ -22,12 +22,19 @@ public interface IDataService : IService Task Update(Expression> filter, Expression,SetPropertyCalls>> setPropertyCalls); + Task Update( + Expression> filterExpression, + Expression> updateExpression, + int batchSize); + void UpdateMany(IEnumerable entities); void Delete(TEntity entity); void Delete(Expression>? filter = null); + Task Delete(Expression> filterExpression, int batchSize); + void DeleteMany(IEnumerable entities); Task ExecuteDelete(Expression> filter); diff --git a/Services/Common/OJS.Services.Common.Data/Implementations/ArchivesDataService.cs b/Services/Common/OJS.Services.Common.Data/Implementations/ArchivesDataService.cs new file mode 100644 index 0000000000..bb6aefa6e6 --- /dev/null +++ b/Services/Common/OJS.Services.Common.Data/Implementations/ArchivesDataService.cs @@ -0,0 +1,97 @@ +namespace OJS.Services.Common.Data.Implementations +{ + using System; + using System.Collections.Generic; + using System.Linq; + using System.Linq.Expressions; + using System.Threading.Tasks; + using Microsoft.EntityFrameworkCore; + using Microsoft.EntityFrameworkCore.ChangeTracking; + using OJS.Data; + using OJS.Data.Models.Submissions; + using OJS.Common; + + using efplus = Z.EntityFramework.Plus; + + public class ArchivesDataService : IArchivesDataService + { + private readonly ArchivesDbContext archivesDbContext; + private readonly DbSet dbSet; + + public ArchivesDataService(ArchivesDbContext archivesDbContext) + { + this.archivesDbContext = archivesDbContext; + this.dbSet = archivesDbContext.Set(); + } + + public IQueryable GetQuery( + Expression>? filter = null, + Expression>? orderBy = null, + bool descending = false, + int? skip = null, + int? take = null) + { + var query = this.dbSet.AsQueryable(); + + if (filter != null) + { + query = query.Where(filter); + } + + if (orderBy != null) + { + query = descending + ? query.OrderByDescending(orderBy) + : query.OrderBy(orderBy); + } + + if (skip.HasValue) + { + query = query.Skip(skip.Value); + } + + if (take.HasValue) + { + query = query.Take(take.Value); + } + + return query; + } + + public IQueryable GetAllNotHardDeletedFromMainDatabase() + => this.GetQuery(s => !s.IsHardDeletedFromMainDatabase); + + public async Task MarkAsHardDeletedFromMainDatabase(IEnumerable submissionIds) + => await this.dbSet + .Where(s => submissionIds.Contains(s.Id)) + .UpdateFromQueryAsync(s => new ArchivedSubmission + { + IsHardDeletedFromMainDatabase = true, + ModifiedOn = DateTime.UtcNow, + }, + bub => bub.BatchSize = GlobalConstants.BatchOperationsChunkSize); + + public async Task AddMany(IEnumerable entities) + { + var entitiesList = entities.ToList(); + var ids = entitiesList + .Select(x => x.Id) + .ToHashSet(); + + var existingEntities = this.dbSet + .Where(x => ids.Contains(x.Id)) + .Select(x => x.Id) + .ToHashSet(); + + var entitiesToAdd = entitiesList + .Where(x => !existingEntities.Contains(x.Id)) + .ToList(); + + await this.dbSet.AddRangeAsync(entitiesToAdd); + return entitiesToAdd.Count; + } + + public async Task SaveChanges() + => await this.archivesDbContext.SaveChangesAsync(); + } +} \ No newline at end of file diff --git a/Services/Common/OJS.Services.Common.Data/Implementations/DataService.cs b/Services/Common/OJS.Services.Common.Data/Implementations/DataService.cs index 09079bc115..d6351f9ede 100644 --- a/Services/Common/OJS.Services.Common.Data/Implementations/DataService.cs +++ b/Services/Common/OJS.Services.Common.Data/Implementations/DataService.cs @@ -46,6 +46,13 @@ public Task Update( Expression, SetPropertyCalls>> setPropertyCalls) => this.GetQuery(filter).ExecuteUpdateAsync(setPropertyCalls); + public async Task Update( + Expression> filterExpression, + Expression> updateExpression, + int batchSize) => + await this.dbSet.Where(filterExpression).UpdateFromQueryAsync(updateExpression, + x => x.BatchSize = batchSize); + public virtual void UpdateMany(IEnumerable entities) => this.dbSet.UpdateRange(entities); @@ -55,6 +62,9 @@ public virtual void Delete(TEntity entity) public void Delete(Expression>? filter = null) => this.dbSet.RemoveRange(this.GetQuery(filter)); + public async Task Delete(Expression> filterExpression, int batchSize) + => await this.dbSet.Where(filterExpression).DeleteFromQueryAsync(x => x.BatchSize = batchSize); + public virtual void DeleteMany(IEnumerable entities) => this.dbSet.RemoveRange(entities); diff --git a/Services/Common/OJS.Services.Common/BackgroundJobsHostedService.cs b/Services/Common/OJS.Services.Common/BackgroundJobsHostedService.cs index 4974939aa8..a35e83fe78 100644 --- a/Services/Common/OJS.Services.Common/BackgroundJobsHostedService.cs +++ b/Services/Common/OJS.Services.Common/BackgroundJobsHostedService.cs @@ -21,6 +21,8 @@ public class BackgroundJobsHostedService : IHostedService private readonly string updatingParticipantTotalScoreSnapshotCronExpression = Cron.Daily(4); private readonly string removingMultipleParticipantScoresForProblemCronExpression = Cron.Daily(3); private readonly string normalizingAllPointsThatExceedAllowedLimitCronExpression = Cron.Daily(1); + private readonly string archiveOldSubmissionsDailyBatchCronExpression = Cron.Daily(1, 30); + private readonly string hardDeleteArchivedSubmissionsCronExpression = Cron.Yearly(1, 1, 2, 30); private readonly IHangfireBackgroundJobsService hangfireBackgroundJobs; private readonly ILogger logger; @@ -100,5 +102,23 @@ private void AddOrUpdateRecurringJobs() AdministrationQueueName); this.logger.LogBackgroundJobAddedOrUpdated("normalizing all points that exceed allowed limit"); + + this.hangfireBackgroundJobs + .AddOrUpdateRecurringJob( + nameof(IRecurringBackgroundJobsBusinessService.ArchiveOldSubmissionsDailyBatch), + m => m.ArchiveOldSubmissionsDailyBatch(), + this.archiveOldSubmissionsDailyBatchCronExpression, + AdministrationQueueName); + + this.logger.LogBackgroundJobAddedOrUpdated("archiving submissions - daily"); + + this.hangfireBackgroundJobs + .AddOrUpdateRecurringJob( + nameof(IRecurringBackgroundJobsBusinessService.HardDeleteArchivedSubmissions), + m => m.HardDeleteArchivedSubmissions(), + this.hardDeleteArchivedSubmissionsCronExpression, + AdministrationQueueName); + + this.logger.LogBackgroundJobAddedOrUpdated("hard deleting archived submissions"); } } \ No newline at end of file diff --git a/Services/Common/OJS.Services.Common/IArchivedSubmissionsBusinessService.cs b/Services/Common/OJS.Services.Common/IArchivedSubmissionsBusinessService.cs new file mode 100644 index 0000000000..a8ce10590d --- /dev/null +++ b/Services/Common/OJS.Services.Common/IArchivedSubmissionsBusinessService.cs @@ -0,0 +1,31 @@ +namespace OJS.Services.Common +{ + using System.Threading.Tasks; + using OJS.Services.Infrastructure; + + public interface IArchivedSubmissionsBusinessService : IService + { + /// + /// Archives old submissions in batches, splitting the work into sub-batches for efficiency. + /// This is the main method used for automatic nightly archiving. + /// + /// Maximum number of submissions to archive in this batch. + /// Maximum size of each sub-batch. + /// The number of submissions that were archived. + Task ArchiveOldSubmissionsDailyBatch(int limit, int maxSubBatchSize); + + /// + /// Archives up to a specified number of old submissions in one go. + /// + /// Maximum number of submissions to archive. + /// The number of submissions that were archived. + Task ArchiveOldSubmissionsWithLimit(int limit); + + /// + /// Hard deletes archived submissions from the main database, up to a limit. + /// + /// Maximum number of submissions to hard delete. + /// The number of submissions that were hard deleted. + Task HardDeleteArchivedByLimit(int limit); + } +} \ No newline at end of file diff --git a/Services/Common/OJS.Services.Common/IRecurringBackgroundJobsBusinessService.cs b/Services/Common/OJS.Services.Common/IRecurringBackgroundJobsBusinessService.cs index cdef9f8de9..90e0a72c19 100644 --- a/Services/Common/OJS.Services.Common/IRecurringBackgroundJobsBusinessService.cs +++ b/Services/Common/OJS.Services.Common/IRecurringBackgroundJobsBusinessService.cs @@ -29,5 +29,17 @@ public interface IRecurringBackgroundJobsBusinessService : IService [Queue(AdministrationQueueName)] Task NormalizeAllPointsThatExceedAllowedLimit(); + + /// + /// Archives old submissions in batches for automatic nightly archiving. + /// + [Queue(AdministrationQueueName)] + Task ArchiveOldSubmissionsDailyBatch(); + + /// + /// Hard deletes archived submissions that are no longer needed. + /// + [Queue(AdministrationQueueName)] + Task HardDeleteArchivedSubmissions(); } } \ No newline at end of file diff --git a/Services/Infrastructure/OJS.Services.Infrastructure/Extensions/QueryableExtensions.cs b/Services/Infrastructure/OJS.Services.Infrastructure/Extensions/QueryableExtensions.cs index c2fad9eac9..e18934b897 100644 --- a/Services/Infrastructure/OJS.Services.Infrastructure/Extensions/QueryableExtensions.cs +++ b/Services/Infrastructure/OJS.Services.Infrastructure/Extensions/QueryableExtensions.cs @@ -73,4 +73,82 @@ private static IQueryable GetItemsPageQuery(this IQueryable queryable, => queryable .Skip(itemsPerPage * (pageNumber - 1)) .Take(itemsPerPage); + + /// + /// Extension method for splitting query into batches. NOTE: USE THIS ONLY IF THE + /// OPERATION WILL NOT CHANGE THE SELECTED QUERY SET ITSELF. Explanation: + /// The InBatches Extension will essentially modify the collection while iterating over it + /// leading to only half the entries actually being modified + /// (essentially behaving like deleting elements from a List while iterating it). For example if we select + /// all IsDeleted = 0 entries and modify them to IsDeleted = 1 using this extension method + /// after executing on the first batch, a new select is ran with OFFSET equal to batch size, + /// but it will get a modified version of the data + /// (where the original batch is missing since it was already modified) leading to skipping OFFSET amount + /// of entries each execution which leads to half the entries being skipped. + /// + /// + /// + /// Size of a single batch + /// Limits the query to a max amount, the sub queries will execute (limit / size) number of times, + /// regardless of amount of entries returned. Consumer should decide whether to cancel early, based on number of elements returned. + /// + public static IEnumerable> InBatches(this IOrderedQueryable queryable, int size, int limit = 0) + { + IQueryable current = queryable; + + if (limit > 0) + { + var currentAmount = 0; + while (currentAmount < limit) + { + var batch = current.Take(size); + currentAmount += size; + yield return batch; + current = current.Skip(size); + } + } + else + { + while (current.Any()) + { + var batch = current.Take(size); + yield return batch; + current = current.Skip(size); + } + } + } + + /// + /// Extension to split query into batches, if the query you use will modify the elements such that they + /// no longer match the selection criteria of the original query, use this extension method instead. + /// + /// + /// + /// Size of a single batch + /// Limits the query to a max amount, the sub queries will execute (limit / size) number of times, + /// regardless of amount of entries returned. Consumer should decide whether to cancel early, based on number of elements returned. + /// + public static IEnumerable> InSelfModifyingBatches(this IOrderedQueryable queryable, int size, int limit = 0) + { + IQueryable current = queryable; + + if (limit > 0) + { + var currentAmount = 0; + while (currentAmount < limit) + { + var batch = current.Take(size); + currentAmount += size; + yield return batch; + } + } + else + { + while (current.Any()) + { + var batch = current.Take(size); + yield return batch; + } + } + } } diff --git a/Services/UI/OJS.Services.Ui.Business/Implementations/RecurringBackgroundJobsBusinessService.cs b/Services/UI/OJS.Services.Ui.Business/Implementations/RecurringBackgroundJobsBusinessService.cs index 98e430873f..5fa538f3bc 100644 --- a/Services/UI/OJS.Services.Ui.Business/Implementations/RecurringBackgroundJobsBusinessService.cs +++ b/Services/UI/OJS.Services.Ui.Business/Implementations/RecurringBackgroundJobsBusinessService.cs @@ -19,4 +19,10 @@ public class RecurringBackgroundJobsBusinessService : IRecurringBackgroundJobsBu // Method is executed by administration implementation public Task NormalizeAllPointsThatExceedAllowedLimit() => throw new System.NotImplementedException(); + + // Method is executed by administration implementation + public Task ArchiveOldSubmissionsDailyBatch() => throw new System.NotImplementedException(); + + // Method is executed by administration implementation + public Task HardDeleteArchivedSubmissions() => throw new System.NotImplementedException(); } \ No newline at end of file