From dd6f4f7427d9c274fe0574188bb0bc703aeeba8a Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 24 Apr 2026 12:43:59 -0400 Subject: [PATCH 1/2] Add Paratext metadata to project data files Reviewer comments Add additional fields to metadata; add test --- .../src/Serval.DataFiles/Models/DataFile.cs | 1 + .../Models/ParatextMetadata.cs | 12 + .../Services/DataFileService.cs | 173 ++++++++++++++ .../Services/DataFileServiceTests.cs | 212 ++++++++++++++++++ .../test/Serval.DataFiles.Tests/Usings.cs | 1 + .../data/pt-project/41MATTe1.SFM | 6 + .../data/pt-project/Settings.xml | 34 +++ .../data/pt-project/custom.vrs | 31 +++ 8 files changed, 470 insertions(+) create mode 100644 src/Serval/src/Serval.DataFiles/Models/ParatextMetadata.cs create mode 100644 src/Serval/src/Serval.DataFiles/Services/DataFileService.cs create mode 100644 src/Serval/test/Serval.DataFiles.Tests/Services/DataFileServiceTests.cs create mode 100644 src/Serval/test/Serval.DataFiles.Tests/data/pt-project/41MATTe1.SFM create mode 100644 src/Serval/test/Serval.DataFiles.Tests/data/pt-project/Settings.xml create mode 100644 src/Serval/test/Serval.DataFiles.Tests/data/pt-project/custom.vrs diff --git a/src/Serval/src/Serval.DataFiles/Models/DataFile.cs b/src/Serval/src/Serval.DataFiles/Models/DataFile.cs index 32a660711..b9549daeb 100644 --- a/src/Serval/src/Serval.DataFiles/Models/DataFile.cs +++ b/src/Serval/src/Serval.DataFiles/Models/DataFile.cs @@ -8,4 +8,5 @@ public record DataFile : IOwnedEntity public required string Name { get; init; } public string Filename { get; init; } = ""; public required FileFormat Format { get; init; } + public ParatextMetadata? FileMetadata { get; init; } } diff --git a/src/Serval/src/Serval.DataFiles/Models/ParatextMetadata.cs b/src/Serval/src/Serval.DataFiles/Models/ParatextMetadata.cs new file mode 100644 index 000000000..d7cf93259 --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Models/ParatextMetadata.cs @@ -0,0 +1,12 @@ +namespace Serval.DataFiles.Models; + +public record ParatextMetadata +{ + public required string ProjectGuid { get; init; } + public required string Name { get; init; } + public required string FullName { get; init; } + public required string Versification { get; init; } + public required string TranslationType { get; init; } + public string? LanguageCode { get; init; } + public string? Visibility { get; init; } +} diff --git a/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs b/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs new file mode 100644 index 000000000..58fe63f8b --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs @@ -0,0 +1,173 @@ +using SIL.Machine.Corpora; + +namespace Serval.DataFiles.Services; + +public class DataFileService : OwnedEntityServiceBase, IDataFileService +{ + private readonly IOptionsMonitor _options; + private readonly IDataAccessContext _dataAccessContext; + private readonly IEventRouter _eventRouter; + private readonly IRepository _deletedFiles; + private readonly IFileSystem _fileSystem; + + public DataFileService( + IRepository dataFiles, + IDataAccessContext dataAccessContext, + IOptionsMonitor options, + IEventRouter eventRouter, + IRepository deletedFiles, + IFileSystem fileSystem + ) + : base(dataFiles) + { + _dataAccessContext = dataAccessContext; + _options = options; + _eventRouter = eventRouter; + _deletedFiles = deletedFiles; + _fileSystem = fileSystem; + _fileSystem.CreateDirectory(_options.CurrentValue.FilesDirectory); + } + + public async Task GetAsync(string id, string owner, CancellationToken cancellationToken = default) + { + DataFile? dataFile = await Entities.GetAsync(f => f.Id == id && f.Owner == owner, cancellationToken); + if (dataFile is null) + throw new EntityNotFoundException($"Could not find the DataFile '{id}' with owner '{owner}'."); + return dataFile; + } + + public async Task CreateAsync(DataFile dataFile, Stream stream, CancellationToken cancellationToken = default) + { + string filename = Path.GetRandomFileName(); + string path = GetDataFilePath(filename); + try + { + using (Stream fileStream = _fileSystem.OpenWrite(path)) + { + await stream.CopyToAsync(fileStream, cancellationToken); + } + if (dataFile.Format == FileFormat.Paratext) + { + ParatextMetadata metadata = await ParseParatextMetadataAsync(path); + dataFile = dataFile with { FileMetadata = metadata }; + } + await Entities.InsertAsync(dataFile with { Filename = filename }, cancellationToken); + } + catch + { + _fileSystem.DeleteFile(path); + throw; + } + } + + public async Task ReadAsync(string id, CancellationToken cancellationToken = default) + { + DataFile? dataFile = await GetAsync(id, cancellationToken); + if (dataFile is null) + throw new EntityNotFoundException($"Could not find the DataFile '{id}'."); + string path = GetDataFilePath(dataFile.Filename); + return _fileSystem.OpenRead(path); + } + + public async Task UpdateAsync(string id, Stream stream, CancellationToken cancellationToken = default) + { + string filename = Path.GetRandomFileName(); + string path = GetDataFilePath(filename); + bool deleteFile = false; + try + { + await using (Stream fileStream = _fileSystem.OpenWrite(path)) + await stream.CopyToAsync(fileStream, cancellationToken); + await _dataAccessContext.WithTransactionAsync( + async ct => + { + DataFile? originalDataFile = await Entities.UpdateAsync( + id, + u => u.Set(f => f.Filename, filename), + returnOriginal: true, + cancellationToken: ct + ); + if (originalDataFile is null) + throw new EntityNotFoundException($"Could not find the DataFile '{id}'."); + + if (originalDataFile.Format == FileFormat.Paratext) + { + ParatextMetadata metadata = await ParseParatextMetadataAsync(path); + await Entities.UpdateAsync( + id, + u => u.Set(f => f.FileMetadata, metadata), + cancellationToken: ct + ); + } + + await _deletedFiles.InsertAsync( + new DeletedFile { Filename = originalDataFile.Filename, DeletedAt = DateTime.UtcNow }, + cancellationToken: ct + ); + await _eventRouter.PublishAsync(new DataFileUpdated(id, filename), ct); + }, + cancellationToken: cancellationToken + ); + } + catch + { + deleteFile = true; + throw; + } + finally + { + if (deleteFile) + _fileSystem.DeleteFile(path); + } + + return await GetAsync(id, cancellationToken); + } + + public override async Task DeleteAsync(string id, CancellationToken cancellationToken = default) => + await _dataAccessContext.WithTransactionAsync( + async ct => + { + DataFile? dataFile = await Entities.DeleteAsync(id, ct); + if (dataFile is null) + throw new EntityNotFoundException($"Could not find the DataFile '{id}'."); + // We are intentionally not deleting files so they can be deleted later by DataFileCleaner + await _deletedFiles.InsertAsync( + new DeletedFile { Filename = dataFile.Filename, DeletedAt = DateTime.UtcNow }, + ct + ); + + await _eventRouter.PublishAsync(new DataFileDeleted(id), ct); + }, + cancellationToken: cancellationToken + ); + + private string GetDataFilePath(string filename) => Path.Combine(_options.CurrentValue.FilesDirectory, filename); + + private static async Task ParseParatextMetadataAsync(string path) + { + using ZipContainer zipContainer = new(path); + try + { + ParatextProjectSettings projectSettings = new Shared.Services.ZipParatextProjectSettingsParser( + zipContainer + ).Parse(); + return new ParatextMetadata + { + ProjectGuid = projectSettings.Guid, + Name = projectSettings.Name, + FullName = projectSettings.FullName, + Versification = projectSettings.Versification.Name, + TranslationType = projectSettings.TranslationType, + LanguageCode = projectSettings.LanguageCode, + Visibility = projectSettings.Visibility, + }; + } + catch (Exception e) when (e is not OperationCanceledException) + { + throw new InvalidOperationException( + "Unable to parse the Paratext project settings for the uploaded data file.", + e + ); + } + } +} diff --git a/src/Serval/test/Serval.DataFiles.Tests/Services/DataFileServiceTests.cs b/src/Serval/test/Serval.DataFiles.Tests/Services/DataFileServiceTests.cs new file mode 100644 index 000000000..bb58d92e4 --- /dev/null +++ b/src/Serval/test/Serval.DataFiles.Tests/Services/DataFileServiceTests.cs @@ -0,0 +1,212 @@ +namespace Serval.DataFiles.Services; + +[TestFixture] +public class DataFileServiceTests +{ + private const string DataFileId = "df0000000000000000000001"; + private static readonly DataFile DefaultDataFile = new() + { + Id = DataFileId, + Owner = "owner1", + Name = "file1", + Filename = "file1.txt", + Format = FileFormat.Text, + }; + + [Test] + public async Task CreateAsync_NoError() + { + var env = new TestEnvironment(); + using var fileStream = new MemoryStream(); + env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); + string content = "This is a file."; + using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) + await env.Service.CreateAsync(DefaultDataFile with { }, stream); + + Assert.That(env.DataFiles.Contains(DataFileId), Is.True); + Assert.That(Encoding.UTF8.GetString(fileStream.ToArray()), Is.EqualTo(content)); + } + + [Test] + public void CreateAsync_Error() + { + var env = new TestEnvironment(); + env.DataFiles.Add(DefaultDataFile with { }); + using var fileStream = new MemoryStream(); + env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); + string content = "This is a file."; + using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) + Assert.ThrowsAsync(() => env.Service.CreateAsync(DefaultDataFile with { }, stream)); + + env.FileSystem.Received().DeleteFile(Arg.Any()); + } + + [Test] + public async Task CreateAsync_Paratext() + { + var env = new TestEnvironment(); + env.FileSystem.OpenWrite(Arg.Any()) + .Returns(callInfo => new FileStream(callInfo.Arg(), FileMode.Create, FileAccess.Write)); + string paratextZipPath = ZipParatextProject(); + using (var stream = File.OpenRead(paratextZipPath)) + { + await env.Service.CreateAsync(DefaultDataFile with { Format = FileFormat.Paratext }, stream); + } + + DataFile dataFile = env.DataFiles.Get(DataFileId); + Assert.That(dataFile.FileMetadata, Is.Not.Null); + ParatextMetadata metadata = dataFile.FileMetadata; + using (Assert.EnterMultipleScope()) + { + Assert.That(metadata.ProjectGuid, Is.EqualTo("a7e0b3ce0200736062f9f810a444dbfbe64aca35")); + Assert.That(metadata.Name, Is.EqualTo("Te1")); + Assert.That(metadata.FullName, Is.EqualTo("Test1")); + Assert.That(metadata.TranslationType, Is.EqualTo("Standard")); + Assert.That(metadata.Versification, Does.StartWith("English")); + Assert.That(metadata.LanguageCode, Is.EqualTo("en")); + Assert.That(metadata.Visibility, Is.EqualTo("Public")); + } + } + + [Test] + public async Task DownloadAsync_Exists() + { + var env = new TestEnvironment(); + env.DataFiles.Add(DefaultDataFile with { }); + byte[] content = Encoding.UTF8.GetBytes("This is a file."); + using var fileStream = new MemoryStream(content); + env.FileSystem.OpenRead(Arg.Any()).Returns(fileStream); + Stream downloadedStream = await env.Service.ReadAsync(DataFileId); + Assert.That(new StreamReader(downloadedStream).ReadToEnd(), Is.EqualTo(content)); + } + + [Test] + public void DownloadAsync_DoesNotExists() + { + var env = new TestEnvironment(); + byte[] content = Encoding.UTF8.GetBytes("This is a file."); + using var fileStream = new MemoryStream(content); + env.FileSystem.OpenRead(Arg.Any()).Returns(fileStream); + Assert.ThrowsAsync(() => env.Service.ReadAsync(DataFileId)); + } + + [Test] + public async Task UpdateAsync_Exists() + { + var env = new TestEnvironment(); + env.DataFiles.Add(DefaultDataFile with { }); + using var fileStream = new MemoryStream(); + env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); + string content = "This is a file."; + DataFile dataFile; + using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) + dataFile = await env.Service.UpdateAsync(DataFileId, stream); + + Assert.That(dataFile.Revision, Is.EqualTo(2)); + Assert.That(Encoding.UTF8.GetString(fileStream.ToArray()), Is.EqualTo(content)); + DeletedFile deletedFile = env.DeletedFiles.Entities.Single(); + Assert.That(deletedFile.Filename, Is.EqualTo("file1.txt")); + } + + [Test] + public void UpdateAsync_GetAsyncFails() + { + var env = new TestEnvironment(); + + // We will use the mediator to cancel the token, which will cause GetAsync() to fail + // What we are testing for is GetAsync() failing due to network or other connectivity issues, token cancellation being one source + var cts = new CancellationTokenSource(); + env.EventRouter.When(x => x.PublishAsync(Arg.Any(), Arg.Any())) + .Do(_ => cts.Cancel()); + + // Set up a valid existing file + env.DataFiles.Add(DefaultDataFile with { }); + using var fileStream = new MemoryStream(); + env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); + string content = "This is a file."; + using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) + { + Assert.ThrowsAsync(() => + env.Service.UpdateAsync(DataFileId, stream, cts.Token) + ); + } + + // Verify the file was updated + DataFile dataFile = env.DataFiles.Get(DataFileId); + Assert.That(dataFile.Revision, Is.EqualTo(2)); + Assert.That(Encoding.UTF8.GetString(fileStream.ToArray()), Is.EqualTo(content)); + DeletedFile deletedFile = env.DeletedFiles.Entities.Single(); + Assert.That(deletedFile.Filename, Is.EqualTo("file1.txt")); + + env.FileSystem.DidNotReceive().DeleteFile(Arg.Any()); + } + + [Test] + public void UpdateAsync_DoesNotExist() + { + var env = new TestEnvironment(); + using var fileStream = new MemoryStream(); + env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); + string content = "This is a file."; + using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) + Assert.ThrowsAsync(() => env.Service.UpdateAsync(DataFileId, stream)); + + env.FileSystem.Received().DeleteFile(Arg.Any()); + } + + [Test] + public async Task DeleteAsync_Exists() + { + var env = new TestEnvironment(); + env.DataFiles.Add(DefaultDataFile with { }); + await env.Service.DeleteAsync(DataFileId); + + Assert.That(env.DataFiles.Contains(DataFileId), Is.False); + DeletedFile deletedFile = env.DeletedFiles.Entities.Single(); + Assert.That(deletedFile.Filename, Is.EqualTo("file1.txt")); + await env.EventRouter.Received().PublishAsync(Arg.Any(), Arg.Any()); + } + + [Test] + public void DeleteAsync_DoesNotExist() + { + var env = new TestEnvironment(); + Assert.ThrowsAsync(() => env.Service.DeleteAsync(DataFileId)); + } + + private class TestEnvironment + { + public TestEnvironment() + { + DataFiles = new MemoryRepository(); + IOptionsMonitor options = Substitute.For>(); + options.CurrentValue.Returns(new DataFileOptions()); + EventRouter = Substitute.For(); + DeletedFiles = new MemoryRepository(); + FileSystem = Substitute.For(); + Service = new DataFileService( + DataFiles, + new MemoryDataAccessContext(), + options, + EventRouter, + DeletedFiles, + FileSystem + ); + } + + public IFileSystem FileSystem { get; } + public MemoryRepository DeletedFiles { get; } + public IEventRouter EventRouter { get; } + public MemoryRepository DataFiles { get; } + public DataFileService Service { get; } + } + + private static string ZipParatextProject() + { + string path = Path.Combine(Path.GetTempPath(), "pt-project.zip"); + if (File.Exists(path)) + File.Delete(path); + ZipFile.CreateFromDirectory(Path.Combine("..", "..", "..", "data", "pt-project"), path); + return path; + } +} diff --git a/src/Serval/test/Serval.DataFiles.Tests/Usings.cs b/src/Serval/test/Serval.DataFiles.Tests/Usings.cs index 5f5be62c5..7bcc11b34 100644 --- a/src/Serval/test/Serval.DataFiles.Tests/Usings.cs +++ b/src/Serval/test/Serval.DataFiles.Tests/Usings.cs @@ -1,3 +1,4 @@ +global using System.IO.Compression; global using System.Text; global using Microsoft.Extensions.DependencyInjection; global using Microsoft.Extensions.Logging; diff --git a/src/Serval/test/Serval.DataFiles.Tests/data/pt-project/41MATTe1.SFM b/src/Serval/test/Serval.DataFiles.Tests/data/pt-project/41MATTe1.SFM new file mode 100644 index 000000000..8130771c2 --- /dev/null +++ b/src/Serval/test/Serval.DataFiles.Tests/data/pt-project/41MATTe1.SFM @@ -0,0 +1,6 @@ +\id MAT - SRC +\c 1 +\v 1 SRC - Chapter one, verse one. +\p new paragraph +\v 2 +\v 3 SRC - Chapter one, verse three. diff --git a/src/Serval/test/Serval.DataFiles.Tests/data/pt-project/Settings.xml b/src/Serval/test/Serval.DataFiles.Tests/data/pt-project/Settings.xml new file mode 100644 index 000000000..6358f4f0b --- /dev/null +++ b/src/Serval/test/Serval.DataFiles.Tests/data/pt-project/Settings.xml @@ -0,0 +1,34 @@ + + usfm.sty + 4 + en::: + English + 8.0.100.76 + Test1 + 65001 + T + + NFC + Te1 + a7e0b3ce0200736062f9f810a444dbfbe64aca35 + Charis SIL + 12 + + + + 41MAT + + Tes.SFM + Major::BiblicalTerms.xml + F + F + F + Public + Standard:: + + 3 + 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + 000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + + + \ No newline at end of file diff --git a/src/Serval/test/Serval.DataFiles.Tests/data/pt-project/custom.vrs b/src/Serval/test/Serval.DataFiles.Tests/data/pt-project/custom.vrs new file mode 100644 index 000000000..9c1cd3873 --- /dev/null +++ b/src/Serval/test/Serval.DataFiles.Tests/data/pt-project/custom.vrs @@ -0,0 +1,31 @@ +# custom.vrs + +LEV 14:56 +ROM 14:26 +REV 12:17 +TOB 5:22 +TOB 10:12 +SIR 23:28 +ESG 1:22 +ESG 3:15 +ESG 5:14 +ESG 8:17 +ESG 10:14 +SIR 33:33 +SIR 41:24 +BAR 1:22 +4MA 7:25 +4MA 12:20 + +# deliberately missing verses +-ROM 16:26 +-ROM 16:27 +-3JN 1:15 +-S3Y 1:49 +-ESG 4:6 +-ESG 9:5 +-ESG 9:30 + +LEV 14:55 = LEV 14:55 +LEV 14:55 = LEV 14:56 +LEV 14:56 = LEV 14:57 From a7964c7374aa3233607e15bb27d81424f30ea04d Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 20 May 2026 12:36:35 -0400 Subject: [PATCH 2/2] Properly merge Paratext metadata updates given re-arch --- .../Features/DataFiles/CreateDataFile.cs | 11 +- .../Features/DataFiles/UpdateDataFile.cs | 10 + .../Services/DataFileService.cs | 173 -------------- .../Services/ParatextProjectDataParser.cs | 32 +++ src/Serval/src/Serval.DataFiles/Usings.cs | 1 + .../DataFiles/DataFilesHandlersTests.cs | 38 ++++ .../Services/DataFileServiceTests.cs | 212 ------------------ 7 files changed, 90 insertions(+), 387 deletions(-) delete mode 100644 src/Serval/src/Serval.DataFiles/Services/DataFileService.cs create mode 100644 src/Serval/src/Serval.DataFiles/Services/ParatextProjectDataParser.cs delete mode 100644 src/Serval/test/Serval.DataFiles.Tests/Services/DataFileServiceTests.cs diff --git a/src/Serval/src/Serval.DataFiles/Features/DataFiles/CreateDataFile.cs b/src/Serval/src/Serval.DataFiles/Features/DataFiles/CreateDataFile.cs index ced9483df..a9741ca24 100644 --- a/src/Serval/src/Serval.DataFiles/Features/DataFiles/CreateDataFile.cs +++ b/src/Serval/src/Serval.DataFiles/Features/DataFiles/CreateDataFile.cs @@ -27,8 +27,15 @@ public async Task HandleAsync(CreateDataFile request, Ca }; try { - await using Stream fileStream = fileSystem.OpenWrite(path); - await request.FileStream.CopyToAsync(fileStream, cancellationToken); + using (Stream fileStream = fileSystem.OpenWrite(path)) + { + await request.FileStream.CopyToAsync(fileStream, cancellationToken); + } + if (dataFile.Format == FileFormat.Paratext) + { + ParatextMetadata metadata = await ParatextProjectDataParser.ParseParatextMetadataAsync(path); + dataFile = dataFile with { FileMetadata = metadata }; + } await dataFiles.InsertAsync(dataFile, cancellationToken); } catch diff --git a/src/Serval/src/Serval.DataFiles/Features/DataFiles/UpdateDataFile.cs b/src/Serval/src/Serval.DataFiles/Features/DataFiles/UpdateDataFile.cs index b28e0cb16..b076e8587 100644 --- a/src/Serval/src/Serval.DataFiles/Features/DataFiles/UpdateDataFile.cs +++ b/src/Serval/src/Serval.DataFiles/Features/DataFiles/UpdateDataFile.cs @@ -36,6 +36,16 @@ await dataAccessContext.WithTransactionAsync( ); if (originalDataFile is null) throw new EntityNotFoundException($"Could not find the DataFile '{request.FileId}'."); + if (originalDataFile.Format == FileFormat.Paratext) + { + ParatextMetadata metadata = await ParatextProjectDataParser.ParseParatextMetadataAsync(path); + await dataFiles.UpdateAsync( + request.FileId, + u => u.Set(f => f.FileMetadata, metadata), + cancellationToken: ct + ); + } + await deletedFiles.InsertAsync( new DeletedFile { Filename = originalDataFile.Filename, DeletedAt = DateTime.UtcNow }, ct diff --git a/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs b/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs deleted file mode 100644 index 58fe63f8b..000000000 --- a/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs +++ /dev/null @@ -1,173 +0,0 @@ -using SIL.Machine.Corpora; - -namespace Serval.DataFiles.Services; - -public class DataFileService : OwnedEntityServiceBase, IDataFileService -{ - private readonly IOptionsMonitor _options; - private readonly IDataAccessContext _dataAccessContext; - private readonly IEventRouter _eventRouter; - private readonly IRepository _deletedFiles; - private readonly IFileSystem _fileSystem; - - public DataFileService( - IRepository dataFiles, - IDataAccessContext dataAccessContext, - IOptionsMonitor options, - IEventRouter eventRouter, - IRepository deletedFiles, - IFileSystem fileSystem - ) - : base(dataFiles) - { - _dataAccessContext = dataAccessContext; - _options = options; - _eventRouter = eventRouter; - _deletedFiles = deletedFiles; - _fileSystem = fileSystem; - _fileSystem.CreateDirectory(_options.CurrentValue.FilesDirectory); - } - - public async Task GetAsync(string id, string owner, CancellationToken cancellationToken = default) - { - DataFile? dataFile = await Entities.GetAsync(f => f.Id == id && f.Owner == owner, cancellationToken); - if (dataFile is null) - throw new EntityNotFoundException($"Could not find the DataFile '{id}' with owner '{owner}'."); - return dataFile; - } - - public async Task CreateAsync(DataFile dataFile, Stream stream, CancellationToken cancellationToken = default) - { - string filename = Path.GetRandomFileName(); - string path = GetDataFilePath(filename); - try - { - using (Stream fileStream = _fileSystem.OpenWrite(path)) - { - await stream.CopyToAsync(fileStream, cancellationToken); - } - if (dataFile.Format == FileFormat.Paratext) - { - ParatextMetadata metadata = await ParseParatextMetadataAsync(path); - dataFile = dataFile with { FileMetadata = metadata }; - } - await Entities.InsertAsync(dataFile with { Filename = filename }, cancellationToken); - } - catch - { - _fileSystem.DeleteFile(path); - throw; - } - } - - public async Task ReadAsync(string id, CancellationToken cancellationToken = default) - { - DataFile? dataFile = await GetAsync(id, cancellationToken); - if (dataFile is null) - throw new EntityNotFoundException($"Could not find the DataFile '{id}'."); - string path = GetDataFilePath(dataFile.Filename); - return _fileSystem.OpenRead(path); - } - - public async Task UpdateAsync(string id, Stream stream, CancellationToken cancellationToken = default) - { - string filename = Path.GetRandomFileName(); - string path = GetDataFilePath(filename); - bool deleteFile = false; - try - { - await using (Stream fileStream = _fileSystem.OpenWrite(path)) - await stream.CopyToAsync(fileStream, cancellationToken); - await _dataAccessContext.WithTransactionAsync( - async ct => - { - DataFile? originalDataFile = await Entities.UpdateAsync( - id, - u => u.Set(f => f.Filename, filename), - returnOriginal: true, - cancellationToken: ct - ); - if (originalDataFile is null) - throw new EntityNotFoundException($"Could not find the DataFile '{id}'."); - - if (originalDataFile.Format == FileFormat.Paratext) - { - ParatextMetadata metadata = await ParseParatextMetadataAsync(path); - await Entities.UpdateAsync( - id, - u => u.Set(f => f.FileMetadata, metadata), - cancellationToken: ct - ); - } - - await _deletedFiles.InsertAsync( - new DeletedFile { Filename = originalDataFile.Filename, DeletedAt = DateTime.UtcNow }, - cancellationToken: ct - ); - await _eventRouter.PublishAsync(new DataFileUpdated(id, filename), ct); - }, - cancellationToken: cancellationToken - ); - } - catch - { - deleteFile = true; - throw; - } - finally - { - if (deleteFile) - _fileSystem.DeleteFile(path); - } - - return await GetAsync(id, cancellationToken); - } - - public override async Task DeleteAsync(string id, CancellationToken cancellationToken = default) => - await _dataAccessContext.WithTransactionAsync( - async ct => - { - DataFile? dataFile = await Entities.DeleteAsync(id, ct); - if (dataFile is null) - throw new EntityNotFoundException($"Could not find the DataFile '{id}'."); - // We are intentionally not deleting files so they can be deleted later by DataFileCleaner - await _deletedFiles.InsertAsync( - new DeletedFile { Filename = dataFile.Filename, DeletedAt = DateTime.UtcNow }, - ct - ); - - await _eventRouter.PublishAsync(new DataFileDeleted(id), ct); - }, - cancellationToken: cancellationToken - ); - - private string GetDataFilePath(string filename) => Path.Combine(_options.CurrentValue.FilesDirectory, filename); - - private static async Task ParseParatextMetadataAsync(string path) - { - using ZipContainer zipContainer = new(path); - try - { - ParatextProjectSettings projectSettings = new Shared.Services.ZipParatextProjectSettingsParser( - zipContainer - ).Parse(); - return new ParatextMetadata - { - ProjectGuid = projectSettings.Guid, - Name = projectSettings.Name, - FullName = projectSettings.FullName, - Versification = projectSettings.Versification.Name, - TranslationType = projectSettings.TranslationType, - LanguageCode = projectSettings.LanguageCode, - Visibility = projectSettings.Visibility, - }; - } - catch (Exception e) when (e is not OperationCanceledException) - { - throw new InvalidOperationException( - "Unable to parse the Paratext project settings for the uploaded data file.", - e - ); - } - } -} diff --git a/src/Serval/src/Serval.DataFiles/Services/ParatextProjectDataParser.cs b/src/Serval/src/Serval.DataFiles/Services/ParatextProjectDataParser.cs new file mode 100644 index 000000000..2e82efa98 --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Services/ParatextProjectDataParser.cs @@ -0,0 +1,32 @@ +namespace Serval.DataFiles.Services; + +public class ParatextProjectDataParser +{ + public static async Task ParseParatextMetadataAsync(string path) + { + using ZipContainer zipContainer = new(path); + try + { + ParatextProjectSettings projectSettings = new Shared.Services.ZipParatextProjectSettingsParser( + zipContainer + ).Parse(); + return new ParatextMetadata + { + ProjectGuid = projectSettings.Guid, + Name = projectSettings.Name, + FullName = projectSettings.FullName, + Versification = projectSettings.Versification.Name, + TranslationType = projectSettings.TranslationType, + LanguageCode = projectSettings.LanguageCode, + Visibility = projectSettings.Visibility, + }; + } + catch (Exception e) when (e is not OperationCanceledException) + { + throw new InvalidOperationException( + "Unable to parse the Paratext project settings for the uploaded data file.", + e + ); + } + } +} diff --git a/src/Serval/src/Serval.DataFiles/Usings.cs b/src/Serval/src/Serval.DataFiles/Usings.cs index 291d07e8f..6c8ba62aa 100644 --- a/src/Serval/src/Serval.DataFiles/Usings.cs +++ b/src/Serval/src/Serval.DataFiles/Usings.cs @@ -26,3 +26,4 @@ global using Serval.Shared.Services; global using Serval.Shared.Utils; global using SIL.DataAccess; +global using SIL.Machine.Corpora; diff --git a/src/Serval/test/Serval.DataFiles.Tests/Features/DataFiles/DataFilesHandlersTests.cs b/src/Serval/test/Serval.DataFiles.Tests/Features/DataFiles/DataFilesHandlersTests.cs index f35c63c44..b4adc8873 100644 --- a/src/Serval/test/Serval.DataFiles.Tests/Features/DataFiles/DataFilesHandlersTests.cs +++ b/src/Serval/test/Serval.DataFiles.Tests/Features/DataFiles/DataFilesHandlersTests.cs @@ -90,6 +90,35 @@ public async Task CreateDataFile_Error() env.FileSystem.Received().DeleteFile(Arg.Any()); } + [Test] + public async Task CreateDataFile_Paratext() + { + var env = new TestEnvironment(); + env.FileSystem.OpenWrite(Arg.Any()) + .Returns(callInfo => new FileStream(callInfo.Arg(), FileMode.Create, FileAccess.Write)); + string paratextZipPath = ZipParatextProject(); + CreateDataFileHandler handler = new(env.DataFiles, env.IdGenerator, env.Options, env.FileSystem, env.Mapper); + using FileStream stream = File.OpenRead(paratextZipPath); + CreateDataFileResponse response = await handler.HandleAsync( + new(Owner, "file1", "file1.txt", FileFormat.Paratext, stream), + CancellationToken.None + ); + DataFile? dataFile = await env.DataFiles.GetAsync(response.DataFile.Id, CancellationToken.None); + Assert.That(dataFile, Is.Not.Null); + Assert.That(dataFile.FileMetadata, Is.Not.Null); + ParatextMetadata metadata = dataFile.FileMetadata; + using (Assert.EnterMultipleScope()) + { + Assert.That(metadata.ProjectGuid, Is.EqualTo("a7e0b3ce0200736062f9f810a444dbfbe64aca35")); + Assert.That(metadata.Name, Is.EqualTo("Te1")); + Assert.That(metadata.FullName, Is.EqualTo("Test1")); + Assert.That(metadata.TranslationType, Is.EqualTo("Standard")); + Assert.That(metadata.Versification, Does.StartWith("English")); + Assert.That(metadata.LanguageCode, Is.EqualTo("en")); + Assert.That(metadata.Visibility, Is.EqualTo("Public")); + } + } + [Test] public async Task DownloadDataFile_FileExists() { @@ -273,4 +302,13 @@ public async Task CreateDataFileAsync(string id = "df00000000000000000 return file; } } + + private static string ZipParatextProject() + { + string path = Path.Combine(Path.GetTempPath(), "pt-project.zip"); + if (File.Exists(path)) + File.Delete(path); + ZipFile.CreateFromDirectory(Path.Combine("..", "..", "..", "data", "pt-project"), path); + return path; + } } diff --git a/src/Serval/test/Serval.DataFiles.Tests/Services/DataFileServiceTests.cs b/src/Serval/test/Serval.DataFiles.Tests/Services/DataFileServiceTests.cs deleted file mode 100644 index bb58d92e4..000000000 --- a/src/Serval/test/Serval.DataFiles.Tests/Services/DataFileServiceTests.cs +++ /dev/null @@ -1,212 +0,0 @@ -namespace Serval.DataFiles.Services; - -[TestFixture] -public class DataFileServiceTests -{ - private const string DataFileId = "df0000000000000000000001"; - private static readonly DataFile DefaultDataFile = new() - { - Id = DataFileId, - Owner = "owner1", - Name = "file1", - Filename = "file1.txt", - Format = FileFormat.Text, - }; - - [Test] - public async Task CreateAsync_NoError() - { - var env = new TestEnvironment(); - using var fileStream = new MemoryStream(); - env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); - string content = "This is a file."; - using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) - await env.Service.CreateAsync(DefaultDataFile with { }, stream); - - Assert.That(env.DataFiles.Contains(DataFileId), Is.True); - Assert.That(Encoding.UTF8.GetString(fileStream.ToArray()), Is.EqualTo(content)); - } - - [Test] - public void CreateAsync_Error() - { - var env = new TestEnvironment(); - env.DataFiles.Add(DefaultDataFile with { }); - using var fileStream = new MemoryStream(); - env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); - string content = "This is a file."; - using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) - Assert.ThrowsAsync(() => env.Service.CreateAsync(DefaultDataFile with { }, stream)); - - env.FileSystem.Received().DeleteFile(Arg.Any()); - } - - [Test] - public async Task CreateAsync_Paratext() - { - var env = new TestEnvironment(); - env.FileSystem.OpenWrite(Arg.Any()) - .Returns(callInfo => new FileStream(callInfo.Arg(), FileMode.Create, FileAccess.Write)); - string paratextZipPath = ZipParatextProject(); - using (var stream = File.OpenRead(paratextZipPath)) - { - await env.Service.CreateAsync(DefaultDataFile with { Format = FileFormat.Paratext }, stream); - } - - DataFile dataFile = env.DataFiles.Get(DataFileId); - Assert.That(dataFile.FileMetadata, Is.Not.Null); - ParatextMetadata metadata = dataFile.FileMetadata; - using (Assert.EnterMultipleScope()) - { - Assert.That(metadata.ProjectGuid, Is.EqualTo("a7e0b3ce0200736062f9f810a444dbfbe64aca35")); - Assert.That(metadata.Name, Is.EqualTo("Te1")); - Assert.That(metadata.FullName, Is.EqualTo("Test1")); - Assert.That(metadata.TranslationType, Is.EqualTo("Standard")); - Assert.That(metadata.Versification, Does.StartWith("English")); - Assert.That(metadata.LanguageCode, Is.EqualTo("en")); - Assert.That(metadata.Visibility, Is.EqualTo("Public")); - } - } - - [Test] - public async Task DownloadAsync_Exists() - { - var env = new TestEnvironment(); - env.DataFiles.Add(DefaultDataFile with { }); - byte[] content = Encoding.UTF8.GetBytes("This is a file."); - using var fileStream = new MemoryStream(content); - env.FileSystem.OpenRead(Arg.Any()).Returns(fileStream); - Stream downloadedStream = await env.Service.ReadAsync(DataFileId); - Assert.That(new StreamReader(downloadedStream).ReadToEnd(), Is.EqualTo(content)); - } - - [Test] - public void DownloadAsync_DoesNotExists() - { - var env = new TestEnvironment(); - byte[] content = Encoding.UTF8.GetBytes("This is a file."); - using var fileStream = new MemoryStream(content); - env.FileSystem.OpenRead(Arg.Any()).Returns(fileStream); - Assert.ThrowsAsync(() => env.Service.ReadAsync(DataFileId)); - } - - [Test] - public async Task UpdateAsync_Exists() - { - var env = new TestEnvironment(); - env.DataFiles.Add(DefaultDataFile with { }); - using var fileStream = new MemoryStream(); - env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); - string content = "This is a file."; - DataFile dataFile; - using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) - dataFile = await env.Service.UpdateAsync(DataFileId, stream); - - Assert.That(dataFile.Revision, Is.EqualTo(2)); - Assert.That(Encoding.UTF8.GetString(fileStream.ToArray()), Is.EqualTo(content)); - DeletedFile deletedFile = env.DeletedFiles.Entities.Single(); - Assert.That(deletedFile.Filename, Is.EqualTo("file1.txt")); - } - - [Test] - public void UpdateAsync_GetAsyncFails() - { - var env = new TestEnvironment(); - - // We will use the mediator to cancel the token, which will cause GetAsync() to fail - // What we are testing for is GetAsync() failing due to network or other connectivity issues, token cancellation being one source - var cts = new CancellationTokenSource(); - env.EventRouter.When(x => x.PublishAsync(Arg.Any(), Arg.Any())) - .Do(_ => cts.Cancel()); - - // Set up a valid existing file - env.DataFiles.Add(DefaultDataFile with { }); - using var fileStream = new MemoryStream(); - env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); - string content = "This is a file."; - using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) - { - Assert.ThrowsAsync(() => - env.Service.UpdateAsync(DataFileId, stream, cts.Token) - ); - } - - // Verify the file was updated - DataFile dataFile = env.DataFiles.Get(DataFileId); - Assert.That(dataFile.Revision, Is.EqualTo(2)); - Assert.That(Encoding.UTF8.GetString(fileStream.ToArray()), Is.EqualTo(content)); - DeletedFile deletedFile = env.DeletedFiles.Entities.Single(); - Assert.That(deletedFile.Filename, Is.EqualTo("file1.txt")); - - env.FileSystem.DidNotReceive().DeleteFile(Arg.Any()); - } - - [Test] - public void UpdateAsync_DoesNotExist() - { - var env = new TestEnvironment(); - using var fileStream = new MemoryStream(); - env.FileSystem.OpenWrite(Arg.Any()).Returns(fileStream); - string content = "This is a file."; - using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(content))) - Assert.ThrowsAsync(() => env.Service.UpdateAsync(DataFileId, stream)); - - env.FileSystem.Received().DeleteFile(Arg.Any()); - } - - [Test] - public async Task DeleteAsync_Exists() - { - var env = new TestEnvironment(); - env.DataFiles.Add(DefaultDataFile with { }); - await env.Service.DeleteAsync(DataFileId); - - Assert.That(env.DataFiles.Contains(DataFileId), Is.False); - DeletedFile deletedFile = env.DeletedFiles.Entities.Single(); - Assert.That(deletedFile.Filename, Is.EqualTo("file1.txt")); - await env.EventRouter.Received().PublishAsync(Arg.Any(), Arg.Any()); - } - - [Test] - public void DeleteAsync_DoesNotExist() - { - var env = new TestEnvironment(); - Assert.ThrowsAsync(() => env.Service.DeleteAsync(DataFileId)); - } - - private class TestEnvironment - { - public TestEnvironment() - { - DataFiles = new MemoryRepository(); - IOptionsMonitor options = Substitute.For>(); - options.CurrentValue.Returns(new DataFileOptions()); - EventRouter = Substitute.For(); - DeletedFiles = new MemoryRepository(); - FileSystem = Substitute.For(); - Service = new DataFileService( - DataFiles, - new MemoryDataAccessContext(), - options, - EventRouter, - DeletedFiles, - FileSystem - ); - } - - public IFileSystem FileSystem { get; } - public MemoryRepository DeletedFiles { get; } - public IEventRouter EventRouter { get; } - public MemoryRepository DataFiles { get; } - public DataFileService Service { get; } - } - - private static string ZipParatextProject() - { - string path = Path.Combine(Path.GetTempPath(), "pt-project.zip"); - if (File.Exists(path)) - File.Delete(path); - ZipFile.CreateFromDirectory(Path.Combine("..", "..", "..", "data", "pt-project"), path); - return path; - } -}