diff --git a/docker-compose.yml b/docker-compose.yml index ce6a2a8d..5e8ac998 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,11 +20,11 @@ services: - "ClearML__AccessKey=${ClearML_AccessKey:?access key needed}" - "ClearML__SecretKey=${ClearML_SecretKey:?secret key needed}" - BuildJob__ClearML__0__Queue=${CLEARML_GPU_QUEUE:-lambert_24gb} - - BuildJob__ClearML__0__DockerImage=${MACHINE_PY_IMAGE:-ghcr.io/sillsdev/machine.py:1.12.0} + - BuildJob__ClearML__0__DockerImage=${MACHINE_PY_IMAGE:-ghcr.io/sillsdev/machine.py:1.13.0} - BuildJob__ClearML__1__Queue=${CLEARML_CPU_QUEUE:-lambert_24gb.cpu_only} - - BuildJob__ClearML__1__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:1.12.0.cpu_only} + - BuildJob__ClearML__1__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:1.13.0.cpu_only} - BuildJob__ClearML__2__Queue=${CLEARML_CPU_QUEUE:-lambert_24gb.cpu_only} - - BuildJob__ClearML__2__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:1.12.0.cpu_only} + - BuildJob__ClearML__2__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:1.13.0.cpu_only} - SharedFile__Uri=s3://silnlp/docker-compose/ - "SharedFile__S3AccessKeyId=${AWS_ACCESS_KEY_ID:?access key needed}" - "SharedFile__S3SecretAccessKey=${AWS_SECRET_ACCESS_KEY:?secret key needed}" diff --git a/src/Echo/src/EchoEngine/TranslationEngineService.cs b/src/Echo/src/EchoEngine/TranslationEngineService.cs index fb9e2370..4c2e173a 100644 --- a/src/Echo/src/EchoEngine/TranslationEngineService.cs +++ b/src/Echo/src/EchoEngine/TranslationEngineService.cs @@ -212,16 +212,17 @@ await _parallelCorpusService.PreprocessAsync( { CorpusId = corpusId, TextId = row.TextId, - SourceRefs = row.SourceRefs.Select(r => r.ToString()!).ToArray(), - TargetRefs = row.TargetRefs.Select(r => r.ToString()!).ToArray(), + SourceRefs = [.. row.SourceRefs.Select(r => r.ToString()!)], + TargetRefs = [.. row.TargetRefs.Select(r => r.ToString()!)], Translation = row.SourceSegment, SourceTokens = tokens, TranslationTokens = tokens, - Alignment = tokens - .Select( + Alignment = + [ + .. tokens.Select( (_, i) => new AlignedWordPairContract { SourceIndex = i, TargetIndex = i } - ) - .ToList(), + ), + ], Confidence = 1.0, } ); diff --git a/src/Machine/src/Serval.Machine.Shared/Models/Pretranslation.cs b/src/Machine/src/Serval.Machine.Shared/Models/Pretranslation.cs index a1ea6e74..6199cd8a 100644 --- a/src/Machine/src/Serval.Machine.Shared/Models/Pretranslation.cs +++ b/src/Machine/src/Serval.Machine.Shared/Models/Pretranslation.cs @@ -7,8 +7,8 @@ public record Pretranslation public required IReadOnlyList SourceRefs { get; init; } public required IReadOnlyList TargetRefs { get; init; } public required string Translation { get; init; } - public IEnumerable? SourceTokens { get; init; } - public IEnumerable? TranslationTokens { get; init; } - public IReadOnlyList? Alignment { get; init; } + public required IEnumerable SourceTokens { get; init; } + public required IEnumerable TranslationTokens { get; init; } + public required IReadOnlyList Alignment { get; init; } public double Confidence { get; init; } } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformService.cs index 1cd41afa..8581e701 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformService.cs @@ -137,15 +137,16 @@ [EnumeratorCancellation] CancellationToken cancellationToken SourceRefs = pretranslation.SourceRefs, TargetRefs = pretranslation.TargetRefs, Translation = pretranslation.Translation, - SourceTokens = pretranslation.SourceTokens?.ToList(), - TranslationTokens = pretranslation.TranslationTokens?.ToList(), - Alignment = pretranslation - .Alignment?.Select(a => new AlignedWordPairContract + SourceTokens = [.. pretranslation.SourceTokens], + TranslationTokens = [.. pretranslation.TranslationTokens], + Alignment = + [ + .. pretranslation.Alignment.Select(a => new AlignedWordPairContract { SourceIndex = a.SourceIndex, TargetIndex = a.TargetIndex, - }) - .ToList(), + }), + ], Confidence = pretranslation.Confidence, }; } @@ -190,6 +191,7 @@ JsonSerializerOptions options textId = reader.GetString()!; break; case "refs": + // Obsolete May 2026 reader.Read(); targetRefs = JsonSerializer.Deserialize>(ref reader, options)!.ToArray(); break; diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformService.cs index 72364dcf..204daca6 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformService.cs @@ -140,14 +140,15 @@ [EnumeratorCancellation] CancellationToken cancellationToken TargetRefs = record.TargetRefs, SourceTokens = record.SourceTokens, TargetTokens = record.TargetTokens, - Alignment = record - .Alignment.Select(a => new AlignedWordPairContract + Alignment = + [ + .. record.Alignment.Select(a => new AlignedWordPairContract { SourceIndex = a.SourceIndex, TargetIndex = a.TargetIndex, Score = a.TranslationScore, - }) - .ToList(), + }), + ], }; } } @@ -187,6 +188,7 @@ JsonSerializerOptions options textId = reader.GetString()!; break; case "refs": + // Obsolete May 2026 reader.Read(); targetRefs = JsonSerializer.Deserialize>(ref reader, options)!.ToArray(); break; diff --git a/src/Machine/src/Serval.Machine.Shared/Services/TranslationPreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/TranslationPreprocessBuildJob.cs index 3e9e2cca..606c11a2 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/TranslationPreprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/TranslationPreprocessBuildJob.cs @@ -82,7 +82,11 @@ await ParallelCorpusService.PreprocessAsync( pretranslateWriter.WriteStartObject(); pretranslateWriter.WriteString("corpusId", corpusId); pretranslateWriter.WriteString("textId", row.TextId); - pretranslateWriter.WriteStartArray("refs"); + pretranslateWriter.WriteStartArray("sourceRefs"); + foreach (object rowRef in row.SourceRefs) + pretranslateWriter.WriteStringValue(rowRef.ToString()); + pretranslateWriter.WriteEndArray(); + pretranslateWriter.WriteStartArray("targetRefs"); foreach (object rowRef in row.TargetRefs) pretranslateWriter.WriteStringValue(rowRef.ToString()); pretranslateWriter.WriteEndArray(); diff --git a/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentPreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentPreprocessBuildJob.cs index a782dbb6..ae3dc044 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentPreprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentPreprocessBuildJob.cs @@ -82,7 +82,11 @@ await ParallelCorpusService.PreprocessAsync( wordAlignmentWriter.WriteStartObject(); wordAlignmentWriter.WriteString("corpusId", corpusId); wordAlignmentWriter.WriteString("textId", row.TextId); - wordAlignmentWriter.WriteStartArray("refs"); + wordAlignmentWriter.WriteStartArray("sourceRefs"); + foreach (object rowRef in row.SourceRefs) + wordAlignmentWriter.WriteStringValue(rowRef.ToString()); + wordAlignmentWriter.WriteEndArray(); + wordAlignmentWriter.WriteStartArray("targetRefs"); foreach (object rowRef in row.TargetRefs) wordAlignmentWriter.WriteStringValue(rowRef.ToString()); wordAlignmentWriter.WriteEndArray(); diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs index 1a087da6..e04f0219 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs @@ -1,5 +1,3 @@ -using Serval.Translation.Contracts; - namespace Serval.Machine.Shared.Services; [TestFixture] diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalTranslationPlatformServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalTranslationPlatformServiceTests.cs new file mode 100644 index 00000000..cd5e4a36 --- /dev/null +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalTranslationPlatformServiceTests.cs @@ -0,0 +1,144 @@ +namespace Serval.Machine.Shared.Services; + +[TestFixture] +public class ServalTranslationPlatformServiceTests +{ + [Test] + public async Task InsertInferenceResultsAsync_Refs() + { + var env = new TestEnvironment(); + await using (var stream = new MemoryStream()) + { + await JsonSerializer.SerializeAsync( + stream, + new JsonArray + { + new JsonObject + { + ["corpusId"] = "corpus1", + ["textId"] = "MAT", + ["refs"] = new JsonArray { "MAT 1:1" }, + ["translation"] = "translation", + ["sequenceConfidence"] = 0.5, + }, + } + ); + stream.Seek(0, SeekOrigin.Begin); + await env.Service.InsertInferenceResultsAsync("engine1", stream); + } + + await env + .PlatformService.Received() + .InsertPretranslationsAsync( + "engine1", + Arg.Any>(), + Arg.Any() + ); + Assert.That(env.PretranslationContracts, Has.Count.EqualTo(1)); + Assert.That( + env.PretranslationContracts[0], + Is.EqualTo( + new PretranslationContract + { + CorpusId = "corpus1", + TextId = "MAT", + SourceRefs = [], + TargetRefs = ["MAT 1:1"], + Translation = "translation", + SourceTokens = [], + TranslationTokens = [], + Alignment = [], + Confidence = 0.5, + } + ) + .UsingPropertiesComparer() + ); + } + + [Test] + public async Task InsertInferenceResultsAsync_SourceAndTargetRefs() + { + var env = new TestEnvironment(); + await using (var stream = new MemoryStream()) + { + await JsonSerializer.SerializeAsync( + stream, + new JsonArray + { + new JsonObject + { + ["corpusId"] = "corpus1", + ["textId"] = "MAT", + ["sourceRefs"] = new JsonArray { "MAT 1:1" }, + ["targetRefs"] = new JsonArray { "MAT 1:2" }, + ["sourceTokens"] = new JsonArray { "sourceToken1" }, + ["translationTokens"] = new JsonArray { "translationToken1" }, + ["translation"] = "translation", + ["alignment"] = "0-0", + }, + } + ); + stream.Seek(0, SeekOrigin.Begin); + await env.Service.InsertInferenceResultsAsync("engine1", stream); + } + + await env + .PlatformService.Received() + .InsertPretranslationsAsync( + "engine1", + Arg.Any>(), + Arg.Any() + ); + Assert.That(env.PretranslationContracts, Has.Count.EqualTo(1)); + Assert.That( + env.PretranslationContracts[0], + Is.EqualTo( + new PretranslationContract + { + CorpusId = "corpus1", + TextId = "MAT", + SourceRefs = ["MAT 1:1"], + TargetRefs = ["MAT 1:2"], + Translation = "translation", + SourceTokens = ["sourceToken1"], + TranslationTokens = ["translationToken1"], + Alignment = [new AlignedWordPairContract { SourceIndex = 0, TargetIndex = 0 }], + Confidence = 0.0, + } + ) + .UsingPropertiesComparer() + ); + } + + private class TestEnvironment + { + public TestEnvironment() + { + PlatformService = Substitute.For(); + PlatformService + .InsertPretranslationsAsync( + Arg.Any(), + Arg.Any>(), + Arg.Any() + ) + .Returns(async ci => + { + PretranslationContracts.Clear(); + await foreach ( + PretranslationContract pretranslationContract in ci.Arg< + IAsyncEnumerable + >() + ) + { + PretranslationContracts.Add(pretranslationContract); + } + }); + + Service = new ServalTranslationPlatformService(PlatformService); + } + + public ServalTranslationPlatformService Service { get; } + public ITranslationPlatformService PlatformService { get; } + public List PretranslationContracts { get; } = []; + } +} diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalWordAlignmentPlatformServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalWordAlignmentPlatformServiceTests.cs new file mode 100644 index 00000000..64a215e9 --- /dev/null +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalWordAlignmentPlatformServiceTests.cs @@ -0,0 +1,154 @@ +namespace Serval.Machine.Shared.Services; + +[TestFixture] +public class ServalWordAlignmentPlatformServiceTests +{ + [Test] + public async Task InsertWordAlignmentsAsync_Refs() + { + var env = new TestEnvironment(); + await using (var stream = new MemoryStream()) + { + await JsonSerializer.SerializeAsync( + stream, + new JsonArray + { + new JsonObject + { + ["corpusId"] = "corpus1", + ["textId"] = "MAT", + ["refs"] = new JsonArray { "MAT 1:1" }, + ["sourceTokens"] = new JsonArray { "sourceToken1" }, + ["targetTokens"] = new JsonArray { "targetToken1" }, + ["alignment"] = "0-0:1.0:1.0", + }, + } + ); + stream.Seek(0, SeekOrigin.Begin); + await env.Service.InsertInferenceResultsAsync("engine1", stream); + } + + await env + .PlatformService.Received() + .InsertWordAlignmentsAsync( + "engine1", + Arg.Any>(), + Arg.Any() + ); + Assert.That(env.WordAlignmentContracts, Has.Count.EqualTo(1)); + Assert.That( + env.WordAlignmentContracts[0], + Is.EqualTo( + new WordAlignmentContract + { + CorpusId = "corpus1", + TextId = "MAT", + SourceRefs = [], + TargetRefs = ["MAT 1:1"], + SourceTokens = ["sourceToken1"], + TargetTokens = ["targetToken1"], + Alignment = + [ + new AlignedWordPairContract + { + SourceIndex = 0, + TargetIndex = 0, + Score = 1.0, + }, + ], + } + ) + .UsingPropertiesComparer() + ); + } + + [Test] + public async Task InsertWordAlignmentsAsync_SourceAndTargetRefs() + { + var env = new TestEnvironment(); + await using (var stream = new MemoryStream()) + { + await JsonSerializer.SerializeAsync( + stream, + new JsonArray + { + new JsonObject + { + ["corpusId"] = "corpus1", + ["textId"] = "MAT", + ["sourceRefs"] = new JsonArray { "MAT 1:1" }, + ["targetRefs"] = new JsonArray { "MAT 1:2" }, + ["sourceTokens"] = new JsonArray { "sourceToken1" }, + ["targetTokens"] = new JsonArray { "targetToken1" }, + ["alignment"] = "0-0:1.0:1.0", + }, + } + ); + stream.Seek(0, SeekOrigin.Begin); + await env.Service.InsertInferenceResultsAsync("engine1", stream); + } + + await env + .PlatformService.Received() + .InsertWordAlignmentsAsync( + "engine1", + Arg.Any>(), + Arg.Any() + ); + Assert.That(env.WordAlignmentContracts, Has.Count.EqualTo(1)); + Assert.That( + env.WordAlignmentContracts[0], + Is.EqualTo( + new WordAlignmentContract + { + CorpusId = "corpus1", + TextId = "MAT", + SourceRefs = ["MAT 1:1"], + TargetRefs = ["MAT 1:2"], + SourceTokens = ["sourceToken1"], + TargetTokens = ["targetToken1"], + Alignment = + [ + new AlignedWordPairContract + { + SourceIndex = 0, + TargetIndex = 0, + Score = 1.0, + }, + ], + } + ) + .UsingPropertiesComparer() + ); + } + + private class TestEnvironment + { + public TestEnvironment() + { + PlatformService = Substitute.For(); + PlatformService + .InsertWordAlignmentsAsync( + Arg.Any(), + Arg.Any>(), + Arg.Any() + ) + .Returns(async ci => + { + WordAlignmentContracts.Clear(); + await foreach ( + WordAlignmentContract wordAlignmentContract in ci.Arg>() + ) + { + WordAlignmentContracts.Add(wordAlignmentContract); + } + }); + + Service = new ServalWordAlignmentPlatformService(PlatformService); + } + + public ServalWordAlignmentPlatformService Service { get; } + public IWordAlignmentPlatformService PlatformService { get; } + public List WordAlignmentContracts { get; } = []; + } +} diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs index b6ba9903..78f1965a 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs @@ -1,5 +1,3 @@ -using Serval.Translation.Contracts; - namespace Serval.Machine.Shared.Services; [TestFixture] diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/StatisticalEngineServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/StatisticalEngineServiceTests.cs index 92099c77..b4eecb94 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/StatisticalEngineServiceTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/StatisticalEngineServiceTests.cs @@ -1,5 +1,3 @@ -using Serval.WordAlignment.Contracts; - namespace Serval.Machine.Shared.Services; [TestFixture] diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Usings.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Usings.cs index 1ae4c6ce..c3740e91 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Usings.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Usings.cs @@ -1,4 +1,5 @@ -global using System.Text.Json; +global using System.Text.Json; +global using System.Text.Json.Nodes; global using Microsoft.Extensions.DependencyInjection; global using Microsoft.Extensions.Hosting; global using Microsoft.Extensions.Hosting.Internal; @@ -13,6 +14,8 @@ global using Serval.Machine.Shared.Configuration; global using Serval.Machine.Shared.Models; global using Serval.Shared.Contracts; +global using Serval.Translation.Contracts; +global using Serval.WordAlignment.Contracts; global using SIL.DataAccess; global using SIL.Machine.Annotations; global using SIL.Machine.Corpora; diff --git a/src/Serval/src/Serval.ApiServer/appsettings.json b/src/Serval/src/Serval.ApiServer/appsettings.json index 14231ca8..170a8f59 100644 --- a/src/Serval/src/Serval.ApiServer/appsettings.json +++ b/src/Serval/src/Serval.ApiServer/appsettings.json @@ -30,19 +30,19 @@ "EngineType": "Nmt", "ModelType": "huggingface", "Queue": "jobs_backlog", - "DockerImage": "ghcr.io/sillsdev/machine.py:1.12.0" + "DockerImage": "ghcr.io/sillsdev/machine.py:1.13.0" }, { "EngineType": "SmtTransfer", "ModelType": "thot", "Queue": "jobs_backlog.cpu_only", - "DockerImage": "ghcr.io/sillsdev/machine.py:1.12.0.cpu_only" + "DockerImage": "ghcr.io/sillsdev/machine.py:1.13.0.cpu_only" }, { "EngineType": "Statistical", "ModelType": "thot", "Queue": "jobs_backlog.cpu_only", - "DockerImage": "ghcr.io/sillsdev/machine.py:1.12.0.cpu_only" + "DockerImage": "ghcr.io/sillsdev/machine.py:1.13.0.cpu_only" } ] }, diff --git a/src/Serval/src/Serval.Translation.Contracts/PretranslationContract.cs b/src/Serval/src/Serval.Translation.Contracts/PretranslationContract.cs index cd0cdc01..ec7914d8 100644 --- a/src/Serval/src/Serval.Translation.Contracts/PretranslationContract.cs +++ b/src/Serval/src/Serval.Translation.Contracts/PretranslationContract.cs @@ -7,8 +7,8 @@ public record PretranslationContract public required IReadOnlyList SourceRefs { get; init; } public required IReadOnlyList TargetRefs { get; init; } public required string Translation { get; init; } - public IReadOnlyList? SourceTokens { get; init; } - public IReadOnlyList? TranslationTokens { get; init; } - public IReadOnlyList? Alignment { get; init; } + public required IReadOnlyList SourceTokens { get; init; } + public required IReadOnlyList TranslationTokens { get; init; } + public required IReadOnlyList Alignment { get; init; } public double? Confidence { get; init; } } diff --git a/src/Serval/test/Serval.Translation.Tests/Services/PlatformServiceTests.cs b/src/Serval/test/Serval.Translation.Tests/Services/PlatformServiceTests.cs index 8a0704a7..25153972 100644 --- a/src/Serval/test/Serval.Translation.Tests/Services/PlatformServiceTests.cs +++ b/src/Serval/test/Serval.Translation.Tests/Services/PlatformServiceTests.cs @@ -285,6 +285,9 @@ private static async IAsyncEnumerable GetTestPretranslat SourceRefs = ["ref1"], TargetRefs = ["ref1"], Translation = "test", + SourceTokens = [], + TranslationTokens = [], + Alignment = [], }; await Task.CompletedTask; }