Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ services:
- "ClearML__AccessKey=${ClearML_AccessKey:?access key needed}"
- "ClearML__SecretKey=${ClearML_SecretKey:?secret key needed}"
- BuildJob__ClearML__0__Queue=${CLEARML_GPU_QUEUE:-lambert_24gb}
- BuildJob__ClearML__0__DockerImage=${MACHINE_PY_IMAGE:-ghcr.io/sillsdev/machine.py:1.12.0}
- BuildJob__ClearML__0__DockerImage=${MACHINE_PY_IMAGE:-ghcr.io/sillsdev/machine.py:1.13.0}
- BuildJob__ClearML__1__Queue=${CLEARML_CPU_QUEUE:-lambert_24gb.cpu_only}
- BuildJob__ClearML__1__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:1.12.0.cpu_only}
- BuildJob__ClearML__1__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:1.13.0.cpu_only}
- BuildJob__ClearML__2__Queue=${CLEARML_CPU_QUEUE:-lambert_24gb.cpu_only}
- BuildJob__ClearML__2__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:1.12.0.cpu_only}
- BuildJob__ClearML__2__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:1.13.0.cpu_only}
- SharedFile__Uri=s3://silnlp/docker-compose/
- "SharedFile__S3AccessKeyId=${AWS_ACCESS_KEY_ID:?access key needed}"
- "SharedFile__S3SecretAccessKey=${AWS_SECRET_ACCESS_KEY:?secret key needed}"
Expand Down
13 changes: 7 additions & 6 deletions src/Echo/src/EchoEngine/TranslationEngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -212,16 +212,17 @@ await _parallelCorpusService.PreprocessAsync(
{
CorpusId = corpusId,
TextId = row.TextId,
SourceRefs = row.SourceRefs.Select(r => r.ToString()!).ToArray(),
TargetRefs = row.TargetRefs.Select(r => r.ToString()!).ToArray(),
SourceRefs = [.. row.SourceRefs.Select(r => r.ToString()!)],
TargetRefs = [.. row.TargetRefs.Select(r => r.ToString()!)],
Translation = row.SourceSegment,
SourceTokens = tokens,
TranslationTokens = tokens,
Alignment = tokens
.Select(
Alignment =
[
.. tokens.Select(
(_, i) => new AlignedWordPairContract { SourceIndex = i, TargetIndex = i }
)
.ToList(),
),
],
Confidence = 1.0,
}
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ public record Pretranslation
public required IReadOnlyList<string> SourceRefs { get; init; }
public required IReadOnlyList<string> TargetRefs { get; init; }
public required string Translation { get; init; }
public IEnumerable<string>? SourceTokens { get; init; }
public IEnumerable<string>? TranslationTokens { get; init; }
public IReadOnlyList<AlignedWordPair>? Alignment { get; init; }
public required IEnumerable<string> SourceTokens { get; init; }
public required IEnumerable<string> TranslationTokens { get; init; }
public required IReadOnlyList<AlignedWordPair> Alignment { get; init; }
public double Confidence { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,16 @@ [EnumeratorCancellation] CancellationToken cancellationToken
SourceRefs = pretranslation.SourceRefs,
TargetRefs = pretranslation.TargetRefs,
Translation = pretranslation.Translation,
SourceTokens = pretranslation.SourceTokens?.ToList(),
TranslationTokens = pretranslation.TranslationTokens?.ToList(),
Alignment = pretranslation
.Alignment?.Select(a => new AlignedWordPairContract
SourceTokens = [.. pretranslation.SourceTokens],
TranslationTokens = [.. pretranslation.TranslationTokens],
Alignment =
[
.. pretranslation.Alignment.Select(a => new AlignedWordPairContract
{
SourceIndex = a.SourceIndex,
TargetIndex = a.TargetIndex,
})
.ToList(),
}),
],
Confidence = pretranslation.Confidence,
};
}
Expand Down Expand Up @@ -190,6 +191,7 @@ JsonSerializerOptions options
textId = reader.GetString()!;
break;
case "refs":
// Obsolete May 2026
reader.Read();
targetRefs = JsonSerializer.Deserialize<IList<string>>(ref reader, options)!.ToArray();
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,15 @@ [EnumeratorCancellation] CancellationToken cancellationToken
TargetRefs = record.TargetRefs,
SourceTokens = record.SourceTokens,
TargetTokens = record.TargetTokens,
Alignment = record
.Alignment.Select(a => new AlignedWordPairContract
Alignment =
[
.. record.Alignment.Select(a => new AlignedWordPairContract
{
SourceIndex = a.SourceIndex,
TargetIndex = a.TargetIndex,
Score = a.TranslationScore,
})
.ToList(),
}),
],
};
}
}
Expand Down Expand Up @@ -187,6 +188,7 @@ JsonSerializerOptions options
textId = reader.GetString()!;
break;
case "refs":
// Obsolete May 2026
reader.Read();
targetRefs = JsonSerializer.Deserialize<IList<string>>(ref reader, options)!.ToArray();
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ await ParallelCorpusService.PreprocessAsync(
pretranslateWriter.WriteStartObject();
pretranslateWriter.WriteString("corpusId", corpusId);
pretranslateWriter.WriteString("textId", row.TextId);
pretranslateWriter.WriteStartArray("refs");
pretranslateWriter.WriteStartArray("sourceRefs");
foreach (object rowRef in row.SourceRefs)
pretranslateWriter.WriteStringValue(rowRef.ToString());
pretranslateWriter.WriteEndArray();
pretranslateWriter.WriteStartArray("targetRefs");
foreach (object rowRef in row.TargetRefs)
pretranslateWriter.WriteStringValue(rowRef.ToString());
pretranslateWriter.WriteEndArray();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ await ParallelCorpusService.PreprocessAsync(
wordAlignmentWriter.WriteStartObject();
wordAlignmentWriter.WriteString("corpusId", corpusId);
wordAlignmentWriter.WriteString("textId", row.TextId);
wordAlignmentWriter.WriteStartArray("refs");
wordAlignmentWriter.WriteStartArray("sourceRefs");
foreach (object rowRef in row.SourceRefs)
wordAlignmentWriter.WriteStringValue(rowRef.ToString());
wordAlignmentWriter.WriteEndArray();
wordAlignmentWriter.WriteStartArray("targetRefs");
foreach (object rowRef in row.TargetRefs)
wordAlignmentWriter.WriteStringValue(rowRef.ToString());
wordAlignmentWriter.WriteEndArray();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
using Serval.Translation.Contracts;

namespace Serval.Machine.Shared.Services;

[TestFixture]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
namespace Serval.Machine.Shared.Services;

/// <summary>
/// Exercises <see cref="ServalTranslationPlatformService.InsertInferenceResultsAsync"/> by feeding it a JSON
/// stream and inspecting the <see cref="PretranslationContract"/> items forwarded to the platform service.
/// </summary>
[TestFixture]
public class ServalTranslationPlatformServiceTests
{
    /// <summary>
    /// A record that only carries the legacy "refs" array is expected to come out with those values as
    /// target refs, and with empty source refs, tokens and alignment.
    /// </summary>
    [Test]
    public async Task InsertInferenceResultsAsync_Refs()
    {
        var env = new TestEnvironment();
        var payload = new JsonArray
        {
            new JsonObject
            {
                ["corpusId"] = "corpus1",
                ["textId"] = "MAT",
                ["refs"] = new JsonArray { "MAT 1:1" },
                ["translation"] = "translation",
                ["sequenceConfidence"] = 0.5,
            },
        };

        await InsertAsync(env, payload);

        var expected = new PretranslationContract
        {
            CorpusId = "corpus1",
            TextId = "MAT",
            SourceRefs = [],
            TargetRefs = ["MAT 1:1"],
            Translation = "translation",
            SourceTokens = [],
            TranslationTokens = [],
            Alignment = [],
            Confidence = 0.5,
        };
        await AssertSingleInsertedAsync(env, expected);
    }

    /// <summary>
    /// A record with separate "sourceRefs"/"targetRefs", tokens and a "0-0" alignment string is expected to be
    /// mapped field by field; no "sequenceConfidence" is supplied and the expected confidence is 0.0.
    /// </summary>
    [Test]
    public async Task InsertInferenceResultsAsync_SourceAndTargetRefs()
    {
        var env = new TestEnvironment();
        var payload = new JsonArray
        {
            new JsonObject
            {
                ["corpusId"] = "corpus1",
                ["textId"] = "MAT",
                ["sourceRefs"] = new JsonArray { "MAT 1:1" },
                ["targetRefs"] = new JsonArray { "MAT 1:2" },
                ["sourceTokens"] = new JsonArray { "sourceToken1" },
                ["translationTokens"] = new JsonArray { "translationToken1" },
                ["translation"] = "translation",
                ["alignment"] = "0-0",
            },
        };

        await InsertAsync(env, payload);

        var expected = new PretranslationContract
        {
            CorpusId = "corpus1",
            TextId = "MAT",
            SourceRefs = ["MAT 1:1"],
            TargetRefs = ["MAT 1:2"],
            Translation = "translation",
            SourceTokens = ["sourceToken1"],
            TranslationTokens = ["translationToken1"],
            Alignment = [new AlignedWordPairContract { SourceIndex = 0, TargetIndex = 0 }],
            Confidence = 0.0,
        };
        await AssertSingleInsertedAsync(env, expected);
    }

    /// <summary>
    /// Serializes <paramref name="payload"/> into an in-memory stream, rewinds it, and passes it to the service
    /// under test for engine "engine1". The stream is disposed before this method returns.
    /// </summary>
    private static async Task InsertAsync(TestEnvironment env, JsonArray payload)
    {
        await using var buffer = new MemoryStream();
        await JsonSerializer.SerializeAsync(buffer, payload);

        // Rewind so the service reads the JSON from the start.
        buffer.Seek(0, SeekOrigin.Begin);
        await env.Service.InsertInferenceResultsAsync("engine1", buffer);
    }

    /// <summary>
    /// Verifies that the platform service received an insert call for "engine1" and that exactly one contract
    /// was captured, equal to <paramref name="expected"/> when compared property by property.
    /// </summary>
    private static async Task AssertSingleInsertedAsync(TestEnvironment env, PretranslationContract expected)
    {
        await env
            .PlatformService.Received()
            .InsertPretranslationsAsync(
                "engine1",
                Arg.Any<IAsyncEnumerable<PretranslationContract>>(),
                Arg.Any<CancellationToken>()
            );

        Assert.That(env.PretranslationContracts, Has.Count.EqualTo(1));
        Assert.That(env.PretranslationContracts[0], Is.EqualTo(expected).UsingPropertiesComparer());
    }

    /// <summary>
    /// Wires the service under test to a substitute platform service that records every contract streamed to
    /// <c>InsertPretranslationsAsync</c> into <see cref="PretranslationContracts"/>.
    /// </summary>
    private class TestEnvironment
    {
        public TestEnvironment()
        {
            PlatformService = Substitute.For<ITranslationPlatformService>();
            PlatformService
                .InsertPretranslationsAsync(
                    Arg.Any<string>(),
                    Arg.Any<IAsyncEnumerable<PretranslationContract>>(),
                    Arg.Any<CancellationToken>()
                )
                .Returns(async call =>
                {
                    // Each insert call replaces whatever an earlier call captured.
                    PretranslationContracts.Clear();
                    IAsyncEnumerable<PretranslationContract> incoming = call.Arg<
                        IAsyncEnumerable<PretranslationContract>
                    >();
                    await foreach (PretranslationContract item in incoming)
                    {
                        PretranslationContracts.Add(item);
                    }
                });

            Service = new ServalTranslationPlatformService(PlatformService);
        }

        public ServalTranslationPlatformService Service { get; }
        public ITranslationPlatformService PlatformService { get; }
        public List<PretranslationContract> PretranslationContracts { get; } = [];
    }
}
Loading
Loading