Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions documentation/wiki/ChangeWaves.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Change wave checks around features will be removed in the release that accompani
### 18.8
- [RAR task: across multiple input properties, resolve relative paths against the project directory (not the process current directory)](https://github.com/dotnet/msbuild/pull/13319)
- [Console, parallel console, and terminal loggers print the paths of log files written by registered loggers (e.g. file logger and binary logger) as part of the end-of-build summary.](https://github.com/dotnet/msbuild/pull/13577)
- [ToolTask and Exec now read tool stdout/stderr using the system ANSI code page (GetACP) instead of OEM (GetOEMCP), fixing garbled output from native tools on Western locales (e.g., MSVC link.exe on French Windows). Some older console-oriented tools still emit OEM-encoded output and may now be misdecoded; if this affects you, you can temporarily opt out with `MSBUILDDISABLEFEATURESFROMVERSION=18.8`, or for `Exec` specifically set `StdOutEncoding`/`StdErrEncoding` explicitly.](https://github.com/dotnet/msbuild/issues/12290)

### 18.7
- [Copy task retries on ERROR_ACCESS_DENIED on non-Windows platforms to handle transient lock conflicts (e.g. macOS CoW filesystems)](https://github.com/dotnet/msbuild/issues/13463)
Expand Down
54 changes: 54 additions & 0 deletions src/Framework/EncodingUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ internal static class EncodingUtilities
internal static readonly Encoding Utf8WithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);

private static Encoding s_currentOemEncoding;
private static Encoding s_currentAnsiEncoding;

internal const string UseUtf8Always = "ALWAYS";
internal const string UseUtf8Never = "NEVER";
Expand Down Expand Up @@ -86,6 +87,59 @@ internal static Encoding CurrentSystemOemEncoding
}
}

/// <summary>
/// Gets the encoding of the system ANSI code page (the value GetACP reports), resolving it on
/// first use and returning the cached instance afterwards.
///
/// ANSI code pages are used by most native Windows tools for string resources compiled into
/// their binaries. This differs from OEM code pages which are used for console I/O historically.
/// For example, on a French Windows system: ANSI = CP1252, OEM = CP850.
///
/// Many native build tools (e.g., MSVC v141 link.exe, cl.exe) write output using the ANSI
/// code page rather than the OEM code page, so MSBuild must read with ANSI to decode correctly.
/// See: https://github.com/dotnet/msbuild/issues/12290
/// </summary>
/// <value>
/// <see cref="Encoding.Default"/> when FEATURE_ENCODING_DEFAULT is defined; otherwise the encoding
/// for the GetACP code page on Windows, or UTF-8 when that lookup is not performed or fails.
/// </value>
/// <remarks>
/// The result is built in a local and written to the cache field only once it is final. Writing
/// the UTF-8 fallback to the field first would let a concurrent caller pass the null check and
/// return UTF-8 while the real code page is still being looked up.
/// </remarks>
internal static Encoding CurrentSystemAnsiEncoding
{
    get
    {
        // Read the cache field once so the null check and the return see the same value.
        Encoding cached = s_currentAnsiEncoding;
        if (cached != null)
        {
            return cached;
        }

        Encoding resolved;

#if FEATURE_ENCODING_DEFAULT
        // On .NET Framework, Encoding.Default returns the system ANSI code page (GetACP()).
        // e.g., CP1252 for Western European Windows. This is what most native Windows tools
        // use when writing string resources to stdout.
        resolved = Encoding.Default;
#else
        // On .NET Core/5+, Encoding.Default is always UTF-8, so we must explicitly call GetACP().
        // UTF-8 stays in place as the fallback if GetACP() fails or on non-Windows.
        resolved = Encoding.UTF8;

        try
        {
#if FEATURE_WINDOWSINTEROP
            if (NativeMethods.IsWindows)
            {
                // Legacy code pages are only available once the code pages provider is registered.
                Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

                // GetACP() returns the system ANSI code page (e.g., 1252 for French Windows).
                resolved = Encoding.GetEncoding((int)PInvoke.GetACP());
            }
#endif
        }
        catch (ArgumentException ex)
        {
            Debug.Assert(false, "GetEncoding(system ANSI code page) threw an ArgumentException in EncodingUtilities.CurrentSystemAnsiEncoding! Please log a bug against MSBuild.", ex.Message);
        }
        catch (NotSupportedException ex)
        {
            Debug.Assert(false, "GetEncoding(system ANSI code page) threw a NotSupportedException in EncodingUtilities.CurrentSystemAnsiEncoding! Please log a bug against MSBuild.", ex.Message);
        }
#endif

        // Publish only the final value; racing callers compute the same result, so the last write is harmless.
        s_currentAnsiEncoding = resolved;
        return resolved;
    }
}

/// <summary>
/// Checks two encoding types to determine if they are similar to each other (equal or if
/// the Encoding Name is the same).
Expand Down
1 change: 1 addition & 0 deletions src/Framework/NativeMethods.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ GetLogicalProcessorInformationEx
GetLongPathName
GetModuleFileName
GetNativeSystemInfo
GetACP
GetOEMCP
GetProcAddress
GetRunningObjectTable
Expand Down
73 changes: 66 additions & 7 deletions src/Tasks.UnitTests/Exec_Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -494,12 +494,16 @@ public void ExecTaskUnicodeCharacterInCommand()
}

/// <summary>
/// Tests that Exec task will choose the default code page when UTF8 is not needed.
/// Tests that Exec task uses ANSI (GetACP) encoding by default when the command contains
/// only ASCII characters (no UTF-8 batch file override needed).
/// See: https://github.com/dotnet/msbuild/issues/12290
/// </summary>
[Fact]
public void ExecTaskWithoutUnicodeCharacterInCommand()
{
RunExec(false, EncodingUtilities.CurrentSystemOemEncoding.EncodingName);
// Default stdout encoding is ANSI (GetACP) so that native tool output (e.g., MSVC v141
// link.exe on French Windows) is decoded correctly instead of garbled as OEM-850.
RunExec(false, EncodingUtilities.CurrentSystemAnsiEncoding.EncodingName);
}

/// <summary>
Expand All @@ -521,26 +525,29 @@ public void ExecTaskUtf8AlwaysWithAnsi()
}

/// <summary>
/// Exec task will NOT use UTF8 when UTF8 Never is specified and non-ANSI characters are in the Command
/// <remarks>Exec task will fail as the cmd processor will not be able to run the command.</remarks>
/// Exec task will NOT use UTF8 when UTF8 Never is specified and non-ANSI characters are in the Command.
/// UseUtf8=Never only controls the batch file encoding (stays OEM, no chcp); the stdout reading
/// encoding remains ANSI (GetACP) independently of the batch file encoding.
/// </summary>
/// <remarks>Exec task will fail as the cmd processor cannot run the garbled batch file.</remarks>
[WindowsOnlyTheory]
[InlineData("Never")]
[InlineData("System")]
public void ExecTaskUtf8NeverWithNonAnsi(string useUtf8)
{
RunExec(true, EncodingUtilities.CurrentSystemOemEncoding.EncodingName, useUtf8, false);
RunExec(true, EncodingUtilities.CurrentSystemAnsiEncoding.EncodingName, useUtf8, false);
}

/// <summary>
/// Exec task will NOT use UTF8 when UTF8 Never is specified and only ANSI characters are in the Command
/// Exec task will NOT use UTF8 when UTF8 Never is specified and only ANSI characters are in the Command.
/// UseUtf8=Never controls the batch file encoding only; stdout reading encoding remains ANSI (GetACP).
/// </summary>
[Theory]
[InlineData("Never")]
[InlineData("System")]
public void ExecTaskUtf8NeverWithAnsi(string useUtf8)
{
RunExec(false, EncodingUtilities.CurrentSystemOemEncoding.EncodingName, useUtf8);
RunExec(false, EncodingUtilities.CurrentSystemAnsiEncoding.EncodingName, useUtf8);
}

[Theory]
Expand Down Expand Up @@ -1178,6 +1185,58 @@ public void Exec_RelativeWorkingDirectory_ResolvedAgainstProjectDirectory()
exec.ValidateParametersAccessor().ShouldBeTrue();
exec.GetWorkingDirectoryAccessor().ShouldBe(Path.Combine(projectDir.Path, "builddir"));
}

#region stdout/stderr encoding tests

/// <summary>
/// The Exec task returned by PrepareExec reports the system ANSI code page (GetACP), not the OEM
/// code page (GetOEMCP), as its default stdout and stderr encoding.
/// Native tools (e.g., MSVC v141 link.exe on French Windows) write string resources using the
/// ANSI code page, so decoding their output as OEM garbles it.
/// See: https://github.com/dotnet/msbuild/issues/12290
/// </summary>
[WindowsOnlyFact]
public void ExecTask_DefaultStdEncodingIsAnsi()
{
    // Drop any cached change-wave state so the task is built against a fresh evaluation.
    ChangeWaves.ResetStateForTests();

    Exec task = PrepareExec("echo test");
    string ansiName = EncodingUtilities.CurrentSystemAnsiEncoding.EncodingName;

    task.StdOutEncoding.ShouldBe(ansiName);
    task.StdErrEncoding.ShouldBe(ansiName);
}
Comment thread
huulinhnguyen-dev marked this conversation as resolved.

/// <summary>
/// With MSBUILDDISABLEFEATURESFROMVERSION set to Wave18_8, the Exec task reports the OEM code
/// page as its default stdout and stderr encoding, i.e. the pre-Wave18_8 behaviour.
/// </summary>
[WindowsOnlyFact]
public void ExecTask_Wave18_8_OptedOut_DefaultEncodingIsOem()
{
    try
    {
        using (TestEnvironment testEnv = TestEnvironment.Create(_output))
        {
            // Reset cached wave state first, then opt out of the wave through the environment.
            ChangeWaves.ResetStateForTests();
            testEnv.SetEnvironmentVariable("MSBUILDDISABLEFEATURESFROMVERSION", ChangeWaves.Wave18_8.ToString());

            Exec task = PrepareExec("echo test");
            string oemName = EncodingUtilities.CurrentSystemOemEncoding.EncodingName;

            task.StdOutEncoding.ShouldBe(oemName);
            task.StdErrEncoding.ShouldBe(oemName);
        }
    }
    finally
    {
        // Runs after the test environment is disposed, so later tests do not see the opted-out state.
        ChangeWaves.ResetStateForTests();
    }
}

/// <summary>
/// UseUtf8Encoding=Always takes precedence over the ANSI default: the expected stdout
/// encoding for an ASCII-only command is UTF-8 (without BOM).
/// UseUtf8 controls the batch-file encoding; the chcp 65001 injection then also changes stdout reading.
/// </summary>
[WindowsOnlyFact]
public void ExecTask_UseUtf8AlwaysOverridesAnsiDefault()
{
    string expectedEncodingName = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false).EncodingName;

    RunExec(false, expectedEncodingName, "Always");
}

#endregion
}

internal sealed class ExecWrapper : Exec
Expand Down
21 changes: 15 additions & 6 deletions src/Tasks/Exec.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,21 @@ public Exec()
{
Command = string.Empty;

// Console-based output uses the current system OEM code page by default. Note that we should not use Console.OutputEncoding
// here since processes we run don't really have much to do with our console window (and also Console.OutputEncoding
// doesn't return the OEM code page if the running application that hosts MSBuild is not a console application).
// If the cmd file contains non-ANSI characters encoding may change.
_standardOutputEncoding = EncodingUtilities.CurrentSystemOemEncoding;
_standardErrorEncoding = EncodingUtilities.CurrentSystemOemEncoding;
// Wave18_8: use ANSI code page (GetACP) instead of OEM (GetOEMCP). Most native Windows tools
// write output using the ANSI code page (e.g., MSVC link.exe on French Windows: ANSI=CP1252,
// OEM=CP850). Reading with OEM garbles non-ASCII characters (e.g., 'é' → 'Ú').
// See: https://github.com/dotnet/msbuild/issues/12290
// If the cmd file contains non-ANSI characters the encoding may change later (see CreateTemporaryBatchFile).
if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave18_8))
{
_standardOutputEncoding = EncodingUtilities.CurrentSystemAnsiEncoding;
_standardErrorEncoding = EncodingUtilities.CurrentSystemAnsiEncoding;
}
else
{
_standardOutputEncoding = EncodingUtilities.CurrentSystemOemEncoding;
_standardErrorEncoding = EncodingUtilities.CurrentSystemOemEncoding;
}
}

#endregion
Expand Down
65 changes: 35 additions & 30 deletions src/Utilities/ToolTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -238,49 +238,54 @@ public virtual string ToolExe
/// Overridable property specifying the encoding of the captured task standard output stream
/// </summary>
/// <remarks>
/// Console-based output uses the current system OEM code page by default. Note that we should not use Console.OutputEncoding
/// here since processes we run don't really have much to do with our console window (and also Console.OutputEncoding
/// doesn't return the OEM code page if the running application that hosts MSBuild is not a console application).
/// Most native Windows tools (e.g., MSVC v141 link.exe, cl.exe) write their string resources
/// using the system ANSI code page (GetACP), not the OEM code page (GetOEMCP).
/// On a French Windows system for example: ANSI = CP1252, OEM = CP850.
/// Reading tool output with OEM-850 when the tool outputs CP1252 causes garbled non-ASCII
/// characters (e.g., 'é' → 'Ú', 'à' → 'Ó'). Using ANSI encoding fixes this mismatch.
/// See: https://github.com/dotnet/msbuild/issues/12290
/// </remarks>
protected virtual Encoding StandardOutputEncoding
protected virtual Encoding StandardOutputEncoding => GetDefaultToolEncoding();

/// <summary>
/// Returns the encoding to use for reading standard output and error streams from child tool processes.
/// Priority: UTF-8 encoding set by UseUtf8Encoding (Wave17_10) > ANSI (Wave18_8) > OEM legacy.
/// </summary>
private Encoding GetDefaultToolEncoding()
{
get
if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave17_10))
{
if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave17_10))
if (_encoding != null)
{
if (_encoding != null)
{
// Keep the encoding of standard output & error consistent with the console code page.
return _encoding;
}
// Keep the encoding of standard output & error consistent with the console code page.
return _encoding;
}
return EncodingUtilities.CurrentSystemOemEncoding;
}

// Wave18_8: use ANSI code page (GetACP) rather than OEM (GetOEMCP). Most native Windows tools
// compile their string resources with the ANSI code page, so reading with OEM garbles
// non-ASCII characters (e.g., 'é' → 'Ú' on French Windows: ANSI=CP1252, OEM=CP850).
// See: https://github.com/dotnet/msbuild/issues/12290
if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave18_8))
{
return EncodingUtilities.CurrentSystemAnsiEncoding;
}

return EncodingUtilities.CurrentSystemOemEncoding;
}

/// <summary>
/// Overridable property specifying the encoding of the captured task standard error stream
/// </summary>
/// <remarks>
/// Console-based output uses the current system OEM code page by default. Note that we should not use Console.OutputEncoding
/// here since processes we run don't really have much to do with our console window (and also Console.OutputEncoding
/// doesn't return the OEM code page if the running application that hosts MSBuild is not a console application).
/// Most native Windows tools (e.g., MSVC v141 link.exe, cl.exe) write their string resources
/// using the system ANSI code page (GetACP), not the OEM code page (GetOEMCP).
/// On a French Windows system for example: ANSI = CP1252, OEM = CP850.
/// Reading tool output with OEM-850 when the tool outputs CP1252 causes garbled non-ASCII
/// characters (e.g., 'é' → 'Ú', 'à' → 'Ó'). Using ANSI encoding fixes this mismatch.
/// See: https://github.com/dotnet/msbuild/issues/12290
/// </remarks>
protected virtual Encoding StandardErrorEncoding
{
get
{
if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave17_10))
{
if (_encoding != null)
{
// Keep the encoding of standard output & error consistent with the console code page.
return _encoding;
}
}
return EncodingUtilities.CurrentSystemOemEncoding;
}
}
protected virtual Encoding StandardErrorEncoding => GetDefaultToolEncoding();

/// <summary>
/// Gets the Path override value.
Expand Down