diff --git a/documentation/wiki/ChangeWaves.md b/documentation/wiki/ChangeWaves.md
index a6fd5b23f0f..23c7f9645a6 100644
--- a/documentation/wiki/ChangeWaves.md
+++ b/documentation/wiki/ChangeWaves.md
@@ -32,6 +32,7 @@ Change wave checks around features will be removed in the release that accompani
 ### 18.8
 - [RAR task: across multiple input properties, resolve relative paths against the project directory (not the process current directory)](https://github.com/dotnet/msbuild/pull/13319)
 - [Console, parallel console, and terminal loggers print the paths of log files written by registered loggers (e.g. file logger and binary logger) as part of the end-of-build summary.](https://github.com/dotnet/msbuild/pull/13577)
+- [ToolTask and Exec now read tool stdout/stderr using the system ANSI code page (GetACP) instead of OEM (GetOEMCP), fixing garbled output from native tools on Western locales (e.g., MSVC link.exe on French Windows). Some older console-oriented tools still emit OEM-encoded output and may now be misdecoded; if this affects you, you can temporarily opt out with `MSBUILDDISABLEFEATURESFROMVERSION=18.8`, or for `Exec` specifically set `StdOutEncoding`/`StdErrEncoding` explicitly.](https://github.com/dotnet/msbuild/issues/12290)
 
 ### 18.7
 - [Copy task retries on ERROR_ACCESS_DENIED on non-Windows platforms to handle transient lock conflicts (e.g. macOS CoW filesystems)](https://github.com/dotnet/msbuild/issues/13463)
diff --git a/src/Framework/EncodingUtilities.cs b/src/Framework/EncodingUtilities.cs
index afb8bf0a384..9b8abddd173 100644
--- a/src/Framework/EncodingUtilities.cs
+++ b/src/Framework/EncodingUtilities.cs
@@ -28,6 +28,7 @@ internal static class EncodingUtilities
         internal static readonly Encoding Utf8WithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
 
         private static Encoding s_currentOemEncoding;
+        private static Encoding s_currentAnsiEncoding;
 
         internal const string UseUtf8Always = "ALWAYS";
         internal const string UseUtf8Never = "NEVER";
@@ -86,6 +87,64 @@ internal static Encoding CurrentSystemOemEncoding
             }
         }
 
+        /// <summary>
+        /// Get the current system locale code page, ANSI version (GetACP). ANSI code pages are used
+        /// by most native Windows tools for string resources compiled into their binaries.
+        /// This differs from OEM code pages which are used for console I/O historically.
+        /// For example, on a French Windows system: ANSI = CP1252, OEM = CP850.
+        ///
+        /// Many native build tools (e.g., MSVC v141 link.exe, cl.exe) write output using the ANSI
+        /// code page rather than the OEM code page, so MSBuild must read with ANSI to decode correctly.
+        /// See: https://github.com/dotnet/msbuild/issues/12290
+        /// </summary>
+        internal static Encoding CurrentSystemAnsiEncoding
+        {
+            get
+            {
+                if (s_currentAnsiEncoding != null)
+                {
+                    return s_currentAnsiEncoding;
+                }
+
+#if FEATURE_ENCODING_DEFAULT
+                // On .NET Framework, Encoding.Default returns the system ANSI code page (GetACP()).
+                // e.g., CP1252 for Western European Windows. This is what most native Windows tools
+                // use when writing string resources to stdout.
+                s_currentAnsiEncoding = Encoding.Default;
+#else
+                // On .NET Core/5+, Encoding.Default is always UTF-8, so we must explicitly call GetACP().
+                // Resolve into a local and publish to the static field exactly once: assigning the
+                // UTF-8 fallback to the field first would let a concurrent caller pass the null check
+                // above and observe UTF-8 before the real ANSI code page has been stored.
+                Encoding ansiEncoding = Encoding.UTF8; // fallback if GetACP() fails or on non-Windows
+
+                try
+                {
+#if FEATURE_WINDOWSINTEROP
+                    if (NativeMethods.IsWindows)
+                    {
+                        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+                        // GetACP() returns the system ANSI code page (e.g., 1252 for French Windows).
+                        ansiEncoding = Encoding.GetEncoding((int)PInvoke.GetACP());
+                    }
+#endif
+                }
+                catch (ArgumentException ex)
+                {
+                    Debug.Assert(false, "GetEncoding(system ANSI code page) threw an ArgumentException in EncodingUtilities.CurrentSystemAnsiEncoding! Please log a bug against MSBuild.", ex.Message);
+                }
+                catch (NotSupportedException ex)
+                {
+                    Debug.Assert(false, "GetEncoding(system ANSI code page) threw a NotSupportedException in EncodingUtilities.CurrentSystemAnsiEncoding! Please log a bug against MSBuild.", ex.Message);
+                }
+
+                s_currentAnsiEncoding = ansiEncoding;
+#endif
+
+                return s_currentAnsiEncoding;
+            }
+        }
+
         /// <summary>
         /// Checks two encoding types to determine if they are similar to each other (equal or if
         /// the Encoding Name is the same).
diff --git a/src/Framework/NativeMethods.txt b/src/Framework/NativeMethods.txt
index 3508948fd36..b41cf0d4a83 100644
--- a/src/Framework/NativeMethods.txt
+++ b/src/Framework/NativeMethods.txt
@@ -52,6 +52,7 @@ GetLogicalProcessorInformationEx
 GetLongPathName
 GetModuleFileName
 GetNativeSystemInfo
+GetACP
 GetOEMCP
 GetProcAddress
 GetRunningObjectTable
diff --git a/src/Tasks.UnitTests/Exec_Tests.cs b/src/Tasks.UnitTests/Exec_Tests.cs
index 40bc1521676..dfc8dacf86d 100644
--- a/src/Tasks.UnitTests/Exec_Tests.cs
+++ b/src/Tasks.UnitTests/Exec_Tests.cs
@@ -494,12 +494,16 @@ public void ExecTaskUnicodeCharacterInCommand()
         }
 
         /// <summary>
-        /// Tests that Exec task will choose the default code page when UTF8 is not needed.
+        /// Tests that Exec task uses ANSI (GetACP) encoding by default when the command contains
+        /// only ASCII characters (no UTF-8 batch file override needed).
+        /// See: https://github.com/dotnet/msbuild/issues/12290
         /// </summary>
         [Fact]
         public void ExecTaskWithoutUnicodeCharacterInCommand()
         {
-            RunExec(false, EncodingUtilities.CurrentSystemOemEncoding.EncodingName);
+            // Default stdout encoding is ANSI (GetACP) so that native tool output (e.g., MSVC v141
+            // link.exe on French Windows) is decoded correctly instead of garbled as OEM-850.
+            RunExec(false, EncodingUtilities.CurrentSystemAnsiEncoding.EncodingName);
         }
 
         /// <summary>
@@ -521,26 +525,29 @@ public void ExecTaskUtf8AlwaysWithAnsi()
         }
 
         /// <summary>
-        /// Exec task will NOT use UTF8 when UTF8 Never is specified and non-ANSI characters are in the Command
-        /// Exec task will fail as the cmd processor will not be able to run the command.
+        /// Exec task will NOT use UTF8 when UTF8 Never is specified and non-ANSI characters are in the Command.
+        /// UseUtf8=Never only controls the batch file encoding (stays OEM, no chcp); the stdout reading
+        /// encoding remains ANSI (GetACP) independently of the batch file encoding.
+        /// Exec task will fail as the cmd processor cannot run the garbled batch file.
         /// </summary>
         [WindowsOnlyTheory]
         [InlineData("Never")]
         [InlineData("System")]
         public void ExecTaskUtf8NeverWithNonAnsi(string useUtf8)
         {
-            RunExec(true, EncodingUtilities.CurrentSystemOemEncoding.EncodingName, useUtf8, false);
+            RunExec(true, EncodingUtilities.CurrentSystemAnsiEncoding.EncodingName, useUtf8, false);
         }
 
         /// <summary>
-        /// Exec task will NOT use UTF8 when UTF8 Never is specified and only ANSI characters are in the Command
+        /// Exec task will NOT use UTF8 when UTF8 Never is specified and only ANSI characters are in the Command.
+        /// UseUtf8=Never controls the batch file encoding only; stdout reading encoding remains ANSI (GetACP).
         /// </summary>
         [Theory]
         [InlineData("Never")]
         [InlineData("System")]
         public void ExecTaskUtf8NeverWithAnsi(string useUtf8)
         {
-            RunExec(false, EncodingUtilities.CurrentSystemOemEncoding.EncodingName, useUtf8);
+            RunExec(false, EncodingUtilities.CurrentSystemAnsiEncoding.EncodingName, useUtf8);
         }
 
         [Theory]
@@ -1178,6 +1185,58 @@ public void Exec_RelativeWorkingDirectory_ResolvedAgainstProjectDirectory()
             exec.ValidateParametersAccessor().ShouldBeTrue();
             exec.GetWorkingDirectoryAccessor().ShouldBe(Path.Combine(projectDir.Path, "builddir"));
         }
+
+        #region stdout/stderr encoding tests
+
+        /// <summary>
+        /// Exec task constructor initialises stdout/stderr encoding to ANSI (GetACP) — not OEM (GetOEMCP).
+        /// This fixes garbled output from native tools (e.g., MSVC v141 link.exe on French Windows)
+        /// that write string resources using the ANSI code page, not OEM.
+        /// See: https://github.com/dotnet/msbuild/issues/12290
+        /// </summary>
+        [WindowsOnlyFact]
+        public void ExecTask_DefaultStdEncodingIsAnsi()
+        {
+            ChangeWaves.ResetStateForTests();
+            Exec exec = PrepareExec("echo test");
+            exec.StdOutEncoding.ShouldBe(EncodingUtilities.CurrentSystemAnsiEncoding.EncodingName);
+            exec.StdErrEncoding.ShouldBe(EncodingUtilities.CurrentSystemAnsiEncoding.EncodingName);
+        }
+
+        /// <summary>
+        /// When Wave18_8 is opted out, Exec falls back to OEM encoding — preserving pre-Wave18_8 behaviour.
+        /// </summary>
+        [WindowsOnlyFact]
+        public void ExecTask_Wave18_8_OptedOut_DefaultEncodingIsOem()
+        {
+            try
+            {
+                using TestEnvironment env = TestEnvironment.Create(_output);
+                ChangeWaves.ResetStateForTests();
+                env.SetEnvironmentVariable("MSBUILDDISABLEFEATURESFROMVERSION", ChangeWaves.Wave18_8.ToString());
+
+                Exec exec = PrepareExec("echo test");
+                exec.StdOutEncoding.ShouldBe(EncodingUtilities.CurrentSystemOemEncoding.EncodingName);
+                exec.StdErrEncoding.ShouldBe(EncodingUtilities.CurrentSystemOemEncoding.EncodingName);
+            }
+            finally
+            {
+                ChangeWaves.ResetStateForTests();
+            }
+        }
+
+        /// <summary>
+        /// UseUtf8Encoding=Always overrides the ANSI default: when the batch file needs UTF-8
+        /// (chcp 65001 is injected), the stdout encoding is also switched to UTF-8.
+        /// UseUtf8 controls the batch-file encoding; the chcp injection then also changes stdout reading.
+        /// </summary>
+        [WindowsOnlyFact]
+        public void ExecTask_UseUtf8AlwaysOverridesAnsiDefault()
+        {
+            RunExec(false, new UTF8Encoding(false).EncodingName, "Always");
+        }
+
+        #endregion
     }
 
     internal sealed class ExecWrapper : Exec
diff --git a/src/Tasks/Exec.cs b/src/Tasks/Exec.cs
index b34d6d61bf7..4bcabcaf50e 100644
--- a/src/Tasks/Exec.cs
+++ b/src/Tasks/Exec.cs
@@ -33,12 +33,21 @@ public Exec()
         {
             Command = string.Empty;
 
-            // Console-based output uses the current system OEM code page by default. Note that we should not use Console.OutputEncoding
-            // here since processes we run don't really have much to do with our console window (and also Console.OutputEncoding
-            // doesn't return the OEM code page if the running application that hosts MSBuild is not a console application).
-            // If the cmd file contains non-ANSI characters encoding may change.
-            _standardOutputEncoding = EncodingUtilities.CurrentSystemOemEncoding;
-            _standardErrorEncoding = EncodingUtilities.CurrentSystemOemEncoding;
+            // Wave18_8: use ANSI code page (GetACP) instead of OEM (GetOEMCP). Most native Windows tools
+            // write output using the ANSI code page (e.g., MSVC link.exe on French Windows: ANSI=CP1252,
+            // OEM=CP850). Reading with OEM garbles non-ASCII characters (e.g., 'é' → 'Ú').
+            // See: https://github.com/dotnet/msbuild/issues/12290
+            // If the cmd file contains non-ANSI characters the encoding may change later (see CreateTemporaryBatchFile).
+            if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave18_8))
+            {
+                _standardOutputEncoding = EncodingUtilities.CurrentSystemAnsiEncoding;
+                _standardErrorEncoding = EncodingUtilities.CurrentSystemAnsiEncoding;
+            }
+            else
+            {
+                _standardOutputEncoding = EncodingUtilities.CurrentSystemOemEncoding;
+                _standardErrorEncoding = EncodingUtilities.CurrentSystemOemEncoding;
+            }
         }
 
         #endregion
diff --git a/src/Utilities/ToolTask.cs b/src/Utilities/ToolTask.cs
index 05ed6aca646..6b196865c53 100644
--- a/src/Utilities/ToolTask.cs
+++ b/src/Utilities/ToolTask.cs
@@ -238,49 +238,54 @@ public virtual string ToolExe
         /// Overridable property specifying the encoding of the captured task standard output stream
         /// </summary>
         /// <remarks>
-        /// Console-based output uses the current system OEM code page by default. Note that we should not use Console.OutputEncoding
-        /// here since processes we run don't really have much to do with our console window (and also Console.OutputEncoding
-        /// doesn't return the OEM code page if the running application that hosts MSBuild is not a console application).
+        /// Most native Windows tools (e.g., MSVC v141 link.exe, cl.exe) write their string resources
+        /// using the system ANSI code page (GetACP), not the OEM code page (GetOEMCP).
+        /// On a French Windows system for example: ANSI = CP1252, OEM = CP850.
+        /// Reading tool output with OEM-850 when the tool outputs CP1252 causes garbled non-ASCII
+        /// characters (e.g., 'é' → 'Ú', 'à' → 'Ó'). Using ANSI encoding fixes this mismatch.
+        /// See: https://github.com/dotnet/msbuild/issues/12290
         /// </remarks>
-        protected virtual Encoding StandardOutputEncoding
+        protected virtual Encoding StandardOutputEncoding => GetDefaultToolEncoding();
+
+        /// <summary>
+        /// Returns the encoding to use for reading standard output and error streams from child tool processes.
+        /// Priority: UTF-8 encoding set by UseUtf8Encoding (Wave17_10) > ANSI (Wave18_8) > OEM legacy.
+        /// </summary>
+        private Encoding GetDefaultToolEncoding()
         {
-            get
+            if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave17_10))
             {
-                if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave17_10))
+                if (_encoding != null)
                 {
-                    if (_encoding != null)
-                    {
-                        // Keep the encoding of standard output & error consistent with the console code page.
-                        return _encoding;
-                    }
+                    // Keep the encoding of standard output & error consistent with the console code page.
+                    return _encoding;
                 }
-                return EncodingUtilities.CurrentSystemOemEncoding;
             }
+
+            // Wave18_8: use ANSI code page (GetACP) rather than OEM (GetOEMCP). Most native Windows tools
+            // compile their string resources with the ANSI code page, so reading with OEM garbles
+            // non-ASCII characters (e.g., 'é' → 'Ú' on French Windows: ANSI=CP1252, OEM=CP850).
+            // See: https://github.com/dotnet/msbuild/issues/12290
+            if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave18_8))
+            {
+                return EncodingUtilities.CurrentSystemAnsiEncoding;
+            }
+
+            return EncodingUtilities.CurrentSystemOemEncoding;
         }
 
         /// <summary>
         /// Overridable property specifying the encoding of the captured task standard error stream
         /// </summary>
         /// <remarks>
-        /// Console-based output uses the current system OEM code page by default. Note that we should not use Console.OutputEncoding
-        /// here since processes we run don't really have much to do with our console window (and also Console.OutputEncoding
-        /// doesn't return the OEM code page if the running application that hosts MSBuild is not a console application).
+        /// Most native Windows tools (e.g., MSVC v141 link.exe, cl.exe) write their string resources
+        /// using the system ANSI code page (GetACP), not the OEM code page (GetOEMCP).
+        /// On a French Windows system for example: ANSI = CP1252, OEM = CP850.
+        /// Reading tool output with OEM-850 when the tool outputs CP1252 causes garbled non-ASCII
+        /// characters (e.g., 'é' → 'Ú', 'à' → 'Ó'). Using ANSI encoding fixes this mismatch.
+        /// See: https://github.com/dotnet/msbuild/issues/12290
         /// </remarks>
-        protected virtual Encoding StandardErrorEncoding
-        {
-            get
-            {
-                if (ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave17_10))
-                {
-                    if (_encoding != null)
-                    {
-                        // Keep the encoding of standard output & error consistent with the console code page.
-                        return _encoding;
-                    }
-                }
-                return EncodingUtilities.CurrentSystemOemEncoding;
-            }
-        }
+        protected virtual Encoding StandardErrorEncoding => GetDefaultToolEncoding();
 
         /// <summary>
         /// Gets the Path override value.