Skip to content

Commit

Permalink
[release/8.0-staging] Enable TLS on linux/arm64 only for static resol…
Browse files Browse the repository at this point in the history
…ver (#106101)

* disable TLS for dynamic resolver

* add check to skip nop for older resolver

* Add test and config variables

* Apply suggestions from code review

Co-authored-by: Jan Kotas <jkotas@microsoft.com>

* Set all the tls values in the test explicitly

* Fix build on OSX arm64

---------

Co-authored-by: Kunal Pathak <Kunal.Pathak@microsoft.com>
Co-authored-by: Jan Kotas <jkotas@microsoft.com>
  • Loading branch information
3 people authored Aug 8, 2024
1 parent 5807052 commit 8912b21
Show file tree
Hide file tree
Showing 11 changed files with 303 additions and 1 deletion.
17 changes: 17 additions & 0 deletions src/coreclr/hosts/corerun/corerun.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ static void display_usage()
W(" -p, --property - Property to pass to runtime during initialization.\n")
W(" If a property value contains spaces, quote the entire argument.\n")
W(" May be supplied multiple times. Format: <key>=<value>.\n")
W(" -l, --preload - path to shared library to load before loading the CLR.\n")
W(" -d, --debug - causes corerun to wait for a debugger to attach before executing.\n")
W(" -e, --env - path to a .env file with environment variables that corerun should set.\n")
W(" -?, -h, --help - show this help.\n")
Expand Down Expand Up @@ -569,6 +570,22 @@ static bool parse_args(
config.user_defined_keys.push_back(std::move(key));
config.user_defined_values.push_back(std::move(value));
}
else if (pal::strcmp(option, W("l")) == 0 || (pal::strcmp(option, W("preload")) == 0))
{
i++;
if (i >= argc)
{
pal::fprintf(stderr, W("Option %s: missing shared library path\n"), arg);
break;
}

string_t library = argv[i];
pal::mod_t hMod;
if (!pal::try_load_library(library, hMod))
{
break;
}
}
else if (pal::strcmp(option, W("d")) == 0 || (pal::strcmp(option, W("debug")) == 0))
{
config.wait_to_debug = true;
Expand Down
23 changes: 23 additions & 0 deletions src/coreclr/hosts/corerun/corerun.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,17 @@ namespace pal
return hMod != nullptr;
}

// Attempts to load the library at `path` with LoadLibraryExW and stores the
// resulting module handle in `hMod`.
// Returns true when a module handle was obtained; otherwise writes the failing
// path and the GetLastError() code to stderr and returns false.
inline bool try_load_library(const pal::string_t& path, pal::mod_t& hMod)
{
    hMod = (pal::mod_t)::LoadLibraryExW(path.c_str(), nullptr, 0);
    if (hMod != nullptr)
        return true;

    // Report the failure right away, before any other call is made after the failed load.
    pal::fprintf(stderr, W("Failed to load: '%s'. Error: 0x%08x\n"), path.c_str(), ::GetLastError());
    return false;
}

inline bool try_load_coreclr(const pal::string_t& core_root, pal::mod_t& hMod)
{
pal::string_t coreclr_path = core_root;
Expand Down Expand Up @@ -602,6 +613,18 @@ namespace pal
return hMod != nullptr;
}

// Attempts to dlopen() the shared library at `path` (RTLD_NOW | RTLD_LOCAL) and
// stores the resulting handle in `hMod`.
// Returns true when a handle was obtained; otherwise writes the failing path and
// the dlerror() text to stderr and returns false.
inline bool try_load_library(const pal::string_t& path, pal::mod_t& hMod)
{
    hMod = (pal::mod_t)dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
    if (hMod != nullptr)
        return true;

    pal::fprintf(stderr, W("Failed to load: '%s'. Error: %s\n"), path.c_str(), dlerror());
    return false;
}


inline bool try_load_coreclr(const pal::string_t& core_root, pal::mod_t& hMod)
{
pal::string_t coreclr_path = core_root;
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,8 @@ CONFIG_STRING_INFO(INTERNAL_TailCallMax, W("TailCallMax"), "")
RETAIL_CONFIG_STRING_INFO(EXTERNAL_TailCallOpt, W("TailCallOpt"), "")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TailCallLoopOpt, W("TailCallLoopOpt"), 1, "Convert recursive tail calls to loops")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_Jit_NetFx40PInvokeStackResilience, W("NetFx40_PInvokeStackResilience"), (DWORD)-1, "Makes P/Invoke resilient against mismatched signature and calling convention (significant perf penalty).")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_DisableOptimizedThreadStaticAccess, W("DisableOptimizedThreadStaticAccess"), (DWORD)0, "Disable the OptimizedThreadStaticAccess feature.")
CONFIG_DWORD_INFO(EXTERNAL_AssertNotStaticTlsResolver, W("AssertNotStaticTlsResolver"), (DWORD)0, "Assert if we attempt to use the static tls resolver path.")

// AltJitAssertOnNYI should be 0 on targets where JIT is under development or bring up stage, so as to facilitate fallback to main JIT on hitting a NYI.
#if defined(TARGET_X86)
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/vm/arm64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -1035,4 +1035,18 @@ LEAF_ENTRY GetThreadStaticsVariableOffset, _TEXT
EPILOG_RETURN
LEAF_END GetThreadStaticsVariableOffset, _TEXT
// ------------------------------------------------------------------

// ------------------------------------------------------------------
// size_t GetTLSResolverAddress()

// Helper to get the TLS resolver address. The caller then uses it to determine whether a static or a dynamic resolver is in use.
LEAF_ENTRY GetTLSResolverAddress, _TEXT
// Save the fp/lr pair for the duration of the helper (restored below before returning).
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32
// x0 = page address of the TLS descriptor for t_ThreadStatics.
adrp x0, :tlsdesc:t_ThreadStatics
// x1 = value stored at the descriptor's low-12-bit offset within that page.
// Per this helper's contract, that value is the TLS resolver address.
ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics]
// Move the loaded address into x0, the register the C++ caller reads the size_t result from.
mov x0, x1
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32
EPILOG_RETURN
LEAF_END GetTLSResolverAddress, _TEXT
// ------------------------------------------------------------------
#endif // !TARGET_OSX
3 changes: 3 additions & 0 deletions src/coreclr/vm/eeconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ HRESULT EEConfig::Init()
fJitFramed = false;
fJitMinOpts = false;
fJitEnableOptionalRelocs = false;
fDisableOptimizedThreadStaticAccess = false;
fPInvokeRestoreEsp = (DWORD)-1;

fNgenBindOptimizeNonGac = false;
Expand Down Expand Up @@ -548,6 +549,8 @@ HRESULT EEConfig::sync()
iJitOptimizeType = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitOptimizeType);
if (iJitOptimizeType > OPT_RANDOM) iJitOptimizeType = OPT_DEFAULT;

fDisableOptimizedThreadStaticAccess = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_DisableOptimizedThreadStaticAccess) != 0;

#ifdef TARGET_X86
fPInvokeRestoreEsp = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_Jit_NetFx40PInvokeStackResilience);
#endif
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/vm/eeconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class EEConfig
bool JitFramed(void) const {LIMITED_METHOD_CONTRACT; return fJitFramed; }
bool JitMinOpts(void) const {LIMITED_METHOD_CONTRACT; return fJitMinOpts; }
bool JitEnableOptionalRelocs(void) const {LIMITED_METHOD_CONTRACT; return fJitEnableOptionalRelocs; }
bool DisableOptimizedThreadStaticAccess(void) const {LIMITED_METHOD_CONTRACT; return fDisableOptimizedThreadStaticAccess; }

// Tiered Compilation config
#if defined(FEATURE_TIERED_COMPILATION)
Expand Down Expand Up @@ -480,6 +481,7 @@ class EEConfig
bool fJitFramed; // Enable/Disable EBP based frames
bool fJitMinOpts; // Enable MinOpts for all jitted methods
bool fJitEnableOptionalRelocs; // Allow optional relocs
bool fDisableOptimizedThreadStaticAccess; // Disable OptimizedThreadStatic access

unsigned iJitOptimizeType; // 0=Blended,1=SmallCode,2=FastCode, default is 0=Blended

Expand Down
43 changes: 42 additions & 1 deletion src/coreclr/vm/jitinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1463,6 +1463,10 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO*
EE_TO_JIT_TRANSITION_LEAF();
}

#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_ARM64)
extern "C" size_t GetTLSResolverAddress();
#endif // !TARGET_OSX && TARGET_UNIX && TARGET_ARM64

/*********************************************************************/
void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
CORINFO_METHOD_HANDLE callerHandle,
Expand Down Expand Up @@ -1567,21 +1571,58 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
fieldAccessor = CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER;

pResult->helper = getSharedStaticsHelper(pField, pFieldMT);

bool optimizeThreadStaticAccess = false;
#if defined(TARGET_ARM)
// Optimization is disabled for linux/windows arm
#elif !defined(TARGET_WINDOWS) && defined(TARGET_X86)
// Optimization is disabled for linux/x86
#elif defined(TARGET_LINUX_MUSL) && defined(TARGET_ARM64)
// Optimization is disabled for linux musl arm64
#elif !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_ARM64)
// Optimization is enabled for linux/arm64 only for static resolver.
// For static resolver, the TP offset is same for all threads.
// For dynamic resolver, TP offset returned is for the current thread and
// will be different for the other threads.
uint32_t* resolverAddress = reinterpret_cast<uint32_t*>(GetTLSResolverAddress());
int ip = 0;
if ((resolverAddress[ip] == 0xd503201f) || (resolverAddress[ip] == 0xd503241f))
{
// nop might not be present in older resolver, so skip it.

// nop or hint 32
ip++;
}

if (
// ldr x0, [x0, #8]
(resolverAddress[ip] == 0xf9400400) &&
// ret
(resolverAddress[ip + 1] == 0xd65f03c0)
)
{
optimizeThreadStaticAccess = true;
#ifdef _DEBUG
if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_AssertNotStaticTlsResolver) != 0)
{
_ASSERTE(!"Detected static resolver in use when not expected");
}
#endif
}
#else
bool optimizeThreadStaticAccess = true;
optimizeThreadStaticAccess = true;
#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_AMD64)
// For linux/x64, check if compiled coreclr as .so file and not single file.
// For single file, the `tls_index` might not be accurate.
// Do not perform this optimization in such case.
optimizeThreadStaticAccess = GetTlsIndexObjectAddress() != nullptr;
#endif // !TARGET_OSX && TARGET_UNIX && TARGET_AMD64

if (g_pConfig->DisableOptimizedThreadStaticAccess())
{
optimizeThreadStaticAccess = false;
}

if (optimizeThreadStaticAccess)
{
// For windows x64/x86/arm64, linux x64/arm64/loongarch64/riscv64:
Expand Down
8 changes: 8 additions & 0 deletions src/tests/JIT/Directed/tls/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Licensed to the .NET Foundation under one or more agreements.
# The .NET Foundation licenses this file to you under the MIT license.

# Add the platform include directory to the header search path for the native test source.
include_directories(${INC_PLATFORM_DIR})

# Build testtls.cpp as the shared library "usetls".
add_library(usetls SHARED testtls.cpp)

# Install the built library into the "bin" destination.
install (TARGETS usetls DESTINATION bin)
130 changes: 130 additions & 0 deletions src/tests/JIT/Directed/tls/TestTLSWithLoadedDlls.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.


// This test verifies that the runtime properly handles the case where the TLS infrastructure in the runtime is forced
// to use a dynamic resolver. It validates the behavior on Linux Arm64 by setting a private config variable and running
// a set of multithreaded tasks, which have been known to crash the runtime when this case is handled incorrectly.

using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Runtime.Loader;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace TestTLSWithLoadedDlls
{
    /// <summary>
    /// Test driver with two modes:
    ///  * parent mode (no arguments): copies the native "usetls" library into many
    ///    numbered sub-directories and relaunches this assembly under corerun with one
    ///    "-l" (preload) option per copy, with DOTNET_AssertNotStaticTlsResolver=1 set;
    ///  * child mode ("RunLotsOfTasks"): runs batches of short asynchronous tasks.
    /// The parent returns the child's exit code; the child returns 100 on success.
    /// </summary>
    static class TLSWithLoadedDlls
    {
        /// <summary>
        /// Runs <paramref name="loopCount"/> batches of 100 tasks that each delay for 1 ms,
        /// waiting for every batch to finish and pausing 20 ms between batches.
        /// </summary>
        static async Task DoLotsOfAsyncWork(int loopCount)
        {
            for (int i = 0; i < loopCount; i++)
            {
                Console.WriteLine("Starting a new batch of tasks...");
                Task[] tasks = Enumerable.Range(1, 100).Select(_ => Task.Run(async () =>
                {
                    await Task.Delay(1);
                })).ToArray();

                await Task.WhenAll(tasks);

                Console.WriteLine("Batch of tasks completed. Main loop sleeping for 20 ms...");
                await Task.Delay(20);
            }
        }

        /// <summary>
        /// Entry point. With the single argument "RunLotsOfTasks" it runs the task workload
        /// and returns 100; otherwise it prepares the library copies, launches corerun and
        /// returns the exit code of that child process.
        /// </summary>
        static int Main(string[] args)
        {
            if ((args.Length == 1) && (args[0] == "RunLotsOfTasks"))
            {
                DoLotsOfAsyncWork(100).GetAwaiter().GetResult();
                return 100;
            }

            // Windows has a comparatively short command line length limit and has no problem
            // with many TLS-using images being loaded, so fewer copies are preloaded there.
            int libraryCount = OperatingSystem.IsWindows() ? 10 : 60;

            string testAssemblyPath = Assembly.GetExecutingAssembly().Location;
            string testDirectory = Path.GetDirectoryName(testAssemblyPath);
            string useTlsFileName = GetSharedLibraryFileNameForCurrentPlatform("usetls");
            string useTlsFilePath = Path.Combine(testDirectory, useTlsFileName);

            using Process process = new Process();
            process.StartInfo.FileName = GetCorerunPath();
            process.StartInfo.UseShellExecute = false;
            process.StartInfo.EnvironmentVariables["DOTNET_AssertNotStaticTlsResolver"] = "1";

            // Arguments are passed through ArgumentList so that paths containing spaces
            // reach corerun as single arguments.
            for (int i = 0; i < libraryCount; i++)
            {
                // Each copy lives in its own numbered directory so that every "-l" option
                // names a distinct file.
                string copyDirectory = Path.Combine(testDirectory, i.ToString());
                string copyPath = Path.Combine(copyDirectory, useTlsFileName);

                Directory.CreateDirectory(copyDirectory);
                if (!File.Exists(copyPath))
                {
                    File.Copy(useTlsFilePath, copyPath);
                }

                process.StartInfo.ArgumentList.Add("-l");
                process.StartInfo.ArgumentList.Add(copyPath);
            }

            process.StartInfo.ArgumentList.Add(testAssemblyPath);
            process.StartInfo.ArgumentList.Add("RunLotsOfTasks");

            Console.WriteLine($"Launching {process.StartInfo.FileName} {string.Join(" ", process.StartInfo.ArgumentList)}");

            process.Start();
            process.WaitForExit();
            return process.ExitCode;
        }

        /// <summary>
        /// Returns the path of the corerun executable inside the directory named by the
        /// CORE_ROOT environment variable.
        /// </summary>
        /// <exception cref="InvalidOperationException">CORE_ROOT is not set.</exception>
        private static string GetCorerunPath()
        {
            string coreRoot = Environment.GetEnvironmentVariable("CORE_ROOT");
            if (coreRoot is null)
            {
                throw new InvalidOperationException("The CORE_ROOT environment variable must be set to locate corerun.");
            }

            string corerunName = OperatingSystem.IsWindows() ? "CoreRun.exe" : "corerun";
            return Path.Combine(coreRoot, corerunName);
        }

        /// <summary>
        /// Returns the (prefix, suffix) pair used for shared library file names on the
        /// current platform: ("", ".dll") on Windows, ("lib", ".dylib") on macOS and
        /// ("lib", ".so") elsewhere.
        /// </summary>
        public static (string, string) GetSharedLibraryPrefixSuffix()
        {
            if (OperatingSystem.IsWindows())
                return (string.Empty, ".dll");

            if (OperatingSystem.IsMacOS())
                return ("lib", ".dylib");

            return ("lib", ".so");
        }

        /// <summary>
        /// Returns <paramref name="libraryName"/> wrapped in the current platform's
        /// shared library prefix and suffix.
        /// </summary>
        public static string GetSharedLibraryFileNameForCurrentPlatform(string libraryName)
        {
            (string prefix, string suffix) = GetSharedLibraryPrefixSuffix();
            return prefix + libraryName + suffix;
        }
    }
}
16 changes: 16 additions & 0 deletions src/tests/JIT/Directed/tls/TestTLSWithLoadedDlls.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test-infrastructure settings for this test. NOTE(review): the exact meaning of each
     property is defined by the test build infrastructure, not by this file; confirm there. -->
<PropertyGroup>
<CLRTestPriority>0</CLRTestPriority>
<RequiresProcessIsolation>true</RequiresProcessIsolation>
<ReferenceXUnitWrapperGenerator>false</ReferenceXUnitWrapperGenerator>
<NativeAotIncompatible>true</NativeAotIncompatible>
</PropertyGroup>
<!-- Compiler settings: PDB-only debug information with optimizations enabled. -->
<PropertyGroup>
<DebugType>PdbOnly</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<!-- Compile the .cs file named after this project and reference the native CMake project
     in CMakeLists.txt. -->
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
<CMakeProjectReference Include="CMakeLists.txt" />
</ItemGroup>
</Project>
Loading

0 comments on commit 8912b21

Please sign in to comment.