From 57a19ac3365f1dc255e6f24fcb7afcde2ccef8f9 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 15 Aug 2024 13:34:03 +0200 Subject: [PATCH] Reapply "[lldb] Tolerate multiple compile units with the same DWO ID (#100577)" (#104041) The only change vs. the first version of the patch is that I've made DWARFUnit linking thread-safe/unit. This was necessary because, during the indexing step, two skeleton units could attempt to associate themselves with the split unit. The original commit message was: I ran into this when LTO completely emptied two compile units, so they ended up with the same hash (see #100375). Although, ideally, the compiler would try to ensure we don't end up with a hash collision even in this case, guaranteeing their absence is practically impossible. This patch ensures this situation does not bring down lldb. --- .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 41 ++--- .../Plugins/SymbolFile/DWARF/DWARFUnit.h | 4 +- .../SymbolFile/DWARF/x86/dwp-hash-collision.s | 142 ++++++++++++++++++ 3 files changed, 168 insertions(+), 19 deletions(-) create mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 66a762bf9b6854..81f937762e35a6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -97,12 +97,14 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() { *m_dwo_id, m_first_die.GetOffset())); return; // Can't fetch the compile unit from the dwo file. } - // If the skeleton compile unit gets its unit DIE parsed first, then this - // will fill in the DWO file's back pointer to this skeleton compile unit. - // If the DWO files get parsed on their own first the skeleton back link - // can be done manually in DWARFUnit::GetSkeletonCompileUnit() which will - // do a reverse lookup and cache the result. - dwo_cu->SetSkeletonUnit(this); + + // Link the DWO unit to this object, if it hasn't been linked already (this + // can happen when we have an index, and the DWO unit is parsed first). + if (!dwo_cu->LinkToSkeletonUnit(*this)) { + SetDwoError(Status::createWithFormat( + "multiple compile units with Dwo ID {0:x16}", *m_dwo_id)); + return; + } DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly(); if (!dwo_cu_die.IsValid()) { @@ -708,23 +710,28 @@ uint8_t DWARFUnit::GetAddressByteSize(const DWARFUnit *cu) { uint8_t DWARFUnit::GetDefaultAddressSize() { return 4; } DWARFCompileUnit *DWARFUnit::GetSkeletonUnit() { - if (m_skeleton_unit == nullptr && IsDWOUnit()) { + if (m_skeleton_unit.load() == nullptr && IsDWOUnit()) { SymbolFileDWARFDwo *dwo = llvm::dyn_cast_or_null(&GetSymbolFileDWARF()); // Do a reverse lookup if the skeleton compile unit wasn't set. - if (dwo) - m_skeleton_unit = dwo->GetBaseSymbolFile().GetSkeletonUnit(this); + DWARFUnit *candidate_skeleton_unit = + dwo ? dwo->GetBaseSymbolFile().GetSkeletonUnit(this) : nullptr; + if (candidate_skeleton_unit) + (void)LinkToSkeletonUnit(*candidate_skeleton_unit); + // Linking may fail due to a race, so be sure to return the actual value. } - return llvm::dyn_cast_or_null(m_skeleton_unit); + return llvm::dyn_cast_or_null(m_skeleton_unit.load()); } -void DWARFUnit::SetSkeletonUnit(DWARFUnit *skeleton_unit) { - // If someone is re-setting the skeleton compile unit backlink, make sure - // it is setting it to a valid value when it wasn't valid, or if the - // value in m_skeleton_unit was valid, it should be the same value. - assert(skeleton_unit); - assert(m_skeleton_unit == nullptr || m_skeleton_unit == skeleton_unit); - m_skeleton_unit = skeleton_unit; +bool DWARFUnit::LinkToSkeletonUnit(DWARFUnit &skeleton_unit) { + DWARFUnit *expected_unit = nullptr; + if (m_skeleton_unit.compare_exchange_strong(expected_unit, &skeleton_unit)) + return true; + if (expected_unit == &skeleton_unit) { + // Exchange failed because it already contained the right value. + return true; + } + return false; // Already linked to a different unit. } bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 85c37971ced8e0..148932d67b908c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -170,7 +170,7 @@ class DWARFUnit : public UserID { /// both cases correctly and avoids crashes. DWARFCompileUnit *GetSkeletonUnit(); - void SetSkeletonUnit(DWARFUnit *skeleton_unit); + bool LinkToSkeletonUnit(DWARFUnit &skeleton_unit); bool Supports_DW_AT_APPLE_objc_complete_type(); @@ -308,7 +308,7 @@ class DWARFUnit : public UserID { const llvm::DWARFAbbreviationDeclarationSet *m_abbrevs = nullptr; lldb_private::CompileUnit *m_lldb_cu = nullptr; // If this is a DWO file, we have a backlink to our skeleton compile unit. - DWARFUnit *m_skeleton_unit = nullptr; + std::atomic m_skeleton_unit = nullptr; // The compile unit debug information entry item DWARFDebugInfoEntry::collection m_die_array; mutable llvm::sys::RWMutex m_die_array_mutex; diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s new file mode 100644 index 00000000000000..d626b4602ad58f --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s @@ -0,0 +1,142 @@ +## Test that lldb handles (mainly, that it doesn't crash) the situation where +## two skeleton compile units have the same DWO ID (and try to claim the same +## split unit from the DWP file. This can sometimes happen when the compile unit +## is nearly empty (e.g. because LTO has optimized all of it away). + +# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s --defsym MAIN=0 > %t +# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t.dwp +# RUN: %lldb %t -o "image lookup -t my_enum_type" \ +# RUN: -o "image dump separate-debug-info" -o exit | FileCheck %s + +## Check that we're able to access the type within the split unit (no matter +## which skeleton unit it ends up associated with). Completely ignoring the unit +## might also be reasonable. +# CHECK: image lookup -t my_enum_type +# CHECK: 1 match found +# CHECK: name = "my_enum_type", byte-size = 4, compiler_type = "enum my_enum_type { +# CHECK-NEXT: }" +# +## Check that we get some indication of the error. +# CHECK: image dump separate-debug-info +# CHECK: Dwo ID Err Dwo Path +# CHECK: 0xdeadbeefbaadf00d E multiple compile units with Dwo ID 0xdeadbeefbaadf00d + +.set DWO_ID, 0xdeadbeefbaadf00d + +## The main file. +.ifdef MAIN + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 74 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 0x76 # DW_AT_dwo_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + + .section .debug_info,"",@progbits +.irpc I,01 +.Lcu_begin\I: + .long .Ldebug_info_end\I-.Ldebug_info_start\I # Length of Unit +.Ldebug_info_start\I: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .quad DWO_ID # DWO id + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .ascii "foo" + .byte '0' + \I + .asciz ".dwo\0" # DW_AT_dwo_name +.Ldebug_info_end\I: +.endr + +.else +## DWP file starts here. + + .section .debug_abbrev.dwo,"e",@progbits +.LAbbrevBegin: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 4 # DW_TAG_enumeration_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) +.LAbbrevEnd: + .section .debug_info.dwo,"e",@progbits +.LCUBegin: +.Lcu_begin1: + .long .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit +.Ldebug_info_start1: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. Section + .quad DWO_ID # DWO id + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .short 12 # DW_AT_language + .byte 2 # Abbrev [2] DW_TAG_enumeration_type + .asciz "my_enum_type" # DW_AT_name + .long .Lint-.Lcu_begin1 # DW_AT_type + .byte 4 # DW_AT_byte_size +.Lint: + .byte 4 # Abbrev [4] DW_TAG_base_type + .asciz "int" # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end1: +.LCUEnd: + .section .debug_cu_index, "", @progbits +## Header: + .short 5 # Version + .short 0 # Padding + .long 2 # Section count + .long 1 # Unit count + .long 2 # Slot count +## Hash Table of Signatures: + .quad 0 + .quad DWO_ID +## Parallel Table of Indexes: + .long 0 + .long 1 +## Table of Section Offsets: +## Row 0: + .long 1 # DW_SECT_INFO + .long 3 # DW_SECT_ABBREV +## Row 1: + .long 0 # Offset in .debug_info.dwo + .long 0 # Offset in .debug_abbrev.dwo +## Table of Section Sizes: + .long .LCUEnd-.LCUBegin # Size in .debug_info.dwo + .long .LAbbrevEnd-.LAbbrevBegin # Size in .debug_abbrev.dwo +.endif