Skip to content

Commit

Permalink
Reapply "[lldb] Tolerate multiple compile units with the same DWO ID (l…
Browse files Browse the repository at this point in the history
…lvm#100577)" (llvm#104041)

The only change vs. the first version of the patch is that I've made
DWARFUnit linking thread-safe/unit. This was necessary because, during
the
indexing step, two skeleton units could attempt to associate themselves
with the split unit.

The original commit message was:

I ran into this when LTO completely emptied two compile units, so they
ended up with the same hash (see llvm#100375). Although, ideally, the
compiler would try to ensure we don't end up with a hash collision even
in this case, guaranteeing their absence is practically impossible. This
patch ensures this situation does not bring down lldb.
  • Loading branch information
labath authored Aug 15, 2024
1 parent 7898866 commit 57a19ac
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 19 deletions.
41 changes: 24 additions & 17 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,14 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() {
*m_dwo_id, m_first_die.GetOffset()));
return; // Can't fetch the compile unit from the dwo file.
}
// If the skeleton compile unit gets its unit DIE parsed first, then this
// will fill in the DWO file's back pointer to this skeleton compile unit.
// If the DWO files get parsed on their own first the skeleton back link
// can be done manually in DWARFUnit::GetSkeletonCompileUnit() which will
// do a reverse lookup and cache the result.
dwo_cu->SetSkeletonUnit(this);

// Link the DWO unit to this object, if it hasn't been linked already (this
// can happen when we have an index, and the DWO unit is parsed first).
if (!dwo_cu->LinkToSkeletonUnit(*this)) {
SetDwoError(Status::createWithFormat(
"multiple compile units with Dwo ID {0:x16}", *m_dwo_id));
return;
}

DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly();
if (!dwo_cu_die.IsValid()) {
Expand Down Expand Up @@ -708,23 +710,28 @@ uint8_t DWARFUnit::GetAddressByteSize(const DWARFUnit *cu) {
uint8_t DWARFUnit::GetDefaultAddressSize() { return 4; }

DWARFCompileUnit *DWARFUnit::GetSkeletonUnit() {
if (m_skeleton_unit == nullptr && IsDWOUnit()) {
if (m_skeleton_unit.load() == nullptr && IsDWOUnit()) {
SymbolFileDWARFDwo *dwo =
llvm::dyn_cast_or_null<SymbolFileDWARFDwo>(&GetSymbolFileDWARF());
// Do a reverse lookup if the skeleton compile unit wasn't set.
if (dwo)
m_skeleton_unit = dwo->GetBaseSymbolFile().GetSkeletonUnit(this);
DWARFUnit *candidate_skeleton_unit =
dwo ? dwo->GetBaseSymbolFile().GetSkeletonUnit(this) : nullptr;
if (candidate_skeleton_unit)
(void)LinkToSkeletonUnit(*candidate_skeleton_unit);
// Linking may fail due to a race, so be sure to return the actual value.
}
return llvm::dyn_cast_or_null<DWARFCompileUnit>(m_skeleton_unit);
return llvm::dyn_cast_or_null<DWARFCompileUnit>(m_skeleton_unit.load());
}

void DWARFUnit::SetSkeletonUnit(DWARFUnit *skeleton_unit) {
// If someone is re-setting the skeleton compile unit backlink, make sure
// it is setting it to a valid value when it wasn't valid, or if the
// value in m_skeleton_unit was valid, it should be the same value.
assert(skeleton_unit);
assert(m_skeleton_unit == nullptr || m_skeleton_unit == skeleton_unit);
m_skeleton_unit = skeleton_unit;
bool DWARFUnit::LinkToSkeletonUnit(DWARFUnit &skeleton_unit) {
DWARFUnit *expected_unit = nullptr;
if (m_skeleton_unit.compare_exchange_strong(expected_unit, &skeleton_unit))
return true;
if (expected_unit == &skeleton_unit) {
// Exchange failed because it already contained the right value.
return true;
}
return false; // Already linked to a different unit.
}

bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() {
Expand Down
4 changes: 2 additions & 2 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ class DWARFUnit : public UserID {
/// both cases correctly and avoids crashes.
DWARFCompileUnit *GetSkeletonUnit();

void SetSkeletonUnit(DWARFUnit *skeleton_unit);
bool LinkToSkeletonUnit(DWARFUnit &skeleton_unit);

bool Supports_DW_AT_APPLE_objc_complete_type();

Expand Down Expand Up @@ -308,7 +308,7 @@ class DWARFUnit : public UserID {
const llvm::DWARFAbbreviationDeclarationSet *m_abbrevs = nullptr;
lldb_private::CompileUnit *m_lldb_cu = nullptr;
// If this is a DWO file, we have a backlink to our skeleton compile unit.
DWARFUnit *m_skeleton_unit = nullptr;
std::atomic<DWARFUnit *> m_skeleton_unit = nullptr;
// The compile unit debug information entry item
DWARFDebugInfoEntry::collection m_die_array;
mutable llvm::sys::RWMutex m_die_array_mutex;
Expand Down
142 changes: 142 additions & 0 deletions lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
## Test that lldb handles (mainly, that it doesn't crash) the situation where
## two skeleton compile units have the same DWO ID (and try to claim the same
## split unit from the DWP file. This can sometimes happen when the compile unit
## is nearly empty (e.g. because LTO has optimized all of it away).

# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s --defsym MAIN=0 > %t
# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t.dwp
# RUN: %lldb %t -o "image lookup -t my_enum_type" \
# RUN: -o "image dump separate-debug-info" -o exit | FileCheck %s

## Check that we're able to access the type within the split unit (no matter
## which skeleton unit it ends up associated with). Completely ignoring the unit
## might also be reasonable.
# CHECK: image lookup -t my_enum_type
# CHECK: 1 match found
# CHECK: name = "my_enum_type", byte-size = 4, compiler_type = "enum my_enum_type {
# CHECK-NEXT: }"
#
## Check that we get some indication of the error.
# CHECK: image dump separate-debug-info
# CHECK: Dwo ID Err Dwo Path
# CHECK: 0xdeadbeefbaadf00d E multiple compile units with Dwo ID 0xdeadbeefbaadf00d

.set DWO_ID, 0xdeadbeefbaadf00d

## The main file.
.ifdef MAIN
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 74 # DW_TAG_compile_unit
.byte 0 # DW_CHILDREN_no
.byte 0x76 # DW_AT_dwo_name
.byte 8 # DW_FORM_string
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)


.section .debug_info,"",@progbits
.irpc I,01
.Lcu_begin\I:
.long .Ldebug_info_end\I-.Ldebug_info_start\I # Length of Unit
.Ldebug_info_start\I:
.short 5 # DWARF version number
.byte 4 # DWARF Unit Type
.byte 8 # Address Size (in bytes)
.long .debug_abbrev # Offset Into Abbrev. Section
.quad DWO_ID # DWO id
.byte 1 # Abbrev [1] DW_TAG_compile_unit
.ascii "foo"
.byte '0' + \I
.asciz ".dwo\0" # DW_AT_dwo_name
.Ldebug_info_end\I:
.endr

.else
## DWP file starts here.

.section .debug_abbrev.dwo,"e",@progbits
.LAbbrevBegin:
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 8 # DW_FORM_string
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 4 # DW_TAG_enumeration_type
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 8 # DW_FORM_string
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 11 # DW_AT_byte_size
.byte 11 # DW_FORM_data1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 4 # Abbreviation Code
.byte 36 # DW_TAG_base_type
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 8 # DW_FORM_string
.byte 62 # DW_AT_encoding
.byte 11 # DW_FORM_data1
.byte 11 # DW_AT_byte_size
.byte 11 # DW_FORM_data1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.LAbbrevEnd:
.section .debug_info.dwo,"e",@progbits
.LCUBegin:
.Lcu_begin1:
.long .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
.Ldebug_info_start1:
.short 5 # DWARF version number
.byte 5 # DWARF Unit Type
.byte 8 # Address Size (in bytes)
.long 0 # Offset Into Abbrev. Section
.quad DWO_ID # DWO id
.byte 1 # Abbrev [1] DW_TAG_compile_unit
.asciz "Hand-written DWARF" # DW_AT_producer
.short 12 # DW_AT_language
.byte 2 # Abbrev [2] DW_TAG_enumeration_type
.asciz "my_enum_type" # DW_AT_name
.long .Lint-.Lcu_begin1 # DW_AT_type
.byte 4 # DW_AT_byte_size
.Lint:
.byte 4 # Abbrev [4] DW_TAG_base_type
.asciz "int" # DW_AT_name
.byte 5 # DW_AT_encoding
.byte 4 # DW_AT_byte_size
.byte 0 # End Of Children Mark
.Ldebug_info_end1:
.LCUEnd:
.section .debug_cu_index, "", @progbits
## Header:
.short 5 # Version
.short 0 # Padding
.long 2 # Section count
.long 1 # Unit count
.long 2 # Slot count
## Hash Table of Signatures:
.quad 0
.quad DWO_ID
## Parallel Table of Indexes:
.long 0
.long 1
## Table of Section Offsets:
## Row 0:
.long 1 # DW_SECT_INFO
.long 3 # DW_SECT_ABBREV
## Row 1:
.long 0 # Offset in .debug_info.dwo
.long 0 # Offset in .debug_abbrev.dwo
## Table of Section Sizes:
.long .LCUEnd-.LCUBegin # Size in .debug_info.dwo
.long .LAbbrevEnd-.LAbbrevBegin # Size in .debug_abbrev.dwo
.endif

0 comments on commit 57a19ac

Please sign in to comment.