From d333a0de6829616427182b26923b14d779ce1dbb Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Tue, 30 Apr 2024 22:23:10 +0800 Subject: [PATCH] Revert "[Modules] No transitive source location change (#86912)" This reverts commit 6c3110464bac3600685af9650269b0b2b8669d34. Required by the post commit comments: https://github.com/llvm/llvm-project/pull/86912 --- clang/include/clang/Basic/SourceLocation.h | 1 - .../include/clang/Serialization/ASTBitCodes.h | 56 +++++++----- clang/include/clang/Serialization/ASTReader.h | 48 ++++------ clang/include/clang/Serialization/ASTWriter.h | 4 - .../include/clang/Serialization/ModuleFile.h | 14 +-- .../Serialization/SourceLocationEncoding.h | 91 ++++++------------- clang/lib/Frontend/ASTUnit.cpp | 2 + clang/lib/Serialization/ASTReader.cpp | 57 ++++++++---- clang/lib/Serialization/ASTReaderDecl.cpp | 2 +- clang/lib/Serialization/ASTWriter.cpp | 41 ++------- clang/lib/Serialization/ASTWriterDecl.cpp | 8 +- clang/lib/Serialization/ModuleFile.cpp | 1 + .../no-transitive-source-location-change.cppm | 87 ------------------ clang/test/Modules/pr61067.cppm | 25 +++++ .../SourceLocationEncodingTest.cpp | 12 +-- 15 files changed, 162 insertions(+), 287 deletions(-) delete mode 100644 clang/test/Modules/no-transitive-source-location-change.cppm diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h index 7a0f5ba8d1270b..00b1e0fa855b7a 100644 --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -90,7 +90,6 @@ class SourceLocation { friend class ASTWriter; friend class SourceManager; friend struct llvm::FoldingSetTrait; - friend class SourceLocationEncoding; public: using UIntTy = uint32_t; diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 93e971d7e142c3..a8df5a0bda0850 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -23,7 +23,6 @@ #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/SourceLocation.h" -#include "clang/Serialization/SourceLocationEncoding.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Bitstream/BitCodes.h" #include @@ -168,38 +167,45 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1; /// Source range/offset of a preprocessed entity. struct PPEntityOffset { - using RawLocEncoding = SourceLocationEncoding::RawLocEncoding; - /// Raw source location of beginning of range. - RawLocEncoding Begin; + SourceLocation::UIntTy Begin; /// Raw source location of end of range. - RawLocEncoding End; + SourceLocation::UIntTy End; /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase. uint32_t BitOffset; - PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset) - : Begin(Begin), End(End), BitOffset(BitOffset) {} + PPEntityOffset(SourceRange R, uint32_t BitOffset) + : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()), + BitOffset(BitOffset) {} + + SourceLocation getBegin() const { + return SourceLocation::getFromRawEncoding(Begin); + } - RawLocEncoding getBegin() const { return Begin; } - RawLocEncoding getEnd() const { return End; } + SourceLocation getEnd() const { + return SourceLocation::getFromRawEncoding(End); + } }; /// Source range of a skipped preprocessor region struct PPSkippedRange { - using RawLocEncoding = SourceLocationEncoding::RawLocEncoding; - /// Raw source location of beginning of range. - RawLocEncoding Begin; + SourceLocation::UIntTy Begin; /// Raw source location of end of range. - RawLocEncoding End; + SourceLocation::UIntTy End; - PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End) - : Begin(Begin), End(End) {} + PPSkippedRange(SourceRange R) + : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) { + } - RawLocEncoding getBegin() const { return Begin; } - RawLocEncoding getEnd() const { return End; } + SourceLocation getBegin() const { + return SourceLocation::getFromRawEncoding(Begin); + } + SourceLocation getEnd() const { + return SourceLocation::getFromRawEncoding(End); + } }; /// Offset in the AST file. Use splitted 64-bit integer into low/high @@ -225,10 +231,8 @@ struct UnderalignedInt64 { /// Source location and bit offset of a declaration. struct DeclOffset { - using RawLocEncoding = SourceLocationEncoding::RawLocEncoding; - /// Raw source location. - RawLocEncoding RawLoc = 0; + SourceLocation::UIntTy Loc = 0; /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep /// structure alignment 32-bit and avoid padding gap because undefined @@ -236,15 +240,17 @@ struct DeclOffset { UnderalignedInt64 BitOffset; DeclOffset() = default; - DeclOffset(RawLocEncoding RawLoc, uint64_t BitOffset, - uint64_t DeclTypesBlockStartOffset) - : RawLoc(RawLoc) { + DeclOffset(SourceLocation Loc, uint64_t BitOffset, + uint64_t DeclTypesBlockStartOffset) { + setLocation(Loc); setBitOffset(BitOffset, DeclTypesBlockStartOffset); } - void setRawLoc(RawLocEncoding Loc) { RawLoc = Loc; } + void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); } - RawLocEncoding getRawLoc() const { return RawLoc; } + SourceLocation getLocation() const { + return SourceLocation::getFromRawEncoding(Loc); + } void setBitOffset(uint64_t Offset, const uint64_t DeclTypesBlockStartOffset) { BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset); diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index e24fa121528f3f..64f1ebc117b327 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1771,7 +1771,6 @@ class ASTReader /// Retrieve the module manager. ModuleManager &getModuleManager() { return ModuleMgr; } - const ModuleManager &getModuleManager() const { return ModuleMgr; } /// Retrieve the preprocessor. Preprocessor &getPreprocessor() const { return PP; } @@ -2178,8 +2177,8 @@ class ASTReader /// Retrieve the global submodule ID given a module and its local ID /// number. - serialization::SubmoduleID getGlobalSubmoduleID(ModuleFile &M, - unsigned LocalID) const; + serialization::SubmoduleID + getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID); /// Retrieve the submodule that corresponds to a global submodule ID. /// @@ -2192,7 +2191,7 @@ class ASTReader /// Retrieve the module file with a given local ID within the specified /// ModuleFile. - ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID) const; + ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID); /// Get an ID for the given module file. unsigned getModuleFileID(ModuleFile *M); @@ -2228,46 +2227,33 @@ class ASTReader return Sema::AlignPackInfo::getFromRawEncoding(Raw); } - using RawLocEncoding = SourceLocationEncoding::RawLocEncoding; - /// Read a source location from raw form and return it in its /// originating module file's source location space. - std::pair - ReadUntranslatedSourceLocation(RawLocEncoding Raw, - LocSeq *Seq = nullptr) const { + SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw, + LocSeq *Seq = nullptr) const { return SourceLocationEncoding::decode(Raw, Seq); } /// Read a source location from raw form. - SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw, + SourceLocation ReadSourceLocation(ModuleFile &ModuleFile, + SourceLocation::UIntTy Raw, LocSeq *Seq = nullptr) const { - if (!MF.ModuleOffsetMap.empty()) - ReadModuleOffsetMap(MF); - - auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq); - ModuleFile *OwningModuleFile = - ModuleFileIndex == 0 ? &MF : MF.DependentModules[ModuleFileIndex - 1]; - - assert(!SourceMgr.isLoadedSourceLocation(Loc) && - "Run out source location space"); - - return TranslateSourceLocation(*OwningModuleFile, Loc); + SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq); + return TranslateSourceLocation(ModuleFile, Loc); } /// Translate a source location from another module file's source /// location space into ours. SourceLocation TranslateSourceLocation(ModuleFile &ModuleFile, SourceLocation Loc) const { - if (Loc.isInvalid()) - return Loc; - - // FIXME: TranslateSourceLocation is not re-enterable. It is problematic - // to call TranslateSourceLocation on a translated source location. - // We either need a method to know whether or not a source location is - // translated or refactor the code to make it clear that - // TranslateSourceLocation won't be called with translated source location. - - return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2); + if (!ModuleFile.ModuleOffsetMap.empty()) + ReadModuleOffsetMap(ModuleFile); + assert(ModuleFile.SLocRemap.find(Loc.getOffset()) != + ModuleFile.SLocRemap.end() && + "Cannot find offset to remap."); + SourceLocation::IntTy Remap = + ModuleFile.SLocRemap.find(Loc.getOffset())->second; + return Loc.getLocWithOffset(Remap); } /// Read a source location. diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index cfcd787530d0e9..436acbb1803ca7 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -667,10 +667,6 @@ class ASTWriter : public ASTDeserializationListener, void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, LocSeq *Seq = nullptr); - /// Return the raw encodings for source locations. - SourceLocationEncoding::RawLocEncoding - getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr); - /// Emit a source range. void AddSourceRange(SourceRange Range, RecordDataImpl &Record, LocSeq *Seq = nullptr); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index ba8df0ed259427..25f644e76edb1a 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -295,6 +295,10 @@ class ModuleFile { /// AST file. const uint32_t *SLocEntryOffsets = nullptr; + /// Remapping table for source locations in this module. + ContinuousRangeMap + SLocRemap; + // === Identifiers === /// The number of identifiers in this AST file. @@ -508,17 +512,9 @@ class ModuleFile { /// List of modules which depend on this module llvm::SetVector ImportedBy; - /// List of modules which this module directly imported + /// List of modules which this module depends on llvm::SetVector Imports; - /// List of modules which this modules dependent on. Different - /// from `Imports`, this includes indirectly imported modules too. - /// The order of DependentModules is significant. It should keep - /// the same order with that module file manager when we write - /// the current module file. The value of the member will be initialized - /// in `ASTReader::ReadModuleOffsetMap`. - llvm::SmallVector DependentModules; - /// Determine whether this module was directly imported at /// any point during translation. bool isDirectlyImported() const { return DirectlyImported; } diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h index 33ca1728fa4797..9bb0dbe2e4d6f6 100644 --- a/clang/include/clang/Serialization/SourceLocationEncoding.h +++ b/clang/include/clang/Serialization/SourceLocationEncoding.h @@ -6,33 +6,28 @@ // //===----------------------------------------------------------------------===// // -// We wish to encode the SourceLocation from other module file not dependent -// on the other module file. So that the source location changes from other -// module file may not affect the contents of the current module file. Then the -// users don't need to recompile the whole project due to a new line in a module -// unit in the root of the dependency graph. +// Source locations are stored pervasively in the AST, making up a third of +// the size of typical serialized files. Storing them efficiently is important. // -// To achieve this, we need to encode the index of the module file into the -// encoding of the source location. The encoding of the source location may be: +// We use integers optimized by VBR-encoding, because: +// - when abbreviations cannot be used, VBR6 encoding is our only choice +// - in the worst case a SourceLocation can be ~any 32-bit number, but in +// practice they are highly predictable // -// |-----------------------|-----------------------| -// | A | B | C | -// -// * A: 32 bit. The index of the module file in the module manager + 1. The +1 -// here is necessary since we wish 0 stands for the current module file. -// * B: 31 bit. The offset of the source location to the module file containing -// it. -// * C: The macro bit. We rotate it to the lowest bit so that we can save some -// space in case the index of the module file is 0. -// -// Specially, if the index of the module file is 0, we allow to encode a -// sequence of locations we store only differences between successive elements. +// We encode the integer so that likely values encode as small numbers that +// turn into few VBR chunks: +// - the invalid sentinel location is a very common value: it encodes as 0 +// - the "macro or not" bit is stored at the bottom of the integer +// (rather than at the top, as in memory), so macro locations can have +// small representations. +// - related locations (e.g. of a left and right paren pair) are usually +// similar, so when encoding a sequence of locations we store only +// differences between successive elements. // //===----------------------------------------------------------------------===// -#include "clang/Basic/SourceLocation.h" -#include "llvm/Support/MathExtras.h" #include +#include "clang/Basic/SourceLocation.h" #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H @@ -57,13 +52,9 @@ class SourceLocationEncoding { friend SourceLocationSequence; public: - using RawLocEncoding = uint64_t; - - static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset, - unsigned BaseModuleFileIndex, - SourceLocationSequence * = nullptr); - static std::pair - decode(RawLocEncoding, SourceLocationSequence * = nullptr); + static uint64_t encode(SourceLocation Loc, + SourceLocationSequence * = nullptr); + static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr); }; /// Serialized encoding of a sequence of SourceLocations. @@ -158,44 +149,14 @@ class SourceLocationSequence::State { operator SourceLocationSequence *() { return &Seq; } }; -inline SourceLocationEncoding::RawLocEncoding -SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset, - unsigned BaseModuleFileIndex, - SourceLocationSequence *Seq) { - // If the source location is a local source location, we can try to optimize - // the similar sequences to only record the differences. - if (!BaseOffset) - return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); - - if (Loc.isInvalid()) - return 0; - - // Otherwise, the higher bits are used to store the module file index, - // so it is meaningless to optimize the source locations into small - // integers. Let's try to always use the raw encodings. - assert(Loc.getOffset() >= BaseOffset); - Loc = Loc.getLocWithOffset(-BaseOffset); - RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding()); - - // 16 bits should be sufficient to store the module file index. - assert(BaseModuleFileIndex < (1 << 16)); - Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32; - return Encoded; +inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc, + SourceLocationSequence *Seq) { + return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); } -inline std::pair -SourceLocationEncoding::decode(RawLocEncoding Encoded, - SourceLocationSequence *Seq) { - unsigned ModuleFileIndex = Encoded >> 32; - - if (!ModuleFileIndex) - return {Seq ? Seq->decode(Encoded) - : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)), - ModuleFileIndex}; - - Encoded &= llvm::maskTrailingOnes(32); - SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); - - return {Loc, ModuleFileIndex}; +inline SourceLocation +SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) { + return Seq ? Seq->decode(Encoded) + : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); } } // namespace clang diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 755aaddc0ad747..1b93588553a276 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -2374,6 +2374,8 @@ bool ASTUnit::serialize(raw_ostream &OS) { return serializeUnit(Writer, Buffer, getSema(), OS); } +using SLocRemap = ContinuousRangeMap; + void ASTUnit::TranslateStoredDiagnostics( FileManager &FileMgr, SourceManager &SrcMgr, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a670f2a2af41c7..0ef57a3ea804ef 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3030,10 +3030,8 @@ ASTReader::ReadControlBlock(ModuleFile &F, // The import location will be the local one for now; we will adjust // all import locations of module imports after the global source // location info are setup, in ReadAST. - auto [ImportLoc, ImportModuleFileIndex] = + SourceLocation ImportLoc = ReadUntranslatedSourceLocation(Record[Idx++]); - // The import location must belong to the current module file itself. - assert(ImportModuleFileIndex == 0); off_t StoredSize = !IsImportingStdCXXModule ? (off_t)Record[Idx++] : 0; time_t StoredModTime = !IsImportingStdCXXModule ? (time_t)Record[Idx++] : 0; @@ -3655,6 +3653,13 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, std::make_pair(SourceManager::MaxLoadedOffset - F.SLocEntryBaseOffset - SLocSpaceSize,&F)); + // Initialize the remapping table. + // Invalid stays invalid. + F.SLocRemap.insertOrReplace(std::make_pair(0U, 0)); + // This module. Base was 2 when being compiled. + F.SLocRemap.insertOrReplace(std::make_pair( + 2U, static_cast(F.SLocEntryBaseOffset - 2))); + TotalNumSLocEntries += F.LocalNumSLocEntries; break; } @@ -4042,7 +4047,18 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { const unsigned char *DataEnd = Data + F.ModuleOffsetMap.size(); F.ModuleOffsetMap = StringRef(); + // If we see this entry before SOURCE_LOCATION_OFFSETS, add placeholders. + if (F.SLocRemap.find(0) == F.SLocRemap.end()) { + F.SLocRemap.insert(std::make_pair(0U, 0)); + F.SLocRemap.insert(std::make_pair(2U, 1)); + } + + // Continuous range maps we may be updating in our module. + using SLocRemapBuilder = + ContinuousRangeMap::Builder; using RemapBuilder = ContinuousRangeMap::Builder; + SLocRemapBuilder SLocRemap(F.SLocRemap); RemapBuilder IdentifierRemap(F.IdentifierRemap); RemapBuilder MacroRemap(F.MacroRemap); RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap); @@ -4051,9 +4067,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { RemapBuilder DeclRemap(F.DeclRemap); RemapBuilder TypeRemap(F.TypeRemap); - auto &ImportedModuleVector = F.DependentModules; - assert(ImportedModuleVector.empty()); - while (Data < DataEnd) { // FIXME: Looking up dependency modules by filename is horrible. Let's // start fixing this with prebuilt, explicit and implicit modules and see @@ -4069,14 +4082,15 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { ? ModuleMgr.lookupByModuleName(Name) : ModuleMgr.lookupByFileName(Name)); if (!OM) { - std::string Msg = "refers to unknown module, cannot find "; + std::string Msg = + "SourceLocation remap refers to unknown module, cannot find "; Msg.append(std::string(Name)); Error(Msg); return; } - ImportedModuleVector.push_back(OM); - + SourceLocation::UIntTy SLocOffset = + endian::readNext(Data); uint32_t IdentifierIDOffset = endian::readNext(Data); uint32_t MacroIDOffset = @@ -4100,6 +4114,13 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { static_cast(BaseOffset - Offset))); }; + constexpr SourceLocation::UIntTy SLocNone = + std::numeric_limits::max(); + if (SLocOffset != SLocNone) + SLocRemap.insert(std::make_pair( + SLocOffset, static_cast( + OM->SLocEntryBaseOffset - SLocOffset))); + mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap); mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap); mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID, @@ -6235,8 +6256,8 @@ SourceRange ASTReader::ReadSkippedRange(unsigned GlobalIndex) { unsigned LocalIndex = GlobalIndex - M->BasePreprocessedSkippedRangeID; assert(LocalIndex < M->NumPreprocessedSkippedRanges); PPSkippedRange RawRange = M->PreprocessedSkippedRangeOffsets[LocalIndex]; - SourceRange Range(ReadSourceLocation(*M, RawRange.getBegin()), - ReadSourceLocation(*M, RawRange.getEnd())); + SourceRange Range(TranslateSourceLocation(*M, RawRange.getBegin()), + TranslateSourceLocation(*M, RawRange.getEnd())); assert(Range.isValid()); return Range; } @@ -6272,8 +6293,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { return nullptr; // Read the record. - SourceRange Range(ReadSourceLocation(M, PPOffs.getBegin()), - ReadSourceLocation(M, PPOffs.getEnd())); + SourceRange Range(TranslateSourceLocation(M, PPOffs.getBegin()), + TranslateSourceLocation(M, PPOffs.getEnd())); PreprocessingRecord &PPRec = *PP.getPreprocessingRecord(); StringRef Blob; RecordData Record; @@ -6385,7 +6406,7 @@ struct PPEntityComp { } SourceLocation getLoc(const PPEntityOffset &PPE) const { - return Reader.ReadSourceLocation(M, PPE.getBegin()); + return Reader.TranslateSourceLocation(M, PPE.getBegin()); } }; @@ -6429,7 +6450,7 @@ PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc, PPI = First; std::advance(PPI, Half); if (SourceMgr.isBeforeInTranslationUnit( - ReadSourceLocation(M, PPI->getEnd()), Loc)) { + TranslateSourceLocation(M, PPI->getEnd()), Loc)) { First = PPI; ++First; Count = Count - Half - 1; @@ -6470,7 +6491,7 @@ std::optional ASTReader::isPreprocessedEntityInFileID(unsigned Index, unsigned LocalIndex = PPInfo.second; const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex]; - SourceLocation Loc = ReadSourceLocation(M, PPOffs.getBegin()); + SourceLocation Loc = TranslateSourceLocation(M, PPOffs.getBegin()); if (Loc.isInvalid()) return false; @@ -8943,7 +8964,7 @@ MacroID ASTReader::getGlobalMacroID(ModuleFile &M, unsigned LocalID) { } serialization::SubmoduleID -ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) const { +ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) { if (LocalID < NUM_PREDEF_SUBMODULE_IDS) return LocalID; @@ -8976,7 +8997,7 @@ Module *ASTReader::getModule(unsigned ID) { return getSubmodule(ID); } -ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &M, unsigned ID) const { +ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &M, unsigned ID) { if (ID & 1) { // It's a module, look it up by submodule ID. auto I = GlobalSubmoduleMap.find(getGlobalSubmoduleID(M, ID >> 1)); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 40b543f2b72462..744f11de88c2f8 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -3249,7 +3249,7 @@ ASTReader::RecordLocation ASTReader::DeclCursorForID(GlobalDeclID ID, ModuleFile *M = I->second; const DeclOffset &DOffs = M->DeclOffsets[ID.get() - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; - Loc = ReadSourceLocation(*M, DOffs.getRawLoc()); + Loc = TranslateSourceLocation(*M, DOffs.getLocation()); return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset)); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c3fcd1a4df2368..c638c767216d94 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2703,10 +2703,8 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; assert((Offset >> 32) == 0 && "Preprocessed entity offset too large"); - SourceRange R = getAdjustedRange((*E)->getSourceRange()); - PreprocessedEntityOffsets.emplace_back( - getRawSourceLocationEncoding(R.getBegin()), - getRawSourceLocationEncoding(R.getEnd()), Offset); + PreprocessedEntityOffsets.push_back( + PPEntityOffset(getAdjustedRange((*E)->getSourceRange()), Offset)); if (auto *MD = dyn_cast(*E)) { // Record this macro definition's ID. @@ -2773,9 +2771,7 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, std::vector SerializedSkippedRanges; SerializedSkippedRanges.reserve(SkippedRanges.size()); for (auto const& Range : SkippedRanges) - SerializedSkippedRanges.emplace_back( - getRawSourceLocationEncoding(Range.getBegin()), - getRawSourceLocationEncoding(Range.getEnd())); + SerializedSkippedRanges.emplace_back(Range); using namespace llvm; auto Abbrev = std::make_shared(); @@ -2941,8 +2937,8 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { ParentID = SubmoduleIDs[Mod->Parent]; } - SourceLocationEncoding::RawLocEncoding DefinitionLoc = - getRawSourceLocationEncoding(getAdjustedLocation(Mod->DefinitionLoc)); + uint64_t DefinitionLoc = + SourceLocationEncoding::encode(getAdjustedLocation(Mod->DefinitionLoc)); // Emit the definition of the block. { @@ -5358,6 +5354,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, // These values should be unique within a chain, since they will be read // as keys into ContinuousRangeMaps. + writeBaseIDOrNone(M.SLocEntryBaseOffset, M.LocalNumSLocEntries); writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers); writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros); writeBaseIDOrNone(M.BasePreprocessedEntityID, @@ -5850,34 +5847,10 @@ void ASTWriter::AddFileID(FileID FID, RecordDataImpl &Record) { Record.push_back(getAdjustedFileID(FID).getOpaqueValue()); } -SourceLocationEncoding::RawLocEncoding -ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) { - unsigned BaseOffset = 0; - unsigned ModuleFileIndex = 0; - - // See SourceLocationEncoding.h for the encoding details. - if (Context->getSourceManager().isLoadedSourceLocation(Loc) && - Loc.isValid()) { - assert(getChain()); - auto SLocMapI = getChain()->GlobalSLocOffsetMap.find( - SourceManager::MaxLoadedOffset - Loc.getOffset() - 1); - assert(SLocMapI != getChain()->GlobalSLocOffsetMap.end() && - "Corrupted global sloc offset map"); - ModuleFile *F = SLocMapI->second; - BaseOffset = F->SLocEntryBaseOffset - 2; - // 0 means the location is not loaded. So we need to add 1 to the index to - // make it clear. - ModuleFileIndex = F->Index + 1; - assert(&getChain()->getModuleManager()[F->Index] == F); - } - - return SourceLocationEncoding::encode(Loc, BaseOffset, ModuleFileIndex, Seq); -} - void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, SourceLocationSequence *Seq) { Loc = getAdjustedLocation(Loc); - Record.push_back(getRawSourceLocationEncoding(Loc, Seq)); + Record.push_back(SourceLocationEncoding::encode(Loc, Seq)); } void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record, diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 6201d284f0e025..0edc4feda3ef23 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2809,16 +2809,14 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) { // Record the offset for this declaration SourceLocation Loc = D->getLocation(); - SourceLocationEncoding::RawLocEncoding RawLoc = - getRawSourceLocationEncoding(getAdjustedLocation(Loc)); - unsigned Index = ID.get() - FirstDeclID.get(); if (DeclOffsets.size() == Index) - DeclOffsets.emplace_back(RawLoc, Offset, DeclTypesBlockStartOffset); + DeclOffsets.emplace_back(getAdjustedLocation(Loc), Offset, + DeclTypesBlockStartOffset); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? DeclOffsets.resize(Index+1); - DeclOffsets[Index].setRawLoc(RawLoc); + DeclOffsets[Index].setLocation(getAdjustedLocation(Loc)); DeclOffsets[Index].setBitOffset(Offset, DeclTypesBlockStartOffset); } else { llvm_unreachable("declarations should be emitted in ID order"); diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp index 2c42d33a8f5dd3..db896fd361159d 100644 --- a/clang/lib/Serialization/ModuleFile.cpp +++ b/clang/lib/Serialization/ModuleFile.cpp @@ -59,6 +59,7 @@ LLVM_DUMP_METHOD void ModuleFile::dump() { // Remapping tables. llvm::errs() << " Base source location offset: " << SLocEntryBaseOffset << '\n'; + dumpLocalRemap("Source location offset local -> global map", SLocRemap); llvm::errs() << " Base identifier ID: " << BaseIdentifierID << '\n' << " Number of identifiers: " << LocalNumIdentifiers << '\n'; diff --git a/clang/test/Modules/no-transitive-source-location-change.cppm b/clang/test/Modules/no-transitive-source-location-change.cppm deleted file mode 100644 index 303142a1af890b..00000000000000 --- a/clang/test/Modules/no-transitive-source-location-change.cppm +++ /dev/null @@ -1,87 +0,0 @@ -// Testing that adding a new line in a module interface unit won't cause the BMI -// of consuming module unit changes. -// -// RUN: rm -rf %t -// RUN: split-file %s %t -// -// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm -// RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-module-interface -o %t/A.v1.pcm -// -// The BMI may not be the same since the source location differs. -// RUN: not diff %t/A.pcm %t/A.v1.pcm &> /dev/null -// -// The BMI of B shouldn't change since all the locations remain the same. -// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \ -// RUN: -o %t/B.pcm -// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \ -// RUN: -o %t/B.v1.pcm -// RUN: diff %t/B.v1.pcm %t/B.pcm &> /dev/null -// -// The BMI of C may change since the locations for instantiations changes. -// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \ -// RUN: -o %t/C.pcm -// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \ -// RUN: -o %t/C.v1.pcm -// RUN: not diff %t/C.v1.pcm %t/C.pcm &> /dev/null -// -// Test again with reduced BMI. -// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm -// RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-reduced-module-interface -o %t/A.v1.pcm -// -// The BMI may not be the same since the source location differs. -// RUN: not diff %t/A.pcm %t/A.v1.pcm &> /dev/null -// -// The BMI of B shouldn't change since all the locations remain the same. -// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.pcm \ -// RUN: -o %t/B.pcm -// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.v1.pcm \ -// RUN: -o %t/B.v1.pcm -// RUN: diff %t/B.v1.pcm %t/B.pcm &> /dev/null -// -// The BMI of C may change since the locations for instantiations changes. -// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.pcm \ -// RUN: -o %t/C.pcm -// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.v1.pcm \ -// RUN: -o %t/C.v1.pcm -// RUN: not diff %t/C.v1.pcm %t/C.pcm &> /dev/null - -//--- A.cppm -export module A; -export template -struct C { - T func() { - return T(43); - } -}; -export int funcA() { - return 43; -} - -//--- A.v1.cppm -export module A; - -export template -struct C { - T func() { - return T(43); - } -}; -export int funcA() { - return 43; -} - -//--- B.cppm -export module B; -import A; - -export int funcB() { - return funcA(); -} - -//--- C.cppm -export module C; -import A; -export inline void testD() { - C c; - c.func(); -} diff --git a/clang/test/Modules/pr61067.cppm b/clang/test/Modules/pr61067.cppm index f853491fe76bf7..b7f9d22e253854 100644 --- a/clang/test/Modules/pr61067.cppm +++ b/clang/test/Modules/pr61067.cppm @@ -9,6 +9,22 @@ // RUN: -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/b.pcm // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \ // RUN: -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \ +// RUN: -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp + +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm \ +// RUN: -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cppm \ +// RUN: -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \ +// RUN: -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \ +// RUN: -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp //--- a.cppm export module a; @@ -27,3 +43,12 @@ void b() { } // CHECK: define{{.*}}linkonce_odr{{.*}}@_ZW1aeqS_1aS0_( + +//--- c.cpp +import a; + +int c() { + (void)(a() == a()); +} + +// CHECK: define{{.*}}linkonce_odr{{.*}}@_ZW1aeqS_1aS0_( diff --git a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp index c80a8fd0e52b17..141da4c27f8d0b 100644 --- a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp +++ b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp @@ -23,14 +23,13 @@ using LocSeq = SourceLocationSequence; // Loc is the raw (in-memory) form of SourceLocation. void roundTrip(SourceLocation::UIntTy Loc, std::optional ExpectedEncoded = std::nullopt) { - uint64_t ActualEncoded = SourceLocationEncoding::encode( - SourceLocation::getFromRawEncoding(Loc), /*BaseOffset=*/0, - /*BaseModuleFileIndex=*/0); + uint64_t ActualEncoded = + SourceLocationEncoding::encode(SourceLocation::getFromRawEncoding(Loc)); if (ExpectedEncoded) { ASSERT_EQ(ActualEncoded, *ExpectedEncoded) << "Encoding " << Loc; } SourceLocation::UIntTy DecodedEncoded = - SourceLocationEncoding::decode(ActualEncoded).first.getRawEncoding(); + SourceLocationEncoding::decode(ActualEncoded).getRawEncoding(); ASSERT_EQ(DecodedEncoded, Loc) << "Decoding " << ActualEncoded; } @@ -42,8 +41,7 @@ void roundTrip(std::vector Locs, LocSeq::State Seq; for (auto L : Locs) ActualEncoded.push_back(SourceLocationEncoding::encode( - SourceLocation::getFromRawEncoding(L), /*BaseOffset=*/0, - /*BaseModuleFileIndex=*/0, Seq)); + SourceLocation::getFromRawEncoding(L), Seq)); if (!ExpectedEncoded.empty()) { ASSERT_EQ(ActualEncoded, ExpectedEncoded) << "Encoding " << testing::PrintToString(Locs); @@ -53,7 +51,7 @@ void roundTrip(std::vector Locs, { LocSeq::State Seq; for (auto L : ActualEncoded) { - SourceLocation Loc = SourceLocationEncoding::decode(L, Seq).first; + SourceLocation Loc = SourceLocationEncoding::decode(L, Seq); DecodedEncoded.push_back(Loc.getRawEncoding()); } ASSERT_EQ(DecodedEncoded, Locs)