Skip to content

Commit

Permalink
Revert "[Modules] No transitive source location change (llvm#86912)"
Browse files Browse the repository at this point in the history
This reverts commit 6c31104.

Requested in the post-commit review comments on llvm#86912.
  • Loading branch information
ChuanqiXu9 committed Apr 30, 2024
1 parent c12bc57 commit d333a0d
Show file tree
Hide file tree
Showing 15 changed files with 162 additions and 287 deletions.
1 change: 0 additions & 1 deletion clang/include/clang/Basic/SourceLocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ class SourceLocation {
friend class ASTWriter;
friend class SourceManager;
friend struct llvm::FoldingSetTrait<SourceLocation, void>;
friend class SourceLocationEncoding;

public:
using UIntTy = uint32_t;
Expand Down
56 changes: 31 additions & 25 deletions clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Serialization/SourceLocationEncoding.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Bitstream/BitCodes.h"
#include <cassert>
Expand Down Expand Up @@ -168,38 +167,45 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;

/// Source range/offset of a preprocessed entity.
struct PPEntityOffset {
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Raw source location of beginning of range.
RawLocEncoding Begin;
SourceLocation::UIntTy Begin;

/// Raw source location of end of range.
RawLocEncoding End;
SourceLocation::UIntTy End;

/// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
uint32_t BitOffset;

PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
: Begin(Begin), End(End), BitOffset(BitOffset) {}
PPEntityOffset(SourceRange R, uint32_t BitOffset)
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
BitOffset(BitOffset) {}

SourceLocation getBegin() const {
return SourceLocation::getFromRawEncoding(Begin);
}

RawLocEncoding getBegin() const { return Begin; }
RawLocEncoding getEnd() const { return End; }
SourceLocation getEnd() const {
return SourceLocation::getFromRawEncoding(End);
}
};

/// Source range of a skipped preprocessor region
struct PPSkippedRange {
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Raw source location of beginning of range.
RawLocEncoding Begin;
SourceLocation::UIntTy Begin;
/// Raw source location of end of range.
RawLocEncoding End;
SourceLocation::UIntTy End;

PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
: Begin(Begin), End(End) {}
PPSkippedRange(SourceRange R)
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) {
}

RawLocEncoding getBegin() const { return Begin; }
RawLocEncoding getEnd() const { return End; }
SourceLocation getBegin() const {
return SourceLocation::getFromRawEncoding(Begin);
}
SourceLocation getEnd() const {
return SourceLocation::getFromRawEncoding(End);
}
};

/// Offset in the AST file. Uses a 64-bit integer split into low/high
Expand All @@ -225,26 +231,26 @@ struct UnderalignedInt64 {

/// Source location and bit offset of a declaration.
struct DeclOffset {
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Raw source location.
RawLocEncoding RawLoc = 0;
SourceLocation::UIntTy Loc = 0;

/// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
/// structure alignment 32-bit and avoid padding gap because undefined
/// value in the padding affects AST hash.
UnderalignedInt64 BitOffset;

DeclOffset() = default;
DeclOffset(RawLocEncoding RawLoc, uint64_t BitOffset,
uint64_t DeclTypesBlockStartOffset)
: RawLoc(RawLoc) {
DeclOffset(SourceLocation Loc, uint64_t BitOffset,
uint64_t DeclTypesBlockStartOffset) {
setLocation(Loc);
setBitOffset(BitOffset, DeclTypesBlockStartOffset);
}

void setRawLoc(RawLocEncoding Loc) { RawLoc = Loc; }
void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }

RawLocEncoding getRawLoc() const { return RawLoc; }
SourceLocation getLocation() const {
return SourceLocation::getFromRawEncoding(Loc);
}

void setBitOffset(uint64_t Offset, const uint64_t DeclTypesBlockStartOffset) {
BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset);
Expand Down
48 changes: 17 additions & 31 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -1771,7 +1771,6 @@ class ASTReader

/// Retrieve the module manager.
ModuleManager &getModuleManager() { return ModuleMgr; }
const ModuleManager &getModuleManager() const { return ModuleMgr; }

/// Retrieve the preprocessor.
Preprocessor &getPreprocessor() const { return PP; }
Expand Down Expand Up @@ -2178,8 +2177,8 @@ class ASTReader

/// Retrieve the global submodule ID given a module and its local ID
/// number.
serialization::SubmoduleID getGlobalSubmoduleID(ModuleFile &M,
unsigned LocalID) const;
serialization::SubmoduleID
getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID);

/// Retrieve the submodule that corresponds to a global submodule ID.
///
Expand All @@ -2192,7 +2191,7 @@ class ASTReader

/// Retrieve the module file with a given local ID within the specified
/// ModuleFile.
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID) const;
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID);

/// Get an ID for the given module file.
unsigned getModuleFileID(ModuleFile *M);
Expand Down Expand Up @@ -2228,46 +2227,33 @@ class ASTReader
return Sema::AlignPackInfo::getFromRawEncoding(Raw);
}

using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Read a source location from raw form and return it in its
/// originating module file's source location space.
std::pair<SourceLocation, unsigned>
ReadUntranslatedSourceLocation(RawLocEncoding Raw,
LocSeq *Seq = nullptr) const {
SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
LocSeq *Seq = nullptr) const {
return SourceLocationEncoding::decode(Raw, Seq);
}

/// Read a source location from raw form.
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
SourceLocation::UIntTy Raw,
LocSeq *Seq = nullptr) const {
if (!MF.ModuleOffsetMap.empty())
ReadModuleOffsetMap(MF);

auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
ModuleFile *OwningModuleFile =
ModuleFileIndex == 0 ? &MF : MF.DependentModules[ModuleFileIndex - 1];

assert(!SourceMgr.isLoadedSourceLocation(Loc) &&
"Run out source location space");

return TranslateSourceLocation(*OwningModuleFile, Loc);
SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
return TranslateSourceLocation(ModuleFile, Loc);
}

/// Translate a source location from another module file's source
/// location space into ours.
SourceLocation TranslateSourceLocation(ModuleFile &ModuleFile,
SourceLocation Loc) const {
if (Loc.isInvalid())
return Loc;

// FIXME: TranslateSourceLocation is not re-enterable. It is problematic
// to call TranslateSourceLocation on a translated source location.
// We either need a method to know whether or not a source location is
// translated or refactor the code to make it clear that
// TranslateSourceLocation won't be called with translated source location.

return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2);
if (!ModuleFile.ModuleOffsetMap.empty())
ReadModuleOffsetMap(ModuleFile);
assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
ModuleFile.SLocRemap.end() &&
"Cannot find offset to remap.");
SourceLocation::IntTy Remap =
ModuleFile.SLocRemap.find(Loc.getOffset())->second;
return Loc.getLocWithOffset(Remap);
}

/// Read a source location.
Expand Down
4 changes: 0 additions & 4 deletions clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -667,10 +667,6 @@ class ASTWriter : public ASTDeserializationListener,
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
LocSeq *Seq = nullptr);

/// Return the raw encodings for source locations.
SourceLocationEncoding::RawLocEncoding
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);

/// Emit a source range.
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
LocSeq *Seq = nullptr);
Expand Down
14 changes: 5 additions & 9 deletions clang/include/clang/Serialization/ModuleFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@ class ModuleFile {
/// AST file.
const uint32_t *SLocEntryOffsets = nullptr;

/// Remapping table for source locations in this module.
ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
SLocRemap;

// === Identifiers ===

/// The number of identifiers in this AST file.
Expand Down Expand Up @@ -508,17 +512,9 @@ class ModuleFile {
/// List of modules which depend on this module
llvm::SetVector<ModuleFile *> ImportedBy;

/// List of modules which this module directly imported
/// List of modules which this module depends on
llvm::SetVector<ModuleFile *> Imports;

/// List of modules which this module depends on. Unlike `Imports`,
/// this also includes indirectly imported modules.
/// The order of DependentModules is significant: it must match the
/// order of the module manager at the time the current module file
/// was written. This member is initialized in
/// `ASTReader::ReadModuleOffsetMap`.
llvm::SmallVector<ModuleFile *, 16> DependentModules;

/// Determine whether this module was directly imported at
/// any point during translation.
bool isDirectlyImported() const { return DirectlyImported; }
Expand Down
91 changes: 26 additions & 65 deletions clang/include/clang/Serialization/SourceLocationEncoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,28 @@
//
//===----------------------------------------------------------------------===//
//
// We wish to encode a SourceLocation that comes from another module file in a
// way that does not depend on that module file, so that source location
// changes in the other module file do not affect the contents of the current
// module file. Users then don't need to recompile the whole project because
// of a new line in a module unit at the root of the dependency graph.
// Source locations are stored pervasively in the AST, making up a third of
// the size of typical serialized files. Storing them efficiently is important.
//
// To achieve this, we need to encode the index of the module file into the
// encoding of the source location. The encoding of the source location may be:
// We use integers optimized by VBR-encoding, because:
// - when abbreviations cannot be used, VBR6 encoding is our only choice
// - in the worst case a SourceLocation can be ~any 32-bit number, but in
// practice they are highly predictable
//
// |-----------------------|-----------------------|
// | A | B | C |
//
// * A: 32 bits. The index of the module file in the module manager + 1. The +1
// is necessary because 0 is reserved to mean the current module file.
// * B: 31 bits. The offset of the source location relative to the module file
// containing it.
// * C: The macro bit. We rotate it to the lowest bit so that we can save some
// space when the index of the module file is 0.
//
// As a special case, if the index of the module file is 0, we allow encoding
// a sequence of locations by storing only the differences between successive
// elements.
// We encode the integer so that likely values encode as small numbers that
// turn into few VBR chunks:
// - the invalid sentinel location is a very common value: it encodes as 0
// - the "macro or not" bit is stored at the bottom of the integer
// (rather than at the top, as in memory), so macro locations can have
// small representations.
// - related locations (e.g. of a left and right paren pair) are usually
// similar, so when encoding a sequence of locations we store only
// differences between successive elements.
//
//===----------------------------------------------------------------------===//

#include "clang/Basic/SourceLocation.h"
#include "llvm/Support/MathExtras.h"
#include <climits>
#include "clang/Basic/SourceLocation.h"

#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
Expand All @@ -57,13 +52,9 @@ class SourceLocationEncoding {
friend SourceLocationSequence;

public:
using RawLocEncoding = uint64_t;

static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
unsigned BaseModuleFileIndex,
SourceLocationSequence * = nullptr);
static std::pair<SourceLocation, unsigned>
decode(RawLocEncoding, SourceLocationSequence * = nullptr);
static uint64_t encode(SourceLocation Loc,
SourceLocationSequence * = nullptr);
static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
};

/// Serialized encoding of a sequence of SourceLocations.
Expand Down Expand Up @@ -158,44 +149,14 @@ class SourceLocationSequence::State {
operator SourceLocationSequence *() { return &Seq; }
};

inline SourceLocationEncoding::RawLocEncoding
SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
unsigned BaseModuleFileIndex,
SourceLocationSequence *Seq) {
// If the source location is a local source location, we can try to optimize
// the similar sequences to only record the differences.
if (!BaseOffset)
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());

if (Loc.isInvalid())
return 0;

// Otherwise, the higher bits are used to store the module file index,
// so it is meaningless to optimize the source locations into small
// integers. Let's try to always use the raw encodings.
assert(Loc.getOffset() >= BaseOffset);
Loc = Loc.getLocWithOffset(-BaseOffset);
RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());

// 16 bits should be sufficient to store the module file index.
assert(BaseModuleFileIndex < (1 << 16));
Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
return Encoded;
inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
SourceLocationSequence *Seq) {
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
}
inline std::pair<SourceLocation, unsigned>
SourceLocationEncoding::decode(RawLocEncoding Encoded,
SourceLocationSequence *Seq) {
unsigned ModuleFileIndex = Encoded >> 32;

if (!ModuleFileIndex)
return {Seq ? Seq->decode(Encoded)
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
ModuleFileIndex};

Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));

return {Loc, ModuleFileIndex};
inline SourceLocation
SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
return Seq ? Seq->decode(Encoded)
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
}

} // namespace clang
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Frontend/ASTUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2374,6 +2374,8 @@ bool ASTUnit::serialize(raw_ostream &OS) {
return serializeUnit(Writer, Buffer, getSema(), OS);
}

using SLocRemap = ContinuousRangeMap<unsigned, int, 2>;

void ASTUnit::TranslateStoredDiagnostics(
FileManager &FileMgr,
SourceManager &SrcMgr,
Expand Down
Loading

0 comments on commit d333a0d

Please sign in to comment.