From c5b611a419ca8acab041ca52a94c6338bdcd1756 Mon Sep 17 00:00:00 2001 From: Jannick Kremer Date: Fri, 16 Aug 2024 00:32:58 +0200 Subject: [PATCH] [libclang/python] Expose `clang_isBeforeInTranslationUnit` for `SourceRange.__contains__` Add libclang function `clang_isBeforeInTranslationUnit` to allow checking the order between two source locations. Simplify the `SourceRange.__contains__` implementation using this new function. Add tests for `SourceRange.__contains__` and the newly added functionality. Fixes #22617 Fixes #52827 --- clang/bindings/python/clang/cindex.py | 28 ++---- .../python/tests/cindex/test_location.py | 20 +++++ .../python/tests/cindex/test_source_range.py | 85 +++++++++++++++++++ clang/docs/ReleaseNotes.rst | 5 ++ clang/include/clang-c/CXSourceLocation.h | 10 +++ clang/lib/Basic/SourceManager.cpp | 3 +- clang/tools/libclang/CXSourceLocation.cpp | 12 +++ clang/tools/libclang/libclang.map | 5 ++ 8 files changed, 146 insertions(+), 22 deletions(-) create mode 100644 clang/bindings/python/tests/cindex/test_source_range.py diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index c251c46a04adf4..4da99e899e7f7c 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -319,6 +319,12 @@ def __eq__(self, other): def __ne__(self, other): return not self.__eq__(other) + def __lt__(self, other: SourceLocation) -> bool: + return conf.lib.clang_isBeforeInTranslationUnit(self, other) # type: ignore [no-any-return] + + def __le__(self, other: SourceLocation) -> bool: + return self < other or self == other + def __repr__(self): if self.file: filename = self.file.name @@ -375,26 +381,7 @@ def __contains__(self, other): """Useful to detect the Token/Lexer bug""" if not isinstance(other, SourceLocation): return False - if other.file is None and self.start.file is None: - pass - elif ( - self.start.file.name != other.file.name - or other.file.name != self.end.file.name - ): - # same file name - return False - # same file, in between lines - if self.start.line < other.line < self.end.line: - return True - elif self.start.line == other.line: - # same file first line - if self.start.column <= other.column: - return True - elif other.line == self.end.line: - # same file last line - if other.column <= self.end.column: - return True - return False + return self.start <= other <= self.end def __repr__(self): return "" % (self.start, self.end) @@ -3908,6 +3895,7 @@ def write_main_file_to_stdout(self): ("clang_isUnexposed", [CursorKind], bool), ("clang_isVirtualBase", [Cursor], bool), ("clang_isVolatileQualifiedType", [Type], bool), + ("clang_isBeforeInTranslationUnit", [SourceLocation, SourceLocation], bool), ( "clang_parseTranslationUnit", [Index, c_interop_string, c_void_p, c_int, c_void_p, c_int, c_int], diff --git a/clang/bindings/python/tests/cindex/test_location.py b/clang/bindings/python/tests/cindex/test_location.py index e23677a9be3c09..27854a312e6721 100644 --- a/clang/bindings/python/tests/cindex/test_location.py +++ b/clang/bindings/python/tests/cindex/test_location.py @@ -130,3 +130,23 @@ def test_is_system_location(self): two = get_cursor(tu, "two") self.assertFalse(one.location.is_in_system_header) self.assertTrue(two.location.is_in_system_header) + + def test_operator_lt(self): + tu = get_tu("aaaaa") + t1_f = tu.get_file(tu.spelling) + tu2 = get_tu("aaaaa") + + l_t1_12 = SourceLocation.from_position(tu, t1_f, 1, 2) + l_t1_13 = SourceLocation.from_position(tu, t1_f, 1, 3) + l_t1_14 = SourceLocation.from_position(tu, t1_f, 1, 4) + + l_t2_13 = SourceLocation.from_position(tu2, tu2.get_file(tu2.spelling), 1, 3) + + # In same file + assert l_t1_12 < l_t1_13 < l_t1_14 + assert not l_t1_13 < l_t1_12 + + # In same file, different TU + assert l_t1_12 < l_t2_13 < l_t1_14 + assert not l_t2_13 < l_t1_12 + assert not l_t1_14 < l_t2_13 diff --git a/clang/bindings/python/tests/cindex/test_source_range.py b/clang/bindings/python/tests/cindex/test_source_range.py new file mode 100644 index 00000000000000..47d8960fcafb35 --- /dev/null +++ b/clang/bindings/python/tests/cindex/test_source_range.py @@ -0,0 +1,85 @@ +import os +from clang.cindex import Config + +if "CLANG_LIBRARY_PATH" in os.environ: + Config.set_library_path(os.environ["CLANG_LIBRARY_PATH"]) + +import unittest +from clang.cindex import SourceLocation, SourceRange, TranslationUnit + +from .util import get_tu + + +def create_range(tu, line1, column1, line2, column2): + return SourceRange.from_locations( + SourceLocation.from_position(tu, tu.get_file(tu.spelling), line1, column1), + SourceLocation.from_position(tu, tu.get_file(tu.spelling), line2, column2), + ) + + +class TestSourceRange(unittest.TestCase): + def test_contains(self): + tu = get_tu( + """aaaaa +aaaaa +aaaaa +aaaaa""" + ) + file = tu.get_file(tu.spelling) + + l13 = SourceLocation.from_position(tu, file, 1, 3) + l21 = SourceLocation.from_position(tu, file, 2, 1) + l22 = SourceLocation.from_position(tu, file, 2, 2) + l23 = SourceLocation.from_position(tu, file, 2, 3) + l24 = SourceLocation.from_position(tu, file, 2, 4) + l25 = SourceLocation.from_position(tu, file, 2, 5) + l33 = SourceLocation.from_position(tu, file, 3, 3) + l31 = SourceLocation.from_position(tu, file, 3, 1) + r22_24 = create_range(tu, 2, 2, 2, 4) + r23_23 = create_range(tu, 2, 3, 2, 3) + r24_32 = create_range(tu, 2, 4, 3, 2) + r14_32 = create_range(tu, 1, 4, 3, 2) + + assert l13 not in r22_24 # Line before start + assert l21 not in r22_24 # Column before start + assert l22 in r22_24 # Colum on start + assert l23 in r22_24 # Column in range + assert l24 in r22_24 # Column on end + assert l25 not in r22_24 # Column after end + assert l33 not in r22_24 # Line after end + + assert l23 in r23_23 # In one-column range + + assert l23 not in r24_32 # Outside range in first line + assert l33 not in r24_32 # Outside range in last line + assert l25 in r24_32 # In range in first line + assert l31 in r24_32 # In range in last line + + assert l21 in r14_32 # In range at start of center line + assert l25 in r14_32 # In range at end of center line + + # In range within included file + tu2 = TranslationUnit.from_source( + "main.c", + unsaved_files=[ + ( + "main.c", + """int a[] = { +#include "numbers.inc" +}; +""", + ), + ( + "./numbers.inc", + """1, +2, +3, +4 + """, + ), + ], + ) + + r_curly = create_range(tu2, 1, 11, 3, 1) + l_f2 = SourceLocation.from_position(tu2, tu2.get_file("./numbers.inc"), 4, 1) + assert l_f2 in r_curly diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 14f1eecc5748ed..598fccfe59bff1 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -90,6 +90,9 @@ Clang Python Bindings Potentially Breaking Changes - Calling a property on the ``CompletionChunk`` or ``CompletionString`` class statically now leads to an error, instead of returning a ``CachedProperty`` object that is used internally. Properties are only available on instances. +- For a single-line ``SourceRange`` and a ``SourceLocation`` in the same line, + but after the end of the ``SourceRange``, ``SourceRange.__contains__`` + used to incorrectly return ``True``. (#GH22617), (#GH52827) What's New in Clang |release|? ============================== @@ -364,6 +367,8 @@ clang-format libclang -------- +- Add ``clang_isBeforeInTranslationUnit``. Given two source locations, it determines + whether the first one comes strictly before the second in the source code. Static Analyzer --------------- diff --git a/clang/include/clang-c/CXSourceLocation.h b/clang/include/clang-c/CXSourceLocation.h index dcb13ba273173f..421802151d02ab 100644 --- a/clang/include/clang-c/CXSourceLocation.h +++ b/clang/include/clang-c/CXSourceLocation.h @@ -74,6 +74,16 @@ CINDEX_LINKAGE CXSourceLocation clang_getNullLocation(void); CINDEX_LINKAGE unsigned clang_equalLocations(CXSourceLocation loc1, CXSourceLocation loc2); +/** + * Determine for two source locations if the first comes + * strictly before the second one in the source code. + * + * \returns non-zero if the first source location comes + * strictly before the second one, zero otherwise. + */ +CINDEX_LINKAGE unsigned clang_isBeforeInTranslationUnit(CXSourceLocation loc1, + CXSourceLocation loc2); + /** * Returns non-zero if the given source location is in a system header. */ diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 533a9fe88a2150..b0256a8ce9ed04 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -2038,8 +2038,7 @@ bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS, std::pair InSameTU = isInTheSameTranslationUnit(LOffs, ROffs); if (InSameTU.first) return InSameTU.second; - // TODO: This should be unreachable, but some clients are calling this - // function before making sure LHS and RHS are in the same TU. + // This case is used by libclang: clang_isBeforeInTranslationUnit return LOffs.first < ROffs.first; } diff --git a/clang/tools/libclang/CXSourceLocation.cpp b/clang/tools/libclang/CXSourceLocation.cpp index 53cb71f7276f29..cd41e9f0d64031 100644 --- a/clang/tools/libclang/CXSourceLocation.cpp +++ b/clang/tools/libclang/CXSourceLocation.cpp @@ -50,6 +50,18 @@ unsigned clang_equalLocations(CXSourceLocation loc1, CXSourceLocation loc2) { loc1.int_data == loc2.int_data); } +unsigned clang_isBeforeInTranslationUnit(CXSourceLocation loc1, + CXSourceLocation loc2) { + const SourceLocation Loc1 = SourceLocation::getFromRawEncoding(loc1.int_data); + const SourceLocation Loc2 = SourceLocation::getFromRawEncoding(loc2.int_data); + + const SourceManager &SM = + *static_cast(loc1.ptr_data[0]); + // Use the appropriate SourceManager method here rather than operator< because + // ordering is meaningful only if LHS and RHS have the same FileID. + return SM.isBeforeInTranslationUnit(Loc1, Loc2); +} + CXSourceRange clang_getNullRange() { CXSourceRange Result = { { nullptr, nullptr }, 0, 0 }; return Result; diff --git a/clang/tools/libclang/libclang.map b/clang/tools/libclang/libclang.map index 371fe512ce71c1..25d8ba57d32514 100644 --- a/clang/tools/libclang/libclang.map +++ b/clang/tools/libclang/libclang.map @@ -434,6 +434,11 @@ LLVM_19 { clang_Cursor_getBinaryOpcodeStr; }; +LLVM_20 { + global: + clang_isBeforeInTranslationUnit; +}; + # Example of how to add a new symbol version entry. If you do add a new symbol # version, please update the example to depend on the version you added. # LLVM_X {