This repository has been archived by the owner on Nov 18, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path0016-MSVC-Compatibility.patch
98 lines (94 loc) · 3.63 KB
/
0016-MSVC-Compatibility.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index cc29c24f5a35..c8e44dd40593 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -259,6 +259,12 @@ public:
// If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
// nullptr
static const char *getInvalidBOM(StringRef BufStr);
+
+ // Check buffer is UTF16
+ static bool checkBufUTF16(StringRef BufStr);
+
+ // Check buffer is UTF32
+ static bool checkBufUTF32(StringRef BufStr);
};
// Assert that the \c ContentCache objects will always be 8-byte aligned so
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index ec3e35595bb7..6419442b0c86 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ConvertUTF.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -98,6 +99,33 @@ const char *ContentCache::getInvalidBOM(StringRef BufStr) {
return InvalidBOM;
}
+// Check buffer is UTF16
+bool ContentCache::checkBufUTF16(StringRef BufStr) {
+ const char *Result = llvm::StringSwitch<const char *>(BufStr)
+ .StartsWith("\xFE\xFF", "UTF-16 (BE)")
+ .StartsWith("\xFF\xFE", "UTF-16 (LE)")
+ .Default(nullptr);
+ if (Result) {
+ return true;
+ }
+ return false;
+}
+
+// Check buffer is UTF32
+bool ContentCache::checkBufUTF32(StringRef BufStr) {
+ const char *Result =
+ llvm::StringSwitch<const char *>(BufStr)
+ .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
+ "UTF-32 (BE)")
+ .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
+ "UTF-32 (LE)")
+ .Default(nullptr);
+ if (Result) {
+ return true;
+ }
+ return false;
+}
+
llvm::Optional<llvm::MemoryBufferRef>
ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM,
SourceLocation Loc) const {
@@ -175,6 +203,21 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM,
// http://en.wikipedia.org/wiki/Byte_order_mark for more information.
StringRef BufStr = Buffer->getBuffer();
const char *InvalidBOM = getInvalidBOM(BufStr);
+
+ // [MSVC Compatibility] Trying to convert UTF16 to UTF8.
+ if (InvalidBOM && checkBufUTF16(BufStr)) {
+ std::string StrUtf8;
+ if (llvm::convertUTF16ToUTF8String(
+ llvm::makeArrayRef(BufStr.data(), BufStr.size()), StrUtf8)) {
+ char *NewBuf = new char[StrUtf8.size() + 4]{0};
+ memcpy(NewBuf, StrUtf8.data(), StrUtf8.size());
+ Buffer->BufferStart = NewBuf;
+ Buffer->BufferEnd = NewBuf + StrUtf8.size();
+ BufStr = Buffer->getBuffer();
+ // Verify it again.
+ InvalidBOM = getInvalidBOM(BufStr);
+ }
+ }
if (InvalidBOM) {
Diag.Report(Loc, diag::err_unsupported_bom)
diff --git a/llvm/include/llvm/Support/MemoryBuffer.h b/llvm/include/llvm/Support/MemoryBuffer.h
index 6385805eba1d..7add4aa3d9a7 100644
--- a/llvm/include/llvm/Support/MemoryBuffer.h
+++ b/llvm/include/llvm/Support/MemoryBuffer.h
@@ -48,6 +48,8 @@ using file_t = int;
/// reading when they encounter a '\0' than to continually check the file
/// position to see if it has reached the end of the file.
class MemoryBuffer {
+// hack it
+public:
const char *BufferStart; // Start of the buffer.
const char *BufferEnd; // End of the buffer.