Skip to content

Commit

Permalink
Merge branch 'master' into upload-build-check-statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
alexey-milovidov committed Jul 30, 2023
2 parents 41ffc09 + 3b481b9 commit a1b83c5
Show file tree
Hide file tree
Showing 17 changed files with 99 additions and 56 deletions.
10 changes: 8 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,14 @@ elseif(GLIBC_COMPATIBILITY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
endif ()

# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
if (OS_LINUX)
# We should not export dynamic symbols, because:
# - The main clickhouse binary does not use dlopen,
# and whatever is poisoning it by LD_PRELOAD should not link to our symbols.
# - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries
# should not expose their symbols to ODBC drivers and libraries.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()

if (OS_DARWIN)
# The `-all_load` flag forces loading of all symbols from all libraries,
Expand Down
5 changes: 3 additions & 2 deletions cmake/split_debug_symbols.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols)
# Splits debug symbols into separate file, leaves the binary untouched:
COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check.
# Also, after we disabled the export of symbols for dynamic linking, we still need to keep a static symbol table for good stack traces.
COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Associate stripped binary with debug symbols:
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM
Expand Down
2 changes: 1 addition & 1 deletion docker/packager/binary/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ then
ninja $NINJA_FLAGS clickhouse-keeper

ls -la ./programs/
ldd ./programs/clickhouse-keeper
ldd ./programs/clickhouse-keeper ||:

if [ -n "$MAKE_DEB" ]; then
# No quotes because I want it to expand to nothing if empty.
Expand Down
4 changes: 4 additions & 0 deletions docker/test/base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ RUN apt-get update \
pv \
ripgrep \
zstd \
locales \
--yes --no-install-recommends

# Sanitizer options for services (clickhouse-server)
Expand All @@ -28,6 +29,9 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_de
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'

# Generate the en_US.UTF-8 locale and make it the default for every process in the image.
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL=en_US.UTF-8

ENV TZ=Europe/Moscow
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone

Expand Down
6 changes: 5 additions & 1 deletion docker/test/style/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
locales \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip
&& rm -rf /root/.cache/pip

# Generate the en_US.UTF-8 locale and make it the default for every process in the image.
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL=en_US.UTF-8

# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
Expand Down
4 changes: 0 additions & 4 deletions programs/library-bridge/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
library-bridge.cpp
)

if (OS_LINUX)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()

clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})

target_link_libraries(clickhouse-library-bridge PRIVATE
Expand Down
6 changes: 0 additions & 6 deletions programs/odbc-bridge/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
validateODBCConnectionString.cpp
)

if (OS_LINUX)
# clickhouse-odbc-bridge is always a separate binary.
# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()

clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})

target_link_libraries(clickhouse-odbc-bridge PRIVATE
Expand Down
13 changes: 11 additions & 2 deletions src/Common/Config/ConfigProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root,
}
}

void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with)
bool ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with)
{
Node * config_root = getRootNode(config.get());
Node * with_root = getRootNode(with.get());
Expand All @@ -343,11 +343,15 @@ void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with)
&& !((config_root_node_name == "yandex" || config_root_node_name == "clickhouse")
&& (merged_root_node_name == "yandex" || merged_root_node_name == "clickhouse")))
{
if (config_root_node_name != "clickhouse" && config_root_node_name != "yandex")
return false;

throw Poco::Exception("Root element doesn't have the corresponding root element as the config file."
" It must be <" + config_root->nodeName() + ">");
}

mergeRecursive(config, config_root, with_root);
return true;
}

void ConfigProcessor::doIncludesRecursive(
Expand Down Expand Up @@ -645,7 +649,12 @@ XMLDocumentPtr ConfigProcessor::processConfig(
with = dom_parser.parse(merge_file);
}

merge(config, with);
if (!merge(config, with))
{
LOG_DEBUG(log, "Merging bypassed - configuration file '{}' doesn't belong to configuration '{}' - merging root node name '{}' doesn't match '{}'",
merge_file, path, getRootNode(with.get())->nodeName(), getRootNode(config.get())->nodeName());
continue;
}

contributing_files.push_back(merge_file);
}
Expand Down
4 changes: 3 additions & 1 deletion src/Common/Config/ConfigProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,9 @@ class ConfigProcessor

void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root);

void merge(XMLDocumentPtr config, XMLDocumentPtr with);
/// If config root node name is not 'clickhouse' and merging config's root node names doesn't match, bypasses merging and returns false.
/// For compatibility, the root node 'yandex' is considered equal to 'clickhouse'.
bool merge(XMLDocumentPtr config, XMLDocumentPtr with);

void doIncludesRecursive(
XMLDocumentPtr config,
Expand Down
81 changes: 52 additions & 29 deletions src/Common/SymbolIndex.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#if defined(__ELF__) && !defined(OS_FREEBSD)

#include <Common/SymbolIndex.h>
#include <base/hex.h>

#include <algorithm>
#include <optional>
Expand Down Expand Up @@ -62,9 +61,11 @@ Otherwise you will get only exported symbols from program headers.
#endif

#define __msan_unpoison_string(X) // NOLINT
#define __msan_unpoison(X, Y) // NOLINT
#if defined(ch_has_feature)
# if ch_has_feature(memory_sanitizer)
# undef __msan_unpoison_string
# undef __msan_unpoison
# include <sanitizer/msan_interface.h>
# endif
#endif
Expand Down Expand Up @@ -98,17 +99,21 @@ void collectSymbolsFromProgramHeaders(
/* Iterate over all headers of the current shared lib
* (first call is for the executable itself)
*/
__msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum));
__msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr));
for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index)
{
/* Further processing is only needed if the dynamic section is reached
*/
__msan_unpoison(&info->dlpi_phdr[header_index], sizeof(info->dlpi_phdr[header_index]));
if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC)
continue;

/* Get a pointer to the first entry of the dynamic section.
* Its address is the shared lib's address + the virtual address
*/
const ElfW(Dyn) * dyn_begin = reinterpret_cast<const ElfW(Dyn) *>(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr);
__msan_unpoison(&dyn_begin, sizeof(dyn_begin));

/// For an unknown reason, addresses are sometimes relative and sometimes absolute.
auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr)
Expand All @@ -122,44 +127,53 @@ void collectSymbolsFromProgramHeaders(
*/

size_t sym_cnt = 0;
for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it)
{
ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);

// TODO: this branch leads to invalid address of the hash table. Need further investigation.
// if (it->d_tag == DT_HASH)
// {
// const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
// sym_cnt = hash[1];
// break;
// }
if (it->d_tag == DT_GNU_HASH)
const auto * it = dyn_begin;
while (true)
{
/// This code based on Musl-libc.
__msan_unpoison(it, sizeof(*it));
/// The dynamic section is terminated by a DT_NULL entry: stop only there,
/// otherwise keep scanning the entries for DT_GNU_HASH.
if (it->d_tag == DT_NULL)
break;

const uint32_t * buckets = nullptr;
const uint32_t * hashval = nullptr;
ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);

const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
if (it->d_tag == DT_GNU_HASH)
{
/// This code is based on musl libc.

buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4);
const uint32_t * buckets = nullptr;
const uint32_t * hashval = nullptr;

for (ElfW(Word) i = 0; i < hash[0]; ++i)
if (buckets[i] > sym_cnt)
sym_cnt = buckets[i];
const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);

if (sym_cnt)
{
sym_cnt -= hash[1];
hashval = buckets + hash[0] + sym_cnt;
do
__msan_unpoison(&hash[0], sizeof(*hash));
__msan_unpoison(&hash[1], sizeof(*hash));
__msan_unpoison(&hash[2], sizeof(*hash));

buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4);

__msan_unpoison(buckets, hash[0] * sizeof(buckets[0]));

for (ElfW(Word) i = 0; i < hash[0]; ++i)
if (buckets[i] > sym_cnt)
sym_cnt = buckets[i];

if (sym_cnt)
{
++sym_cnt;
sym_cnt -= hash[1];
hashval = buckets + hash[0] + sym_cnt;
__msan_unpoison(&hashval, sizeof(hashval));
do
{
++sym_cnt;
}
while (!(*hashval++ & 1));
}
while (!(*hashval++ & 1));

break;
}

break;
++it;
}
}

Expand Down Expand Up @@ -190,13 +204,16 @@ void collectSymbolsFromProgramHeaders(
/* Get the pointer to the first entry of the symbol table */
const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(base_address);

__msan_unpoison(elf_sym, sym_cnt * sizeof(*elf_sym));

/* Iterate over the symbol table */
for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index)
{
/* Get the name of the sym_index-th symbol.
* This is located at the address of st_name relative to the beginning of the string table.
*/
const char * sym_name = &strtab[elf_sym[sym_index].st_name];
__msan_unpoison_string(sym_name);

if (!sym_name)
continue;
Expand All @@ -223,13 +240,18 @@ void collectSymbolsFromProgramHeaders(
#if !defined USE_MUSL
String getBuildIDFromProgramHeaders(dl_phdr_info * info)
{
__msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum));
__msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr));
for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index)
{
const ElfPhdr & phdr = info->dlpi_phdr[header_index];
__msan_unpoison(&phdr, sizeof(phdr));
if (phdr.p_type != PT_NOTE)
continue;

return Elf::getBuildID(reinterpret_cast<const char *>(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz);
std::string_view view(reinterpret_cast<const char *>(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz);
__msan_unpoison(view.data(), view.size());
return Elf::getBuildID(view.data(), view.size());
}
return {};
}
Expand Down Expand Up @@ -318,6 +340,7 @@ void collectSymbolsFromELF(
build_id = our_build_id;
#else
/// MSan does not know that the program segments in memory are initialized.
__msan_unpoison(info, sizeof(*info));
__msan_unpoison_string(info->dlpi_name);

object_name = info->dlpi_name;
Expand Down
2 changes: 1 addition & 1 deletion src/Compression/CompressionCodecEncrypted.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ void CompressionCodecEncrypted::Configuration::loadImpl(

/// If there is only one key with non zero ID, current_key_id should be defined.
if (new_params->keys_storage[method].size() == 1 && !new_params->keys_storage[method].contains(0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. сurrent_key_id is required");
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. current_key_id is required");
}

/// Try to find which key will be used for encryption. If there is no current_key and only one key without id
Expand Down
4 changes: 2 additions & 2 deletions src/Disks/DiskEncrypted.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,10 @@ void DiskEncrypted::applyNewSettings(
{
auto new_settings = parseDiskEncryptedSettings(name, config, config_prefix, disk_map);
if (new_settings->wrapped_disk != delegate)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging wrapped disk on the fly is not supported. Disk {}", name);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing wrapped disk on the fly is not supported. Disk {}", name);

if (new_settings->disk_path != disk_path)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. Disk {}", name);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing disk path on the fly is not supported. Disk {}", name);

current_settings.set(std::move(new_settings));
IDisk::applyNewSettings(config, context, config_prefix, disk_map);
Expand Down
4 changes: 2 additions & 2 deletions src/IO/examples/read_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ int readAndPrint(DB::ReadBuffer & in)
int main(int, char **)
{
{
std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'";
DB::ReadBufferFromString in(s);
if (readAndPrint(in))
std::cout << "readAndPrint from ReadBufferFromString failed" << std::endl;
Expand All @@ -49,7 +49,7 @@ int main(int, char **)

std::shared_ptr<DB::ReadBufferFromOwnString> in;
{
std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'";
in = std::make_shared<DB::ReadBufferFromOwnString>(s);
}
if (readAndPrint(*in))
Expand Down
2 changes: 1 addition & 1 deletion src/IO/examples/write_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ int main(int, char **)
{
DB::Int64 a = -123456;
DB::Float64 b = 123.456;
DB::String c = "вася пе\tтя";
DB::String c = "вася pe\ttya";
DB::String d = "'xyz\\";

std::stringstream s; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
Expand Down
2 changes: 1 addition & 1 deletion src/IO/examples/write_buffer_perf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ int main(int, char **)
{
DB::Int64 a = -123456;
DB::Float64 b = 123.456;
DB::String c = "вася пе\tтя";
DB::String c = "вася pe\ttya";
DB::String d = "'xyz\\";

std::ofstream s("test");
Expand Down
2 changes: 1 addition & 1 deletion src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ Chunk ParquetMetadataInputFormat::generate()
else if (name == names[3])
{
auto column = types[3]->createColumn();
/// Version сan be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6).
/// Version can be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6).
String version = metadata->version() == parquet::ParquetVersion::PARQUET_1_0 ? "1.0" : "2.6";
assert_cast<ColumnString &>(*column).insertData(version.data(), version.size());
res.addColumn(std::move(column));
Expand Down
4 changes: 4 additions & 0 deletions utils/check-style/check-style
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# (simple regexps) to check if the code is likely to have basic style violations.
# and then to run formatter only for the specified files.

# Exported so that child processes (e.g. the `grep -P` checks with non-ASCII patterns below)
# run in a UTF-8 locale even when LC_ALL is not already present in the caller's environment.
export LC_ALL="en_US.UTF-8"
ROOT_PATH=$(git rev-parse --show-toplevel)
EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml'

Expand Down Expand Up @@ -413,3 +414,6 @@ find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -

# Check for bad punctuation: whitespace before comma.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'"

# Cyrillic characters hiding inside Latin.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place."

0 comments on commit a1b83c5

Please sign in to comment.