Skip to content

Commit

Permalink
Merge branch 'master' into flush-async-insert-queue
Browse files Browse the repository at this point in the history
  • Loading branch information
CurtizJ authored Jul 25, 2023
2 parents 79cc818 + e133db7 commit 4531b10
Show file tree
Hide file tree
Showing 143 changed files with 3,399 additions and 1,221 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -340,3 +340,6 @@
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/c-ares/c-ares.git
[submodule "contrib/incbin"]
path = contrib/incbin
url = https://github.com/graphitemaster/incbin.git
8 changes: 7 additions & 1 deletion base/poco/Foundation/include/Poco/URI.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class Foundation_API URI
URI();
/// Creates an empty URI.

explicit URI(const std::string & uri);
explicit URI(const std::string & uri, bool disable_url_encoding = false);
/// Parses an URI from the given string. Throws a
/// SyntaxException if the uri is not valid.

Expand Down Expand Up @@ -350,13 +350,19 @@ class Foundation_API URI
static const std::string ILLEGAL;

private:
void encodePath(std::string & encodedStr) const;
void decodePath(const std::string & encodedStr);


std::string _scheme;
std::string _userInfo;
std::string _host;
unsigned short _port;
std::string _path;
std::string _query;
std::string _fragment;

bool _disable_url_encoding = false;
};


Expand Down
39 changes: 29 additions & 10 deletions base/poco/Foundation/src/URI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ URI::URI():
}


URI::URI(const std::string& uri):
_port(0)
URI::URI(const std::string& uri, bool decode_and_encode_path):
_port(0), _disable_url_encoding(decode_and_encode_path)
{
parse(uri);
}
Expand Down Expand Up @@ -107,7 +107,8 @@ URI::URI(const URI& uri):
_port(uri._port),
_path(uri._path),
_query(uri._query),
_fragment(uri._fragment)
_fragment(uri._fragment),
_disable_url_encoding(uri._disable_url_encoding)
{
}

Expand All @@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
_port(baseURI._port),
_path(baseURI._path),
_query(baseURI._query),
_fragment(baseURI._fragment)
_fragment(baseURI._fragment),
_disable_url_encoding(baseURI._disable_url_encoding)
{
resolve(relativeURI);
}
Expand Down Expand Up @@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri)
_path = uri._path;
_query = uri._query;
_fragment = uri._fragment;
_disable_url_encoding = uri._disable_url_encoding;
}
return *this;
}
Expand Down Expand Up @@ -181,6 +184,7 @@ void URI::swap(URI& uri)
std::swap(_path, uri._path);
std::swap(_query, uri._query);
std::swap(_fragment, uri._fragment);
std::swap(_disable_url_encoding, uri._disable_url_encoding);
}


Expand All @@ -201,7 +205,7 @@ std::string URI::toString() const
std::string uri;
if (isRelative())
{
encode(_path, RESERVED_PATH, uri);
encodePath(uri);
}
else
{
Expand All @@ -217,7 +221,7 @@ std::string URI::toString() const
{
if (!auth.empty() && _path[0] != '/')
uri += '/';
encode(_path, RESERVED_PATH, uri);
encodePath(uri);
}
else if (!_query.empty() || !_fragment.empty())
{
Expand Down Expand Up @@ -313,7 +317,7 @@ void URI::setAuthority(const std::string& authority)
void URI::setPath(const std::string& path)
{
_path.clear();
decode(path, _path);
decodePath(path);
}


Expand Down Expand Up @@ -418,7 +422,7 @@ void URI::setPathEtc(const std::string& pathEtc)
std::string URI::getPathEtc() const
{
std::string pathEtc;
encode(_path, RESERVED_PATH, pathEtc);
encodePath(pathEtc);
if (!_query.empty())
{
pathEtc += '?';
Expand All @@ -436,7 +440,7 @@ std::string URI::getPathEtc() const
std::string URI::getPathAndQuery() const
{
std::string pathAndQuery;
encode(_path, RESERVED_PATH, pathAndQuery);
encodePath(pathAndQuery);
if (!_query.empty())
{
pathAndQuery += '?';
Expand Down Expand Up @@ -681,6 +685,21 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
}
}

void URI::encodePath(std::string & encodedStr) const
{
if (_disable_url_encoding)
encodedStr = _path;
else
encode(_path, RESERVED_PATH, encodedStr);
}

void URI::decodePath(const std::string & encodedStr)
{
if (_disable_url_encoding)
_path = encodedStr;
else
decode(encodedStr, _path);
}

bool URI::isWellKnownPort() const
{
Expand Down Expand Up @@ -820,7 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it
{
std::string path;
while (it != end && *it != '?' && *it != '#') path += *it++;
decode(path, _path);
decodePath(path);
}


Expand Down
58 changes: 0 additions & 58 deletions cmake/embed_binary.cmake

This file was deleted.

2 changes: 1 addition & 1 deletion contrib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)

option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)
add_contrib (libstemmer-c-cmake libstemmer_c)
add_contrib (wordnet-blast-cmake wordnet-blast)
add_contrib (lemmagen-c-cmake lemmagen-c)
add_contrib (nlp-data-cmake nlp-data)
add_contrib (cld2-cmake cld2)
endif()

Expand Down
53 changes: 33 additions & 20 deletions contrib/cctz-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")

set (SRCS
Expand All @@ -23,41 +22,55 @@ if (OS_FREEBSD)
endif ()

# Related to time_zones table:
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
# as the library that's built using embedded tzdata is also specific to OS_LINUX
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
# TimeZones.generated.cpp is autogenerated each time during a build
set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
# remove existing copies so that its generated fresh on each build.
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
file(REMOVE ${TIMEZONES_FILE})

# get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")

set(TIMEZONE_RESOURCE_FILES)

# each file in that dir (except of tab and localtime) store the info about timezone
execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES)

file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")

set (COUNTER 1)
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
clickhouse_embed_binaries(
TARGET tzdata
RESOURCE_DIR "${TZDIR}"
RESOURCES ${TIMEZONE_RESOURCE_FILES}
)
add_dependencies(_cctz tzdata)
target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")

file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )

foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)

file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n")

file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )

set (COUNTER 1)
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)

file(APPEND ${TIMEZONES_FILE} " return {};\n")
file(APPEND ${TIMEZONES_FILE} "}\n")

add_library (tzdata ${TIMEZONES_FILE})
target_link_libraries(tzdata ch_contrib::incbin)
target_link_libraries(_cctz tzdata)

add_library(ch_contrib::cctz ALIAS _cctz)
1 change: 1 addition & 0 deletions contrib/incbin
Submodule incbin added at 6e576c
8 changes: 8 additions & 0 deletions contrib/incbin-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
add_library(_incbin INTERFACE)
target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
add_library(ch_contrib::incbin ALIAS _incbin)

# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled.
# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING)
15 changes: 0 additions & 15 deletions contrib/nlp-data-cmake/CMakeLists.txt

This file was deleted.

1 change: 1 addition & 0 deletions docker/test/fasttest/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ function clone_submodules
contrib/simdjson
contrib/liburing
contrib/libfiu
contrib/incbin
)

git submodule sync
Expand Down
3 changes: 3 additions & 0 deletions docker/test/stateless/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
set -e -x -a

# Choose random timezone for this test run.
#
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
# (it will choose between default server timezone and something specific).
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
Expand Down
1 change: 1 addition & 0 deletions docs/en/engines/table-engines/special/url.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
## Storage Settings {#storage-settings}

- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) -allows to disable decoding/encoding path in uri. Disabled by default.
6 changes: 6 additions & 0 deletions docs/en/operations/settings/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -3468,6 +3468,12 @@ Possible values:

Default value: `0`.

## disable_url_encoding {#disable_url_encoding}

Allows to disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.

Disabled by default.

## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}

Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
Expand Down
2 changes: 2 additions & 0 deletions docs/en/sql-reference/aggregate-functions/reference/any.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ To get a determinate result, you can use the ‘min’ or ‘max’ function ins
In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY.

When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.

- Alias: `any_value`
Loading

0 comments on commit 4531b10

Please sign in to comment.