diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h
index 1880af4ccd2d..eba8109253db 100644
--- a/base/poco/Foundation/include/Poco/URI.h
+++ b/base/poco/Foundation/include/Poco/URI.h
@@ -57,7 +57,7 @@ class Foundation_API URI
URI();
/// Creates an empty URI.
- explicit URI(const std::string & uri);
+ explicit URI(const std::string & uri, bool disable_url_encoding = false);
/// Parses an URI from the given string. Throws a
/// SyntaxException if the uri is not valid.
@@ -350,6 +350,10 @@ class Foundation_API URI
static const std::string ILLEGAL;
private:
+ void encodePath(std::string & encodedStr) const;
+ void decodePath(const std::string & encodedStr);
+
+
std::string _scheme;
std::string _userInfo;
std::string _host;
@@ -357,6 +361,8 @@ class Foundation_API URI
std::string _path;
std::string _query;
std::string _fragment;
+
+ bool _disable_url_encoding = false;
};
diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp
index 5543e02b2791..3354c69d1883 100644
--- a/base/poco/Foundation/src/URI.cpp
+++ b/base/poco/Foundation/src/URI.cpp
@@ -36,8 +36,8 @@ URI::URI():
}
-URI::URI(const std::string& uri):
- _port(0)
+URI::URI(const std::string& uri, bool disable_url_encoding):
+ _port(0), _disable_url_encoding(disable_url_encoding)
{
parse(uri);
}
@@ -107,7 +107,8 @@ URI::URI(const URI& uri):
_port(uri._port),
_path(uri._path),
_query(uri._query),
- _fragment(uri._fragment)
+ _fragment(uri._fragment),
+ _disable_url_encoding(uri._disable_url_encoding)
{
}
@@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
_port(baseURI._port),
_path(baseURI._path),
_query(baseURI._query),
- _fragment(baseURI._fragment)
+ _fragment(baseURI._fragment),
+ _disable_url_encoding(baseURI._disable_url_encoding)
{
resolve(relativeURI);
}
@@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri)
_path = uri._path;
_query = uri._query;
_fragment = uri._fragment;
+ _disable_url_encoding = uri._disable_url_encoding;
}
return *this;
}
@@ -181,6 +184,7 @@ void URI::swap(URI& uri)
std::swap(_path, uri._path);
std::swap(_query, uri._query);
std::swap(_fragment, uri._fragment);
+ std::swap(_disable_url_encoding, uri._disable_url_encoding);
}
@@ -201,7 +205,7 @@ std::string URI::toString() const
std::string uri;
if (isRelative())
{
- encode(_path, RESERVED_PATH, uri);
+ encodePath(uri);
}
else
{
@@ -217,7 +221,7 @@ std::string URI::toString() const
{
if (!auth.empty() && _path[0] != '/')
uri += '/';
- encode(_path, RESERVED_PATH, uri);
+ encodePath(uri);
}
else if (!_query.empty() || !_fragment.empty())
{
@@ -313,7 +317,7 @@ void URI::setAuthority(const std::string& authority)
void URI::setPath(const std::string& path)
{
_path.clear();
- decode(path, _path);
+ decodePath(path);
}
@@ -418,7 +422,7 @@ void URI::setPathEtc(const std::string& pathEtc)
std::string URI::getPathEtc() const
{
std::string pathEtc;
- encode(_path, RESERVED_PATH, pathEtc);
+ encodePath(pathEtc);
if (!_query.empty())
{
pathEtc += '?';
@@ -436,7 +440,7 @@ std::string URI::getPathEtc() const
std::string URI::getPathAndQuery() const
{
std::string pathAndQuery;
- encode(_path, RESERVED_PATH, pathAndQuery);
+ encodePath(pathAndQuery);
if (!_query.empty())
{
pathAndQuery += '?';
@@ -681,6 +685,21 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
}
}
+void URI::encodePath(std::string & encodedStr) const
+{
+ if (_disable_url_encoding)
+ encodedStr = _path;
+ else
+ encode(_path, RESERVED_PATH, encodedStr);
+}
+
+void URI::decodePath(const std::string & encodedStr)
+{
+ if (_disable_url_encoding)
+ _path = encodedStr;
+ else
+ decode(encodedStr, _path);
+}
bool URI::isWellKnownPort() const
{
@@ -820,7 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it
{
std::string path;
while (it != end && *it != '?' && *it != '#') path += *it++;
- decode(path, _path);
+ decodePath(path);
}
diff --git a/contrib/idxd-config b/contrib/idxd-config
index f6605c41a735..a836ce0e4205 160000
--- a/contrib/idxd-config
+++ b/contrib/idxd-config
@@ -1 +1 @@
-Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99
+Subproject commit a836ce0e42052a69bffbbc14239ab4097f3b77f1
diff --git a/contrib/qpl b/contrib/qpl
index 3f8f5cea2773..faaf19350459 160000
--- a/contrib/qpl
+++ b/contrib/qpl
@@ -1 +1 @@
-Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679
+Subproject commit faaf19350459c076e66bb5df11743c3fade59b73
diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 4926967d2d2a..9217fcfddd97 100644
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -14,6 +14,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
+source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
install_packages package_folder
@@ -52,7 +53,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
start
-shellcheck disable=SC2086 # No quotes because I want to split it into words.
+# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh
index b80613093428..13c352d5d41e 100644
--- a/docker/test/upgrade/run.sh
+++ b/docker/test/upgrade/run.sh
@@ -16,6 +16,7 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
+source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
@@ -61,6 +62,7 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
+rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
@@ -90,6 +92,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
+rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
diff --git a/docs/en/development/building_and_benchmarking_deflate_qpl.md b/docs/en/development/building_and_benchmarking_deflate_qpl.md
index 0501c1cbdcb1..4e01b41ab3c1 100644
--- a/docs/en/development/building_and_benchmarking_deflate_qpl.md
+++ b/docs/en/development/building_and_benchmarking_deflate_qpl.md
@@ -7,12 +7,8 @@ description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec
# Build Clickhouse with DEFLATE_QPL
-- Make sure your target machine meet the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
-- Pass the following flag to CMake when building ClickHouse:
-
-``` bash
-cmake -DENABLE_QPL=1 ..
-```
+- Make sure your host machine meets the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
+- deflate_qpl is enabled by default during cmake build. In case you accidentally change it, please double-check build flag: ENABLE_QPL=1
- For generic requirements, please refer to Clickhouse generic [build instructions](/docs/en/development/build.md)
diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md
index 0e2b48ef6a69..051945538b2d 100644
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@@ -57,7 +57,8 @@ Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter tok
:::note
As shown in the example, querying from S3 tables that are partitioned is
-not directly supported at this time, but can be accomplished by querying the bucket contents with a wildcard.
+not directly supported at this time, but can be accomplished by querying the individual partitions
+using the S3 table function.
The primary use-case for writing
partitioned data in S3 is to enable transferring that data into another
@@ -127,23 +128,7 @@ FROM s3('http://minio:10000/clickhouse//test_45.csv', 'minioadmin', 'minioadminp
└────┴────┴────┘
```
-#### Select from all partitions
-
-```sql
-SELECT *
-FROM s3('http://minio:10000/clickhouse//**', 'minioadmin', 'minioadminpassword', 'CSV')
-```
-```response
-┌─c1─┬─c2─┬─c3─┐
-│ 3 │ 2 │ 1 │
-└────┴────┴────┘
-┌─c1─┬─c2─┬─c3─┐
-│ 1 │ 2 │ 3 │
-└────┴────┴────┘
-┌─c1─┬─c2─┬─c3─┐
-│ 78 │ 43 │ 45 │
-└────┴────┴────┘
-```
+#### Limitation
You may naturally try to `Select * from p`, but as noted above, this query will fail; use the preceding query.
diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md
index 26d4975954fa..f556df0a0887 100644
--- a/docs/en/engines/table-engines/special/url.md
+++ b/docs/en/engines/table-engines/special/url.md
@@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
+- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding of the path in the URI. Disabled by default.
diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 3a7f6d4d8547..37821f0fee1d 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -56,7 +56,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@@ -286,9 +286,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
Possible header fields:
@@ -416,7 +416,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@@ -581,7 +581,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@@ -621,7 +621,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
%
@@ -673,7 +673,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Absolute Path File
* Connection #0 to host localhost left intact
@@ -692,7 +692,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Relative Path File
* Connection #0 to host localhost left intact
diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md
index d3e21cb23645..d1d9fa542ab1 100644
--- a/docs/en/operations/configuration-files.md
+++ b/docs/en/operations/configuration-files.md
@@ -65,6 +65,40 @@ XML substitution example:
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
+## Encrypting Configuration {#encryption}
+
+You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encryption_codec` with the name of the encryption codec as value to the element to encrypt.
+
+Unlike attributes `from_zk`, `from_env` and `incl` (or element `include`), no substitution, i.e. decryption of the encrypted value, is performed in the preprocessed file. Decryption happens only at runtime in the server process.
+
+Example:
+
+```xml
+
+
+
+ 00112233445566778899aabbccddeeff
+
+
+
+ admin
+ 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+
+
+```
+
+To get the encrypted value `encrypt_decrypt` example application may be used.
+
+Example:
+
+``` bash
+./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
+```
+
+``` text
+961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+```
+
## User Settings {#user-settings}
The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 580b51a984d2..8dfb6c0d2250 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3468,6 +3468,12 @@ Possible values:
Default value: `0`.
+## disable_url_encoding {#disable_url_encoding}
+
+Allows to disable decoding/encoding of the path in the URI in [URL](../../engines/table-engines/special/url.md) engine tables.
+
+Disabled by default.
+
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md
index db19f524b312..f79fe66c05d7 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/any.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/any.md
@@ -12,3 +12,5 @@ To get a determinate result, you can use the ‘min’ or ‘max’ function ins
In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY.
When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
+
+- Alias: `any_value`
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 9890d257e84b..527ce2434c05 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -575,6 +575,42 @@ Alias:
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+## substringIndex(s, delim, count)
+
+Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL.
+
+**Syntax**
+
+```sql
+substringIndex(s, delim, count)
+```
+Alias: `SUBSTRING_INDEX`
+
+
+**Arguments**
+
+- s: The string to extract substring from. [String](../../sql-reference/data-types/string.md).
+- delim: The character to split. [String](../../sql-reference/data-types/string.md).
+- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
+
+**Example**
+
+``` sql
+SELECT substringIndex('www.clickhouse.com', '.', 2)
+```
+
+Result:
+```
+┌─substringIndex('www.clickhouse.com', '.', 2)─┐
+│ www.clickhouse │
+└──────────────────────────────────────────────┘
+```
+
+## substringIndexUTF8(s, delim, count)
+
+Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
## appendTrailingCharIfAbsent
Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`.
diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md
index dae2c7dd1d3b..6ceb9b5849e7 100644
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC
Syntax:
```sql
-ALTER TABLE table_name MODIFY column_name REMOVE property;
+ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**
diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md
index 2ab43f1b895e..677ed0119608 100644
--- a/docs/en/sql-reference/table-functions/url.md
+++ b/docs/en/sql-reference/table-functions/url.md
@@ -56,6 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
+- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding of the path in the URI. Disabled by default.
**See Also**
diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md
index 68fbfe0b22a8..cb89a091d684 100644
--- a/docs/en/sql-reference/transactions.md
+++ b/docs/en/sql-reference/transactions.md
@@ -3,23 +3,46 @@ slug: /en/guides/developer/transactional
---
# Transactional (ACID) support
-INSERT into one partition* in one table* of MergeTree* family up to max_insert_block_size rows* is transactional (ACID):
-- Atomic: INSERT is succeeded or rejected as a whole: if confirmation is sent to the client, all rows INSERTed; if error is sent to the client, no rows INSERTed.
+## Case 1: INSERT into one partition, of one table, of the MergeTree* family
+
+This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes):
+- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted.
- Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
-- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as if before INSERT or after successful INSERT; no partial state is seen;
-- Durable: successful INSERT is written to the filesystem before answering to the client, on single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
-* If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own;
-* INSERT into multiple tables with one statement is possible if materialized views are involved;
-* INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional;
-* another example: insert into Buffer tables is neither atomic nor isolated or consistent or durable;
-* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting;
-* max_insert_block_size is 1 000 000 by default and can be adjusted as needed;
-* if client did not receive the answer from the server, the client does not know if transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties;
-* ClickHouse is using MVCC with snapshot isolation internally;
-* all ACID properties are valid even in case of server kill / crash;
-* either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in typical setup;
-* "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
-* this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc.
+- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen
+- Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
+- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associate materialized views).
+
+## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own
+
+
+## Case 3: INSERT into one distributed table of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional
+
+## Case 4: Using a Buffer table
+
+- insert into Buffer tables is neither atomic nor isolated nor consistent nor durable
+
+## Case 5: Using async_insert
+
+Same as Case 1 above, with this detail:
+- atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured.
+
+## Notes
+- rows inserted from the client in some data format are packed into a single block when:
+ - the insert format is row-based (like CSV, TSV, Values, JSONEachRow, etc) and the data contains less than `max_insert_block_size` rows (~1 000 000 by default) or less than `min_chunk_bytes_for_parallel_parsing` bytes (10 MB by default) in case parallel parsing is used (enabled by default)
+ - the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
+- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
+- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
+- ClickHouse is using MVCC with snapshot isolation internally
+- all ACID properties are valid even in the case of server kill/crash
+- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
+- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
+- this explanation does not cover a new transactions feature that allows to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback)
## Transactions, Commit, and Rollback
diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md
index b8c5ee77f0c4..981f1c7b5a21 100644
--- a/docs/ru/interfaces/http.md
+++ b/docs/ru/interfaces/http.md
@@ -50,7 +50,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@@ -266,9 +266,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
Прогресс выполнения запроса можно отслеживать с помощью заголовков ответа `X-ClickHouse-Progress`. Для этого включите [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Пример последовательности заголовков:
``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
Возможные поля заголовка:
@@ -529,7 +529,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@@ -569,7 +569,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
%
@@ -621,7 +621,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Absolute Path File
* Connection #0 to host localhost left intact
@@ -640,7 +640,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Relative Path File
* Connection #0 to host localhost left intact
diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md
index 2b824ce91bd9..01a91bd41c64 100644
--- a/docs/ru/operations/configuration-files.md
+++ b/docs/ru/operations/configuration-files.md
@@ -85,6 +85,40 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера.
+## Шифрование {#encryption}
+
+Вы можете использовать симметричное шифрование для зашифровки элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте атрибут `encryption_codec` с именем кодека шифрования в качестве значения к элементу, который надо зашифровать.
+
+В отличие от атрибутов `from_zk`, `from_env` и `incl` (или элемента `include`), подстановка, т.е. расшифровка зашифрованного значения, не выполняется в файле предобработки. Расшифровка происходит только во время исполнения в серверном процессе.
+
+Пример:
+
+```xml
+
+
+
+ 00112233445566778899aabbccddeeff
+
+
+
+ admin
+ 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+
+
+```
+
+Чтобы получить зашифрованное значение, может быть использовано приложение-пример `encrypt_decrypt`.
+
+Пример:
+
+``` bash
+./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
+```
+
+``` text
+961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+```
+
## Примеры записи конфигурации на YAML {#example}
Здесь можно рассмотреть пример реальной конфигурации записанной на YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).
diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md
index a8ace2130750..92be30b101a7 100644
--- a/docs/ru/sql-reference/statements/alter/column.md
+++ b/docs/ru/sql-reference/statements/alter/column.md
@@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Синтаксис:
```sql
-ALTER TABLE table_name MODIFY column_name REMOVE property;
+ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Пример**
diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md
index c7a0f355a92d..f84768beccc4 100644
--- a/docs/zh/interfaces/http.md
+++ b/docs/zh/interfaces/http.md
@@ -53,7 +53,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
您可以在`X-ClickHouse-Progress`响应头中收到查询进度的信息。为此,启用[Http Header携带进度](../operations/settings/settings.md#settings-send_progress_in_http_headers)。示例:
``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
显示字段信息:
@@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
%
@@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Absolute Path File
* Connection #0 to host localhost left intact
@@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Relative Path File
* Connection #0 to host localhost left intact
diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt
index 317e35959aa6..abf31a7a499e 100644
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@@ -80,6 +80,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index b57521476b34..960b65746332 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -747,6 +747,7 @@ try
std::lock_guard lock(servers_lock);
metrics.reserve(servers_to_start_before_tables.size() + servers.size());
+
for (const auto & server : servers_to_start_before_tables)
metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
@@ -1476,16 +1477,18 @@ try
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
- const Settings & settings = global_context->getSettingsRef();
/// Initialize background executors after we load default_profile config.
/// This is needed to load proper values of background_pool_size etc.
global_context->initializeBackgroundExecutorsIfNeeded();
- if (settings.async_insert_threads)
+ if (server_settings.async_insert_threads)
+ {
global_context->setAsynchronousInsertQueue(std::make_shared(
global_context,
- settings.async_insert_threads));
+ server_settings.async_insert_threads,
+ server_settings.async_insert_queue_flush_on_shutdown));
+ }
size_t mark_cache_size = server_settings.mark_cache_size;
String mark_cache_policy = server_settings.mark_cache_policy;
diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index 455fa58806d1..374a1dd04a42 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -182,6 +182,7 @@ enum class AccessType
M(SYSTEM_SYNC_FILE_CACHE, "SYNC FILE CACHE", GLOBAL, SYSTEM) \
M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH_LOGS, "FLUSH LOGS", GLOBAL, SYSTEM_FLUSH) \
+ M(SYSTEM_FLUSH_ASYNC_INSERT_QUEUE, "FLUSH ASYNC INSERT QUEUE", GLOBAL, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
diff --git a/src/AggregateFunctions/AggregateFunctionAny.cpp b/src/AggregateFunctions/AggregateFunctionAny.cpp
index 7f57062126bf..fc8f50efabec 100644
--- a/src/AggregateFunctions/AggregateFunctionAny.cpp
+++ b/src/AggregateFunctions/AggregateFunctionAny.cpp
@@ -49,6 +49,7 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("any", { createAggregateFunctionAny, properties });
+ factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
diff --git a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp
index 1ed6c83af7da..1489db558579 100644
--- a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp
+++ b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp
@@ -66,7 +66,7 @@ AggregateFunctionPtr createAggregateFunctionSimpleLinearRegression(
#undef FOR_LEASTSQR_TYPES
#undef DISPATCH
- throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT ,
+ throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal types ({}, {}) of arguments of aggregate function {}, must "
"be Native Ints, Native UInts or Floats", x_arg->getName(), y_arg->getName(), name);
}
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index f5390037e6bf..3e964d5c6a36 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -1195,6 +1195,8 @@ void ClientBase::onProfileEvents(Block & block)
thread_times[host_name].system_ms = value;
else if (event_name == MemoryTracker::USAGE_EVENT_NAME)
thread_times[host_name].memory_usage = value;
+ else if (event_name == MemoryTracker::PEAK_USAGE_EVENT_NAME)
+ thread_times[host_name].peak_memory_usage = value;
}
progress_indication.updateThreadEventData(thread_times);
diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h
index 01910c4caff1..922607da813f 100644
--- a/src/Common/ConcurrentBoundedQueue.h
+++ b/src/Common/ConcurrentBoundedQueue.h
@@ -110,7 +110,7 @@ class ConcurrentBoundedQueue
/// Returns false if queue is finished
[[nodiscard]] bool pushFront(const T & x)
{
- return emplaceImpl* back= */ false>(/* timeout_milliseconds= */ std::nullopt , x);
+ return emplaceImpl* back= */ false>(/* timeout_milliseconds= */ std::nullopt, x);
}
/// Returns false if queue is finished
diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index bda181eceebf..db3c6909b214 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -26,6 +26,14 @@
#include
#include
+#if USE_SSL
+#include
+#include
+#include
+#include
+#include
+#endif
+
#define PREPROCESSED_SUFFIX "-preprocessed"
namespace fs = std::filesystem;
@@ -39,6 +47,9 @@ namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
extern const int CANNOT_LOAD_CONFIG;
+#if USE_SSL
+ extern const int BAD_ARGUMENTS;
+#endif
}
/// For cutting preprocessed path to this base
@@ -177,6 +188,72 @@ static void mergeAttributes(Element & config_element, Element & with_element)
with_element_attributes->release();
}
+#if USE_SSL
+
+std::string ConfigProcessor::encryptValue(const std::string & codec_name, const std::string & value)
+{
+ EncryptionMethod method = getEncryptionMethod(codec_name);
+ CompressionCodecEncrypted codec(method);
+
+ Memory<> memory;
+ memory.resize(codec.getCompressedReserveSize(static_cast(value.size())));
+ auto bytes_written = codec.compress(value.data(), static_cast(value.size()), memory.data());
+ auto encrypted_value = std::string(memory.data(), bytes_written);
+ std::string hex_value;
+ boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value));
+ return hex_value;
+}
+
+std::string ConfigProcessor::decryptValue(const std::string & codec_name, const std::string & value)
+{
+ EncryptionMethod method = getEncryptionMethod(codec_name);
+ CompressionCodecEncrypted codec(method);
+
+ Memory<> memory;
+ std::string encrypted_value;
+
+ try
+ {
+ boost::algorithm::unhex(value, std::back_inserter(encrypted_value));
+ }
+ catch (const std::exception &)
+ {
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text, check for valid characters [0-9a-fA-F] and length");
+ }
+
+ memory.resize(codec.readDecompressedBlockSize(encrypted_value.data()));
+ codec.decompress(encrypted_value.data(), static_cast(encrypted_value.size()), memory.data());
+ std::string decrypted_value = std::string(memory.data(), memory.size());
+ return decrypted_value;
+}
+
+void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root)
+{
+ for (Node * node = config_root->firstChild(); node; node = node->nextSibling())
+ {
+ if (node->nodeType() == Node::ELEMENT_NODE)
+ {
+ Element & element = dynamic_cast(*node);
+ if (element.hasAttribute("encryption_codec"))
+ {
+ const NodeListPtr children = element.childNodes();
+ if (children->length() != 1)
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} cannot contain nested elements", node->nodeName());
+
+ Node * text_node = node->firstChild();
+ if (text_node->nodeType() != Node::TEXT_NODE)
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have text node", node->nodeName());
+
+ auto encryption_codec = element.getAttribute("encryption_codec");
+ text_node->setNodeValue(decryptValue(encryption_codec, text_node->getNodeValue()));
+ }
+ decryptRecursive(node);
+ }
+ }
+}
+
+#endif
+
void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root)
{
const NodeListPtr with_nodes = with_root->childNodes();
@@ -694,7 +771,19 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path};
}
-void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir)
+#if USE_SSL
+
+void ConfigProcessor::decryptEncryptedElements(LoadedConfig & loaded_config)
+{
+ CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");
+ Node * config_root = getRootNode(loaded_config.preprocessed_xml.get());
+ decryptRecursive(config_root);
+ loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml);
+}
+
+#endif
+
+void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir)
{
try
{
@@ -749,6 +838,12 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
{
LOG_WARNING(log, "Couldn't save preprocessed config to {}: {}", preprocessed_path, e.displayText());
}
+
+#if USE_SSL
+ std::string preprocessed_file_name = fs::path(preprocessed_path).filename();
+ if (preprocessed_file_name == "config.xml" || preprocessed_file_name == std::format("config{}.xml", PREPROCESSED_SUFFIX))
+ decryptEncryptedElements(loaded_config);
+#endif
}
void ConfigProcessor::setConfigPath(const std::string & config_path)
diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h
index eefe65ef06cd..060ef49d36a3 100644
--- a/src/Common/Config/ConfigProcessor.h
+++ b/src/Common/Config/ConfigProcessor.h
@@ -97,7 +97,7 @@ class ConfigProcessor
/// Save preprocessed config to specified directory.
/// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/
- void savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir);
+ void savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir);
/// Set path of main config.xml. It will be cut from all configs placed to preprocessed_configs/
static void setConfigPath(const std::string & config_path);
@@ -109,6 +109,14 @@ class ConfigProcessor
/// Is the file named as result of config preprocessing, not as original files.
static bool isPreprocessedFile(const std::string & config_path);
+#if USE_SSL
+ /// Encrypt text value
+ static std::string encryptValue(const std::string & codec_name, const std::string & value);
+
+ /// Decrypt value
+ static std::string decryptValue(const std::string & codec_name, const std::string & value);
+#endif
+
static inline const auto SUBSTITUTION_ATTRS = {"incl", "from_zk", "from_env"};
private:
@@ -127,6 +135,13 @@ class ConfigProcessor
using NodePtr = Poco::AutoPtr;
+#if USE_SSL
+ void decryptRecursive(Poco::XML::Node * config_root);
+
+ /// Decrypt elements in config with specified encryption attributes
+ void decryptEncryptedElements(LoadedConfig & loaded_config);
+#endif
+
void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root);
void merge(XMLDocumentPtr config, XMLDocumentPtr with);
diff --git a/src/Common/Config/YAMLParser.cpp b/src/Common/Config/YAMLParser.cpp
index d96049197260..72706cb98ba7 100644
--- a/src/Common/Config/YAMLParser.cpp
+++ b/src/Common/Config/YAMLParser.cpp
@@ -110,9 +110,23 @@ namespace
}
else
{
- Poco::AutoPtr xml_key = xml_document->createElement(key);
- parent_xml_node.appendChild(xml_key);
- processNode(value_node, *xml_key);
+ if (key == "#text" && value_node.IsScalar())
+ {
+ for (Node * child_node = parent_xml_node.firstChild(); child_node; child_node = child_node->nextSibling())
+ if (child_node->nodeType() == Node::TEXT_NODE)
+ throw Exception(ErrorCodes::CANNOT_PARSE_YAML,
+ "YAMLParser has encountered node with several text nodes "
+ "and cannot continue parsing of the file");
+ std::string value = value_node.as();
+ Poco::AutoPtr xml_value = xml_document->createTextNode(value);
+ parent_xml_node.appendChild(xml_value);
+ }
+ else
+ {
+ Poco::AutoPtr xml_key = xml_document->createElement(key);
+ parent_xml_node.appendChild(xml_key);
+ processNode(value_node, *xml_key);
+ }
}
}
break;
diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h
index ea1914348b27..54c208c5b603 100644
--- a/src/Common/HashTable/TwoLevelStringHashTable.h
+++ b/src/Common/HashTable/TwoLevelStringHashTable.h
@@ -113,13 +113,19 @@ class TwoLevelStringHashTable : private boost::noncopyable
if ((reinterpret_cast(p) & 2048) == 0)
{
memcpy(&n[0], p, 8);
- n[0] &= -1ULL >> s;
+ if constexpr (std::endian::native == std::endian::little)
+ n[0] &= -1ULL >> s;
+ else
+ n[0] &= -1ULL << s;
}
else
{
const char * lp = x.data + x.size - 8;
memcpy(&n[0], lp, 8);
- n[0] >>= s;
+ if constexpr (std::endian::native == std::endian::little)
+ n[0] >>= s;
+ else
+ n[0] <<= s;
}
auto res = hash(k8);
auto buck = getBucketFromHash(res);
@@ -131,7 +137,10 @@ class TwoLevelStringHashTable : private boost::noncopyable
memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8);
- n[1] >>= s;
+ if constexpr (std::endian::native == std::endian::little)
+ n[1] >>= s;
+ else
+ n[1] <<= s;
auto res = hash(k16);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
@@ -142,7 +151,10 @@ class TwoLevelStringHashTable : private boost::noncopyable
memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8);
- n[2] >>= s;
+ if constexpr (std::endian::native == std::endian::little)
+ n[2] >>= s;
+ else
+ n[2] <<= s;
auto res = hash(k24);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h
index 4e29d40c953d..3ea1ea8702c4 100644
--- a/src/Common/MemoryTracker.h
+++ b/src/Common/MemoryTracker.h
@@ -95,6 +95,7 @@ class MemoryTracker
public:
static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage";
+ static constexpr auto PEAK_USAGE_EVENT_NAME = "MemoryTrackerPeakUsage";
explicit MemoryTracker(VariableContext level_ = VariableContext::Thread);
explicit MemoryTracker(MemoryTracker * parent_, VariableContext level_ = VariableContext::Thread);
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index f18a67fa5652..ab7c740ced7f 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -58,8 +58,8 @@
M(TableFunctionExecute, "Number of table function calls.") \
M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.") \
M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.") \
- M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided).") \
- M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation).") \
+ M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.") \
+ M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). Only updated for SELECT queries with SETTING use_query_cache = 1.") \
M(CreatedReadBufferOrdinary, "Number of times ordinary read buffer was created for reading data (while choosing among other read methods).") \
M(CreatedReadBufferDirectIO, "Number of times a read buffer with O_DIRECT was created for reading data (while choosing among other read methods).") \
M(CreatedReadBufferDirectIOFailed, "Number of times a read buffer with O_DIRECT was attempted to be created for reading data (while choosing among other read methods), but the OS did not allow it (due to lack of filesystem support or other reasons) and we fallen back to the ordinary reading method.") \
diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp
index 61b60060430a..960d864660cb 100644
--- a/src/Common/ProgressIndication.cpp
+++ b/src/Common/ProgressIndication.cpp
@@ -83,7 +83,7 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const
[](MemoryUsage const & acc, auto const & host_data)
{
UInt64 host_usage = host_data.second.memory_usage;
- return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)};
+ return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage), .peak = std::max(acc.peak, host_data.second.peak_memory_usage)};
});
}
@@ -101,6 +101,9 @@ void ProgressIndication::writeFinalProgress()
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
else
std::cout << ". ";
+ auto peak_memory_usage = getMemoryUsage().peak;
+ if (peak_memory_usage >= 0)
+ std::cout << "\nPeak memory usage (for query) " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << ".";
}
void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
@@ -152,7 +155,7 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
std::string profiling_msg;
double cpu_usage = getCPUUsage();
- auto [memory_usage, max_host_usage] = getMemoryUsage();
+ auto [memory_usage, max_host_usage, peak_usage] = getMemoryUsage();
if (cpu_usage > 0 || memory_usage > 0)
{
diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h
index af5d69c0255d..3776b1d8ae19 100644
--- a/src/Common/ProgressIndication.h
+++ b/src/Common/ProgressIndication.h
@@ -22,6 +22,9 @@ struct ThreadEventData
UInt64 user_ms = 0;
UInt64 system_ms = 0;
UInt64 memory_usage = 0;
+
+ // -1 used as flag 'is not show for old servers'
+ Int64 peak_memory_usage = -1;
};
using HostToTimesMap = std::unordered_map;
@@ -64,6 +67,7 @@ class ProgressIndication
{
UInt64 total = 0;
UInt64 max = 0;
+ Int64 peak = -1;
};
MemoryUsage getMemoryUsage() const;
diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt
index d095ab3a1bee..90a238c9800c 100644
--- a/src/Common/examples/CMakeLists.txt
+++ b/src/Common/examples/CMakeLists.txt
@@ -82,3 +82,8 @@ endif()
clickhouse_add_executable (interval_tree interval_tree.cpp)
target_link_libraries (interval_tree PRIVATE dbms)
+
+if (ENABLE_SSL)
+ clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp)
+ target_link_libraries (encrypt_decrypt PRIVATE dbms)
+endif()
diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp
new file mode 100644
index 000000000000..503802016cbb
--- /dev/null
+++ b/src/Common/examples/encrypt_decrypt.cpp
@@ -0,0 +1,61 @@
+#include
+#include
+#include
+#include
+
+/** This test program encrypts or decrypts text values using a symmetric encryption codec like AES_128_GCM_SIV or AES_256_GCM_SIV.
+ * Keys for codecs are loaded from section of configuration file.
+ *
+ * How to use:
+ * ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt
+ */
+
+int main(int argc, char ** argv)
+{
+ try
+ {
+ if (argc != 5)
+ {
+ std::cerr << "Usage:" << std::endl
+ << " " << argv[0] << " path action codec value" << std::endl
+ << "path: path to configuration file." << std::endl
+ << "action: -e for encryption and -d for decryption." << std::endl
+ << "codec: AES_128_GCM_SIV or AES_256_GCM_SIV." << std::endl << std::endl
+ << "Example:" << std::endl
+ << " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt";
+ return 3;
+ }
+
+ std::string action = argv[2];
+ std::string codec_name = argv[3];
+ std::string value = argv[4];
+
+ DB::ConfigProcessor processor(argv[1], false, true);
+ auto loaded_config = processor.loadConfig();
+ DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");
+
+ if (action == "-e")
+ std::cout << processor.encryptValue(codec_name, value) << std::endl;
+ else if (action == "-d")
+ std::cout << processor.decryptValue(codec_name, value) << std::endl;
+ else
+ std::cerr << "Unknown action: " << action << std::endl;
+ }
+ catch (Poco::Exception & e)
+ {
+ std::cerr << "Exception: " << e.displayText() << std::endl;
+ return 1;
+ }
+ catch (std::exception & e)
+ {
+ std::cerr << "std::exception: " << e.what() << std::endl;
+ return 3;
+ }
+ catch (...)
+ {
+ std::cerr << "Some exception" << std::endl;
+ return 2;
+ }
+
+ return 0;
+}
diff --git a/src/Common/parseRemoteDescription.cpp b/src/Common/parseRemoteDescription.cpp
index 0bcd62d30c7e..8ea3f4a0aa5f 100644
--- a/src/Common/parseRemoteDescription.cpp
+++ b/src/Common/parseRemoteDescription.cpp
@@ -52,20 +52,8 @@ static bool parseNumber(const String & description, size_t l, size_t r, size_t &
}
-/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
- * depending on whether shards or replicas are generated.
- * For example:
- * host1,host2,... - generates set of shards from host1, host2, ...
- * host1|host2|... - generates set of replicas from host1, host2, ...
- * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def.
- * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def.
- * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef.
- * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef.
- * abc{1..9}de{f,g,h} - is a direct product, 27 shards.
- * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas.
- */
-std::vector
-parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name)
+std::vector parseRemoteDescription(
+ const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name)
{
std::vector res;
std::vector cur;
diff --git a/src/Common/parseRemoteDescription.h b/src/Common/parseRemoteDescription.h
index e3e4a3f523c2..d97558c47283 100644
--- a/src/Common/parseRemoteDescription.h
+++ b/src/Common/parseRemoteDescription.h
@@ -3,7 +3,7 @@
#include
namespace DB
{
-/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
+/* Parse a string that generates shards and replicas. Separator - one of two characters '|' or ','
* depending on whether shards or replicas are generated.
* For example:
* host1,host2,... - generates set of shards from host1, host2, ...
diff --git a/src/Common/tests/gtest_sensitive_data_masker.cpp b/src/Common/tests/gtest_sensitive_data_masker.cpp
index 92c4edbac2a5..f36c41546845 100644
--- a/src/Common/tests/gtest_sensitive_data_masker.cpp
+++ b/src/Common/tests/gtest_sensitive_data_masker.cpp
@@ -27,7 +27,7 @@ TEST(Common, SensitiveDataMasker)
{
Poco::AutoPtr empty_xml_config = new Poco::Util::XMLConfiguration();
- DB::SensitiveDataMasker masker(*empty_xml_config , "");
+ DB::SensitiveDataMasker masker(*empty_xml_config, "");
masker.addMaskingRule("all a letters", "a+", "--a--");
masker.addMaskingRule("all b letters", "b+", "--b--");
masker.addMaskingRule("all d letters", "d+", "--d--");
@@ -45,7 +45,7 @@ TEST(Common, SensitiveDataMasker)
masker.printStats();
#endif
- DB::SensitiveDataMasker masker2(*empty_xml_config , "");
+ DB::SensitiveDataMasker masker2(*empty_xml_config, "");
masker2.addMaskingRule("hide root password", "qwerty123", "******");
masker2.addMaskingRule("hide SSN", "[0-9]{3}-[0-9]{2}-[0-9]{4}", "000-00-0000");
masker2.addMaskingRule("hide email", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}", "hidden@hidden.test");
@@ -58,7 +58,7 @@ TEST(Common, SensitiveDataMasker)
"SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', '******') WHERE "
"ssn='000-00-0000' or email='hidden@hidden.test'");
- DB::SensitiveDataMasker maskerbad(*empty_xml_config , "");
+ DB::SensitiveDataMasker maskerbad(*empty_xml_config, "");
// gtest has not good way to check exception content, so just do it manually (see https://github.com/google/googletest/issues/952 )
try
diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp
index 859a9c2463ae..5dcd6008b517 100644
--- a/src/Compression/CompressionCodecDeflateQpl.cpp
+++ b/src/Compression/CompressionCodecDeflateQpl.cpp
@@ -398,6 +398,14 @@ UInt32 CompressionCodecDeflateQpl::doCompressData(const char * source, UInt32 so
return res;
}
+inline void touchBufferWithZeroFilling(char * buffer, UInt32 buffer_size)
+{
+ for (char * p = buffer; p < buffer + buffer_size; p += ::getPageSize()/(sizeof(*p)))
+ {
+ *p = 0;
+ }
+}
+
void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
/// QPL library is using AVX-512 with some shuffle operations.
@@ -405,6 +413,10 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so
#if defined(MEMORY_SANITIZER)
__msan_unpoison(dest, uncompressed_size);
#endif
+/// Device IOTLB miss has big perf. impact for IAA accelerators.
+/// To avoid page fault, we need touch buffers related to accelerator in advance.
+ touchBufferWithZeroFilling(dest, uncompressed_size);
+
switch (getDecompressMode())
{
case CodecMode::Synchronous:
diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp
index 022bbd583e46..3f4e35a78a46 100644
--- a/src/Compression/CompressionCodecEncrypted.cpp
+++ b/src/Compression/CompressionCodecEncrypted.cpp
@@ -28,6 +28,17 @@ namespace DB
namespace ErrorCodes
{
extern const int OPENSSL_ERROR;
+ extern const int BAD_ARGUMENTS;
+}
+
+EncryptionMethod getEncryptionMethod(const std::string & name)
+{
+ if (name == "AES_128_GCM_SIV")
+ return AES_128_GCM_SIV;
+ else if (name == "AES_256_GCM_SIV")
+ return AES_256_GCM_SIV;
+ else
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", name);
}
namespace
@@ -63,7 +74,7 @@ uint8_t getMethodCode(EncryptionMethod Method)
}
else
{
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@@ -79,7 +90,6 @@ namespace ErrorCodes
{
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int LOGICAL_ERROR;
- extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA;
}
@@ -104,7 +114,7 @@ UInt64 methodKeySize(EncryptionMethod Method)
}
else
{
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@@ -129,7 +139,7 @@ auto getMethod(EncryptionMethod Method)
}
else
{
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@@ -205,7 +215,7 @@ auto getMethod(EncryptionMethod Method)
}
else
{
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@@ -578,7 +588,7 @@ String CompressionCodecEncrypted::Configuration::getKey(EncryptionMethod method,
if (current_params->keys_storage[method].contains(key_id))
key = current_params->keys_storage[method].at(key_id);
else
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config", key_id);
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config for {} encryption codec", key_id, getMethodName(method));
return key;
}
diff --git a/src/Compression/CompressionCodecEncrypted.h b/src/Compression/CompressionCodecEncrypted.h
index 0f680dbcb090..fafcf4af507e 100644
--- a/src/Compression/CompressionCodecEncrypted.h
+++ b/src/Compression/CompressionCodecEncrypted.h
@@ -18,6 +18,9 @@ enum EncryptionMethod
MAX_ENCRYPTION_METHOD
};
+/// Get method for string name. Throw exception for wrong name.
+EncryptionMethod getEncryptionMethod(const std::string & name);
+
/** This codec encrypts and decrypts blocks with AES-128 in
* GCM-SIV mode (RFC-8452), which is the only cipher currently
* supported. Although it is implemented as a compression codec
diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp
index 94fc07bcc4a3..79929c4e66e3 100644
--- a/src/Coordination/ZooKeeperDataReader.cpp
+++ b/src/Coordination/ZooKeeperDataReader.cpp
@@ -40,7 +40,7 @@ void deserializeSnapshotMagic(ReadBuffer & in)
Coordination::read(dbid, in);
static constexpr int32_t SNP_HEADER = 1514885966; /// "ZKSN"
if (magic_header != SNP_HEADER)
- throw Exception(ErrorCodes::CORRUPTED_DATA ,"Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header);
+ throw Exception(ErrorCodes::CORRUPTED_DATA, "Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header);
}
int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in)
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 1a9f226041b6..ca27cbdbf19d 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -48,6 +48,8 @@ namespace DB
M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Limit on total memory usage for merges and mutations. Zero means Unlimited.", 0) \
M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to ram ratio. Allows to lower memory limit on low-memory systems.", 0) \
M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \
+ M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
+ M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
\
M(UInt64, max_concurrent_queries, 0, "Limit on total number of concurrently executed queries. Zero means Unlimited.", 0) \
M(UInt64, max_concurrent_insert_queries, 0, "Limit on total number of concurrently insert queries. Zero means Unlimited.", 0) \
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 24be644ee550..992135daebd3 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -534,7 +534,6 @@ class IColumn;
M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \
M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
- M(Bool, optimize_duplicate_order_by_and_distinct, false, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
@@ -623,6 +622,7 @@ class IColumn;
M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \
M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \
M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \
+ M(Bool, disable_url_encoding, false, " Allows to disable decoding/encoding path in uri in URL table engine", 0) \
M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \
M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \
@@ -659,7 +659,8 @@ class IColumn;
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \
\
- M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
+ M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
+ M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \
@@ -674,7 +675,6 @@ class IColumn;
M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \
M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \
\
- M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \
M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \
@@ -820,6 +820,7 @@ class IColumn;
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \
+ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \
/* ---- */ \
@@ -831,6 +832,7 @@ class IColumn;
MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \
MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \
MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \
+ MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \
/** The section above is for obsolete settings. Do not add anything there. */
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index 2886cdd288d0..70b702f1b330 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -80,6 +80,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map settings_changes_history =
{
+ {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
{"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},
diff --git a/src/Core/tests/gtest_settings.cpp b/src/Core/tests/gtest_settings.cpp
index cbeb84ef2e7f..a6d8763bfb80 100644
--- a/src/Core/tests/gtest_settings.cpp
+++ b/src/Core/tests/gtest_settings.cpp
@@ -121,7 +121,7 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetString)
ASSERT_EQ(Field("decimal,datetime64"), setting);
// comma with spaces
- setting = " datetime64 , decimal ";
+ setting = " datetime64 , decimal "; /// bad punctuation is ok here
ASSERT_TRUE(setting.changed);
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
@@ -166,4 +166,3 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetInvalidString)
ASSERT_TRUE(setting.changed);
ASSERT_EQ(0, setting.value.getValue());
}
-
diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h
index 6b068b0d8b1f..cf283d3358c3 100644
--- a/src/DataTypes/NumberTraits.h
+++ b/src/DataTypes/NumberTraits.h
@@ -174,7 +174,7 @@ template struct ResultOfBitNot
* Float, [U]Int -> Float
* Decimal, Decimal -> Decimal
* UUID, UUID -> UUID
- * UInt64 , Int -> Error
+ * UInt64, Int -> Error
* Float, [U]Int64 -> Error
*/
template
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index d3b3d4b545fd..ed56edd7503a 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -524,6 +524,7 @@ void DatabaseReplicated::startupTables(ThreadPool & thread_pool, LoadingStrictne
ddl_worker = std::make_unique(this, getContext());
ddl_worker->startup();
+ ddl_worker_initialized = true;
}
bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const
@@ -1155,6 +1156,7 @@ void DatabaseReplicated::stopReplication()
void DatabaseReplicated::shutdown()
{
stopReplication();
+ ddl_worker_initialized = false;
ddl_worker = nullptr;
DatabaseAtomic::shutdown();
}
@@ -1299,7 +1301,7 @@ bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const
/// It may update the metadata digest (both locally and in ZooKeeper)
/// before DatabaseReplicatedDDLWorker::initializeReplication() has finished.
/// We should not update metadata until the database is initialized.
- return ddl_worker && ddl_worker->isCurrentlyActive();
+ return ddl_worker_initialized && ddl_worker->isCurrentlyActive();
}
void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const String & table_name)
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 8e33f482ac12..7ba91e480851 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -134,6 +134,7 @@ class DatabaseReplicated : public DatabaseAtomic
std::atomic_bool is_readonly = true;
std::atomic_bool is_probably_dropped = false;
std::atomic_bool is_recovering = false;
+ std::atomic_bool ddl_worker_initialized = false;
std::unique_ptr ddl_worker;
UInt32 max_log_ptr_at_creation = 0;
diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp
index bb98e2bd3bbd..4ba793d858dc 100644
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@@ -292,7 +292,7 @@ void DatabaseWithOwnTablesBase::shutdown()
for (const auto & kv : tables_snapshot)
{
- kv.second->flush();
+ kv.second->flushAndPrepareForShutdown();
}
for (const auto & kv : tables_snapshot)
diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
index 0b7352e9cbb6..b12ffc555d4d 100644
--- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
+++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
@@ -322,7 +322,7 @@ void buildSingleAttribute(
/** Transforms
- * PRIMARY KEY Attr1 ,..., AttrN
+ * PRIMARY KEY Attr1, ..., AttrN
* to the next configuration
* Attr1
* or
diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h
index cd40880ba540..291a287919d7 100644
--- a/src/Functions/FunctionsComparison.h
+++ b/src/Functions/FunctionsComparison.h
@@ -1183,15 +1183,9 @@ class FunctionComparison : public IFunction
|| (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size())
|| (arguments[0]->equals(*arguments[1]))))
{
- try
- {
- getLeastSupertype(arguments);
- }
- catch (const Exception &)
- {
+ if (!tryGetLeastSupertype(arguments))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types of arguments ({}, {})"
" of function {}", arguments[0]->getName(), arguments[1]->getName(), getName());
- }
}
if (left_tuple && right_tuple)
diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp
index d6873d9490e0..ff8ff2d26517 100644
--- a/src/Functions/FunctionsStringHash.cpp
+++ b/src/Functions/FunctionsStringHash.cpp
@@ -292,8 +292,8 @@ struct SimHashImpl
continue;
// we need to store the new word hash value to the oldest location.
- // for example, N = 5, array |a0|a1|a2|a3|a4|, now , a0 is the oldest location,
- // so we need to store new word hash into location of a0, then ,this array become
+ // for example, N = 5, array |a0|a1|a2|a3|a4|, now, a0 is the oldest location,
+ // so we need to store new word hash into location of a0, then this array become
// |a5|a1|a2|a3|a4|, next time, a1 become the oldest location, we need to store new
// word hash value into location of a1, then array become |a5|a6|a2|a3|a4|
words[offset] = BytesRef{word_start, length};
@@ -793,4 +793,3 @@ REGISTER_FUNCTION(StringHash)
factory.registerFunction();
}
}
-
diff --git a/src/Functions/GatherUtils/sliceHasImplAnyAll.h b/src/Functions/GatherUtils/sliceHasImplAnyAll.h
index 21c80b742fdb..99bf1a7cc334 100644
--- a/src/Functions/GatherUtils/sliceHasImplAnyAll.h
+++ b/src/Functions/GatherUtils/sliceHasImplAnyAll.h
@@ -375,14 +375,14 @@ bool sliceHasImplAnyAllImplInt16(
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)),
- _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))),
+ _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6))))),
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)),
- _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data ,1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))),
+ _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2))))))
diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp
new file mode 100644
index 000000000000..f28194781c22
--- /dev/null
+++ b/src/Functions/GregorianDate.cpp
@@ -0,0 +1,376 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
+ extern const int CANNOT_PARSE_DATE;
+ extern const int CANNOT_FORMAT_DATETIME;
+ extern const int LOGICAL_ERROR;
+}
+
+namespace
+{
+    /// Proleptic Gregorian leap-year test: every 4th year, except centuries
+    /// that are not divisible by 400.
+    inline constexpr bool is_leap_year(int32_t year)
+    {
+        return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
+    }
+
+    /// Number of days in the given month (1..12). Terminates on out-of-range
+    /// input, since callers validate the month first.
+    inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
+    {
+        switch (month)
+        {
+            case 1: return 31;
+            case 2: return is_leap_year ? 29 : 28;
+            case 3: return 31;
+            case 4: return 30;
+            case 5: return 31;
+            case 6: return 30;
+            case 7: return 31;
+            case 8: return 31;
+            case 9: return 30;
+            case 10: return 31;
+            case 11: return 30;
+            case 12: return 31;
+            default:
+                std::terminate();
+        }
+    }
+
+    /** Integer division truncated toward negative infinity.
+      */
+    template <typename I, typename J>
+    inline constexpr I div(I x, J y)
+    {
+        const auto y_cast = static_cast<I>(y);
+        if (x > 0 && y_cast < 0)
+            return ((x - 1) / y_cast) - 1;
+        else if (x < 0 && y_cast > 0)
+            return ((x + 1) / y_cast) - 1;
+        else
+            return x / y_cast;
+    }
+
+    /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x.
+      */
+    template <typename I, typename J>
+    inline constexpr I mod(I x, J y)
+    {
+        const auto y_cast = static_cast<I>(y);
+        const auto r = x % y_cast;
+        if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0))
+            return r == 0 ? static_cast<I>(0) : r + y_cast;
+        else
+            return r;
+    }
+
+    /** Like std::min(), but the type of operands may differ.
+      */
+    template <typename I, typename J>
+    inline constexpr I min(I x, J y)
+    {
+        const auto y_cast = static_cast<I>(y);
+        return x < y_cast ? x : y_cast;
+    }
+
+    /// Read exactly one decimal digit; throws on EOF or on a non-digit byte.
+    inline char readDigit(ReadBuffer & in)
+    {
+        char c;
+        if (!in.read(c))
+            throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream");
+        else if (c < '0' || c > '9')
+            throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else");
+        else
+            return c - '0';
+    }
+
+    /// Non-throwing variant: on success stores the digit value (0..9) in c.
+    inline bool tryReadDigit(ReadBuffer & in, char & c)
+    {
+        if (in.read(c) && c >= '0' && c <= '9')
+        {
+            c -= '0';
+            return true;
+        }
+
+        return false;
+    }
+}
+
+/// Parse a date in the fixed form 'YYYY-MM-DD' and require end-of-stream after it.
+/// Throws CANNOT_PARSE_INPUT_ASSERTION_FAILED on malformed digits/separators and
+/// CANNOT_PARSE_DATE when the components do not form a valid Gregorian date.
+void GregorianDate::init(ReadBuffer & in)
+{
+    year_ = readDigit(in) * 1000
+          + readDigit(in) * 100
+          + readDigit(in) * 10
+          + readDigit(in);
+
+    assertChar('-', in);
+
+    month_ = readDigit(in) * 10
+           + readDigit(in);
+
+    assertChar('-', in);
+
+    day_of_month_ = readDigit(in) * 10
+                  + readDigit(in);
+
+    assertEOF(in);
+
+    if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_))
+        /// The message has three placeholders — pass the three values (they were missing).
+        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date, out of range (year: {}, month: {}, day_of_month: {}).",
+            year_, month_, day_of_month_);
+}
+
+bool GregorianDate::tryInit(ReadBuffer & in)
+{
+ char c[8];
+
+ if ( !tryReadDigit(in, c[0])
+ || !tryReadDigit(in, c[1])
+ || !tryReadDigit(in, c[2])
+ || !tryReadDigit(in, c[3])
+ || !checkChar('-', in)
+ || !tryReadDigit(in, c[4])
+ || !tryReadDigit(in, c[5])
+ || !checkChar('-', in)
+ || !tryReadDigit(in, c[6])
+ || !tryReadDigit(in, c[7])
+ || !in.eof())
+ {
+ return false;
+ }
+
+ year_ = c[0] * 1000 + c[1] * 100 + c[2] * 10 + c[3];
+ month_ = c[4] * 10 + c[5];
+ day_of_month_ = c[6] * 10 + c[7];
+
+ if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_))
+ return false;
+
+ return true;
+}
+
+/// Construct by parsing 'YYYY-MM-DD' from the buffer; throws on malformed input (see init).
+GregorianDate::GregorianDate(ReadBuffer & in)
+{
+    init(in);
+}
+
+/// Set this date from a Modified Julian Day number:
+/// MJD -> ordinal date (year, day-of-year) -> (month, day-of-month).
+void GregorianDate::init(int64_t modified_julian_day)
+{
+    const OrdinalDate ordinal(modified_julian_day);
+    const MonthDay month_day(is_leap_year(ordinal.year()), ordinal.dayOfYear());
+
+    year_ = ordinal.year();
+    month_ = month_day.month();
+    day_of_month_ = month_day.dayOfMonth();
+}
+
+/// Non-throwing variant of init(int64_t); returns false when the day number
+/// is outside the representable range.
+bool GregorianDate::tryInit(int64_t modified_julian_day)
+{
+    OrdinalDate ordinal;
+    if (!ordinal.tryInit(modified_julian_day))
+        return false;
+
+    const MonthDay month_day(is_leap_year(ordinal.year()), ordinal.dayOfYear());
+    year_ = ordinal.year();
+    month_ = month_day.month();
+    day_of_month_ = month_day.dayOfMonth();
+
+    return true;
+}
+
+/// Construct from a Modified Julian Day number; throws if it is out of the supported range (see OrdinalDate::init).
+GregorianDate::GregorianDate(int64_t modified_julian_day)
+{
+    init(modified_julian_day);
+}
+
+/// Convert to a Modified Julian Day number:
+/// (month, day-of-month) -> day-of-year -> (year, day-of-year) -> MJD.
+int64_t GregorianDate::toModifiedJulianDay() const
+{
+    const MonthDay month_day(month_, day_of_month_);
+    const OrdinalDate ordinal(year_, month_day.dayOfYear(is_leap_year(year_)));
+    return ordinal.toModifiedJulianDay();
+}
+
+/// Non-throwing conversion to Modified Julian Day; on success stores the
+/// result in res and returns true.
+bool GregorianDate::tryToModifiedJulianDay(int64_t & res) const
+{
+    const MonthDay month_day(month_, day_of_month_);
+    OrdinalDate ordinal;
+
+    if (!ordinal.tryInit(year_, month_day.dayOfYear(is_leap_year(year_))))
+        return false;
+
+    res = ordinal.toModifiedJulianDay();
+    return true;
+}
+
+/// Write the date as 'YYYY-MM-DD'. ReturnType is void (throws on a year
+/// outside 0..9999) or bool (returns false instead of throwing).
+/// The stripped template parameter list and is_same_v arguments are restored here.
+template <typename ReturnType>
+ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const
+{
+    if (year_ < 0 || year_ > 9999)
+    {
+        if constexpr (std::is_same_v<ReturnType, void>)
+            throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
+                "Impossible to stringify: year too big or small: {}", year_);
+        else
+            return false;
+    }
+    else
+    {
+        auto y = year_;
+        writeChar('0' + y / 1000, buf); y %= 1000;
+        writeChar('0' + y / 100, buf); y %= 100;
+        writeChar('0' + y / 10, buf); y %= 10;
+        writeChar('0' + y, buf);
+
+        writeChar('-', buf);
+
+        auto m = month_;
+        writeChar('0' + m / 10, buf); m %= 10;
+        writeChar('0' + m, buf);
+
+        writeChar('-', buf);
+
+        auto d = day_of_month_;
+        writeChar('0' + d / 10, buf); d %= 10;
+        writeChar('0' + d, buf);
+    }
+
+    return ReturnType(true);
+}
+
+/// Render the date as a 'YYYY-MM-DD' string (write() throws if the year is outside 0..9999).
+std::string GregorianDate::toString() const
+{
+    WriteBufferFromOwnString buf;
+    write(buf);
+    return buf.str();
+}
+
+/// Set (year, day-of-year); throws LOGICAL_ERROR if day_of_year is outside
+/// 1..365 (1..366 in a leap year). Note: members are assigned before the
+/// validation, so on throw the object holds the invalid values.
+void OrdinalDate::init(int32_t year, uint16_t day_of_year)
+{
+    year_ = year;
+    day_of_year_ = day_of_year;
+
+    if (day_of_year < 1 || day_of_year > (is_leap_year(year) ? 366 : 365))
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", year, day_of_year);
+}
+
+/// Non-throwing variant of init(year, day_of_year): assigns the members and
+/// reports whether day_of_year is in range for the given year.
+bool OrdinalDate::tryInit(int32_t year, uint16_t day_of_year)
+{
+    year_ = year;
+    day_of_year_ = day_of_year;
+
+    const uint16_t days_in_year = is_leap_year(year) ? 366 : 365;
+    return day_of_year >= 1 && day_of_year <= days_in_year;
+}
+
+/// Set this ordinal date from a Modified Julian Day number.
+/// Throws CANNOT_FORMAT_DATETIME when the day number is outside the
+/// supported range -678941..2973119 (years 0000..9999, per tryInit).
+void OrdinalDate::init(int64_t modified_julian_day)
+{
+    if (!tryInit(modified_julian_day))
+        throw Exception(
+            ErrorCodes::CANNOT_FORMAT_DATETIME,
+            "Value cannot be represented as date because it's out of range");
+}
+
+bool OrdinalDate::tryInit(int64_t modified_julian_day)
+{
+    /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively).
+
+    if (modified_julian_day < -678941)
+        return false;
+
+    if (modified_julian_day > 2973119)
+        return false;
+
+    /// Decompose the shifted day count into Gregorian calendar cycles:
+    /// 146097 days = one 400-year cycle, 36524 days = one century,
+    /// 1461 days = one 4-year cycle. min(..., 3) clamps the last partial
+    /// century / year of a cycle so e.g. Dec 31 of a leap year stays in it.
+    const auto a = modified_julian_day + 678575;
+    const auto quad_cent = div(a, 146097);
+    const auto b = mod(a, 146097);
+    const auto cent = min(div(b, 36524), 3);
+    const auto c = b - cent * 36524;
+    const auto quad = div(c, 1461);
+    const auto d = mod(c, 1461);
+    const auto y = min(div(d, 365), 3);
+
+    day_of_year_ = d - y * 365 + 1;
+    /// static_cast target type restored (it was stripped): year_ is int32_t.
+    year_ = static_cast<int32_t>(quad_cent * 400 + cent * 100 + quad * 4 + y + 1);
+
+    return true;
+}
+
+
+/// Construct from (year, day-of-year); throws LOGICAL_ERROR on an invalid day-of-year.
+OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year)
+{
+    init(year, day_of_year);
+}
+
+/// Construct from a Modified Julian Day number; throws if it is out of the supported range.
+OrdinalDate::OrdinalDate(int64_t modified_julian_day)
+{
+    init(modified_julian_day);
+}
+
+/// Convert to a Modified Julian Day number: days in all complete years before
+/// year_ (365 each, +1 per 4 years, -1 per 100, +1 per 400 — the Gregorian
+/// leap rules), plus day_of_year_, shifted by the epoch offset 678576.
+int64_t OrdinalDate::toModifiedJulianDay() const noexcept
+{
+    const auto y = year_ - 1;
+
+    return day_of_year_
+        + 365 * y
+        + div(y, 4)
+        - div(y, 100)
+        + div(y, 400)
+        - 678576;
+}
+
+/// Construct from month (1..12, validated here) and day-of-month
+/// (validated later, in dayOfYear(), once leap-year status is known).
+MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
+    : month_(month)
+    , day_of_month_(day_of_month)
+{
+    if (month < 1 || month > 12)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", month);
+    /* We can't validate day_of_month here, because we don't know if
+     * it's a leap year. */
+}
+
+/// Split a day-of-year (1-based) into month and day-of-month,
+/// given whether the year is a leap year.
+MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
+{
+    const uint16_t days_in_year = is_leap_year ? 366 : 365;
+    if (day_of_year < 1 || day_of_year > days_in_year)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}",
+            (is_leap_year ? "leap, " : "non-leap, "), day_of_year);
+
+    /// Walk the months, subtracting each month's length until the remaining
+    /// day count fits inside the current month.
+    uint16_t remaining = day_of_year;
+    for (month_ = 1; remaining > monthLength(is_leap_year, month_); ++month_)
+        remaining -= monthLength(is_leap_year, month_);
+
+    day_of_month_ = remaining;
+}
+
+/// Compute the 1-based day-of-year; throws LOGICAL_ERROR if day_of_month_
+/// is invalid for the (now known) leap-year status.
+uint16_t MonthDay::dayOfYear(bool is_leap_year) const
+{
+    if (day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year, month_))
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
+            (is_leap_year ? "leap, " : "non-leap, "), month_, day_of_month_);
+    }
+
+    /// (367 * m - 362) / 12 counts the days before month m assuming an
+    /// alternating 31/30 pattern; the correction fixes February's real length.
+    const int correction = month_ <= 2 ? 0 : (is_leap_year ? -1 : -2);
+    return (367 * month_ - 362) / 12 + correction + day_of_month_;
+}
+
+/// Explicit instantiations. The template arguments were stripped; they are
+/// required here because ReturnType cannot be deduced from the signature.
+template void GregorianDate::writeImpl<void>(WriteBuffer & buf) const;
+template bool GregorianDate::writeImpl<bool>(WriteBuffer & buf) const;
+
+}
diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h
index 63bc443fa31a..2528223443e6 100644
--- a/src/Functions/GregorianDate.h
+++ b/src/Functions/GregorianDate.h
@@ -1,408 +1,155 @@
#pragma once
-#include
-#include
#include
-#include
-#include
-#include
-#include
-
-#include
namespace DB
{
- namespace ErrorCodes
- {
- extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
- extern const int CANNOT_PARSE_DATE;
- extern const int CANNOT_FORMAT_DATETIME;
- extern const int LOGICAL_ERROR;
- }
-
- /** Proleptic Gregorian calendar date. YearT is an integral type
- * which should be at least 32 bits wide, and should preferably
- * be signed.
- */
- template
- class GregorianDate
- {
- public:
- /** Construct from date in text form 'YYYY-MM-DD' by reading from
- * ReadBuffer.
- */
- explicit GregorianDate(ReadBuffer & in);
-
- /** Construct from Modified Julian Day. The type T is an
- * integral type which should be at least 32 bits wide, and
- * should preferably signed.
- */
- explicit GregorianDate(is_integer auto modified_julian_day);
-
- /** Convert to Modified Julian Day. The type T is an integral type
- * which should be at least 32 bits wide, and should preferably
- * signed.
- */
- template
- T toModifiedJulianDay() const;
-
- /** Write the date in text form 'YYYY-MM-DD' to a buffer.
- */
- void write(WriteBuffer & buf) const;
-
- /** Convert to a string in text form 'YYYY-MM-DD'.
- */
- std::string toString() const;
-
- YearT year() const noexcept
- {
- return year_;
- }
-
- uint8_t month() const noexcept
- {
- return month_;
- }
-
- uint8_t day_of_month() const noexcept /// NOLINT
- {
- return day_of_month_;
- }
-
- private:
- YearT year_; /// NOLINT
- uint8_t month_; /// NOLINT
- uint8_t day_of_month_; /// NOLINT
- };
-
- /** ISO 8601 Ordinal Date. YearT is an integral type which should
- * be at least 32 bits wide, and should preferably signed.
- */
- template
- class OrdinalDate
- {
- public:
- OrdinalDate(YearT year, uint16_t day_of_year);
-
- /** Construct from Modified Julian Day. The type T is an
- * integral type which should be at least 32 bits wide, and
- * should preferably signed.
- */
- template
- explicit OrdinalDate(DayT modified_julian_day);
-
- /** Convert to Modified Julian Day. The type T is an integral
- * type which should be at least 32 bits wide, and should
- * preferably be signed.
- */
- template
- T toModifiedJulianDay() const noexcept;
-
- YearT year() const noexcept
- {
- return year_;
- }
-
- uint16_t dayOfYear() const noexcept
- {
- return day_of_year_;
- }
-
- private:
- YearT year_; /// NOLINT
- uint16_t day_of_year_; /// NOLINT
- };
-
- class MonthDay
- {
- public:
- /** Construct from month and day. */
- MonthDay(uint8_t month, uint8_t day_of_month);
-
- /** Construct from day of year in Gregorian or Julian
- * calendars to month and day.
- */
- MonthDay(bool is_leap_year, uint16_t day_of_year);
-
- /** Convert month and day in Gregorian or Julian calendars to
- * day of year.
- */
- uint16_t dayOfYear(bool is_leap_year) const;
-
- uint8_t month() const noexcept
- {
- return month_;
- }
-
- uint8_t day_of_month() const noexcept /// NOLINT
- {
- return day_of_month_;
- }
-
- private:
- uint8_t month_; /// NOLINT
- uint8_t day_of_month_; /// NOLINT
- };
-}
-/* Implementation */
+class ReadBuffer;
+class WriteBuffer;
-namespace gd
+/// Proleptic Gregorian calendar date.
+class GregorianDate
{
- using namespace DB;
+public:
+ GregorianDate() {}
- template
- static inline constexpr bool is_leap_year(YearT year)
- {
- return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
- }
+ void init(ReadBuffer & in);
+ bool tryInit(ReadBuffer & in);
- static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
- {
- switch (month)
- {
- case 1: return 31;
- case 2: return is_leap_year ? 29 : 28;
- case 3: return 31;
- case 4: return 30;
- case 5: return 31;
- case 6: return 30;
- case 7: return 31;
- case 8: return 31;
- case 9: return 30;
- case 10: return 31;
- case 11: return 30;
- case 12: return 31;
- default:
- std::terminate();
- }
- }
+ /** Construct from date in text form 'YYYY-MM-DD' by reading from
+ * ReadBuffer.
+ */
+ explicit GregorianDate(ReadBuffer & in);
+
+ void init(int64_t modified_julian_day);
+ bool tryInit(int64_t modified_julian_day);
+
+ /** Construct from Modified Julian Day, passed as int64_t
+ * (a signed integral type at least 32 bits wide, which the
+ * conversion requires).
+ */
+ explicit GregorianDate(int64_t modified_julian_day);
+
+ /** Convert to Modified Julian Day, returned as int64_t
+ * (a signed integral type at least 32 bits wide). See
+ * tryToModifiedJulianDay for a variant that reports failure via bool.
+ */
+ int64_t toModifiedJulianDay() const;
+ bool tryToModifiedJulianDay(int64_t & res) const;
- /** Integer division truncated toward negative infinity.
+ /** Write the date in text form 'YYYY-MM-DD' to a buffer.
*/
- template
- static inline constexpr I div(I x, J y)
+ void write(WriteBuffer & buf) const
{
- const auto y_cast = static_cast(y);
- if (x > 0 && y_cast < 0)
- return ((x - 1) / y_cast) - 1;
- else if (x < 0 && y_cast > 0)
- return ((x + 1) / y_cast) - 1;
- else
- return x / y_cast;
+ writeImpl(buf);
}
- /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x.
- */
- template
- static inline constexpr I mod(I x, J y)
+ bool tryWrite(WriteBuffer & buf) const
{
- const auto y_cast = static_cast(y);
- const auto r = x % y_cast;
- if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0))
- return r == 0 ? static_cast(0) : r + y_cast;
- else
- return r;
+ return writeImpl(buf);
}
- /** Like std::min(), but the type of operands may differ.
+ /** Convert to a string in text form 'YYYY-MM-DD'.
*/
- template
- static inline constexpr I min(I x, J y)
+ std::string toString() const;
+
+ int32_t year() const noexcept
{
- const auto y_cast = static_cast(y);
- return x < y_cast ? x : y_cast;
+ return year_;
}
- static inline char readDigit(ReadBuffer & in)
+ uint8_t month() const noexcept
{
- char c;
- if (!in.read(c))
- throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream");
- else if (c < '0' || c > '9')
- throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else");
- else
- return c - '0';
+ return month_;
}
-}
-namespace DB
-{
- template
- GregorianDate::GregorianDate(ReadBuffer & in)
+ uint8_t dayOfMonth() const noexcept
{
- year_ = gd::readDigit(in) * 1000
- + gd::readDigit(in) * 100
- + gd::readDigit(in) * 10
- + gd::readDigit(in);
+ return day_of_month_;
+ }
- assertChar('-', in);
+private:
+ int32_t year_ = 0;
+ uint8_t month_ = 0;
+ uint8_t day_of_month_ = 0;
- month_ = gd::readDigit(in) * 10
- + gd::readDigit(in);
+ template
+ ReturnType writeImpl(WriteBuffer & buf) const;
+};
- assertChar('-', in);
+/** ISO 8601 Ordinal Date.
+ */
+class OrdinalDate
+{
+public:
+ OrdinalDate() {}
- day_of_month_ = gd::readDigit(in) * 10
- + gd::readDigit(in);
+ void init(int32_t year, uint16_t day_of_year);
+ bool tryInit(int32_t year, uint16_t day_of_year);
- assertEOF(in);
+ void init(int64_t modified_julian_day);
+ bool tryInit(int64_t modified_julian_day);
- if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_))
- throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString());
- }
+ OrdinalDate(int32_t year, uint16_t day_of_year);
- template
- GregorianDate::GregorianDate(is_integer auto modified_julian_day)
- {
- const OrdinalDate ord(modified_julian_day);
- const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear());
- year_ = ord.year();
- month_ = md.month();
- day_of_month_ = md.day_of_month();
- }
+ /** Construct from Modified Julian Day, passed as int64_t
+ * (a signed integral type at least 32 bits wide, which the
+ * conversion requires).
+ */
+ explicit OrdinalDate(int64_t modified_julian_day);
- template
- template
- T GregorianDate::toModifiedJulianDay() const
- {
- const MonthDay md(month_, day_of_month_);
- const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_));
- const OrdinalDate ord(year_, day_of_year);
- return ord.template toModifiedJulianDay();
- }
+ /** Convert to Modified Julian Day, returned as int64_t
+ * (a signed integral type at least 32 bits wide).
+ * Never throws.
+ */
+ int64_t toModifiedJulianDay() const noexcept;
- template
- void GregorianDate::write(WriteBuffer & buf) const
+ int32_t year() const noexcept
{
- if (year_ < 0 || year_ > 9999)
- {
- throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
- "Impossible to stringify: year too big or small: {}", DB::toString(year_));
- }
- else
- {
- auto y = year_;
- writeChar('0' + y / 1000, buf); y %= 1000;
- writeChar('0' + y / 100, buf); y %= 100;
- writeChar('0' + y / 10, buf); y %= 10;
- writeChar('0' + y , buf);
-
- writeChar('-', buf);
-
- auto m = month_;
- writeChar('0' + m / 10, buf); m %= 10;
- writeChar('0' + m , buf);
-
- writeChar('-', buf);
-
- auto d = day_of_month_;
- writeChar('0' + d / 10, buf); d %= 10;
- writeChar('0' + d , buf);
- }
+ return year_;
}
- template
- std::string GregorianDate::toString() const
+ uint16_t dayOfYear() const noexcept
{
- WriteBufferFromOwnString buf;
- write(buf);
- return buf.str();
+ return day_of_year_;
}
- template
- OrdinalDate::OrdinalDate(YearT year, uint16_t day_of_year)
- : year_(year)
- , day_of_year_(day_of_year)
- {
- if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 366 : 365))
- {
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year));
- }
- }
+private:
+ int32_t year_ = 0;
+ uint16_t day_of_year_ = 0;
+};
- template
- template
- OrdinalDate::OrdinalDate(DayT modified_julian_day)
- {
- /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively).
-
- if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941)
- if (modified_julian_day < -678941)
- throw Exception(
- ErrorCodes::CANNOT_FORMAT_DATETIME,
- "Value cannot be represented as date because it's out of range");
-
- if constexpr (std::numeric_limits::max() > 2973119)
- if (modified_julian_day > 2973119)
- throw Exception(
- ErrorCodes::CANNOT_FORMAT_DATETIME,
- "Value cannot be represented as date because it's out of range");
-
- const auto a = modified_julian_day + 678575;
- const auto quad_cent = gd::div(a, 146097);
- const auto b = gd::mod(a, 146097);
- const auto cent = gd::min(gd::div(b, 36524), 3);
- const auto c = b - cent * 36524;
- const auto quad = gd::div(c, 1461);
- const auto d = gd::mod(c, 1461);
- const auto y = gd::min(gd::div(d, 365), 3);
-
- day_of_year_ = d - y * 365 + 1;
- year_ = static_cast(quad_cent * 400 + cent * 100 + quad * 4 + y + 1);
- }
+class MonthDay
+{
+public:
+ /** Construct from month and day. */
+ MonthDay(uint8_t month, uint8_t day_of_month);
- template
- template
- T OrdinalDate::toModifiedJulianDay() const noexcept
- {
- const auto y = year_ - 1;
- return day_of_year_
- + 365 * y
- + gd::div(y, 4)
- - gd::div(y, 100)
- + gd::div(y, 400)
- - 678576;
- }
+ /** Construct from day of year in Gregorian or Julian
+ * calendars to month and day.
+ */
+ MonthDay(bool is_leap_year, uint16_t day_of_year);
- inline MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
- : month_(month)
- , day_of_month_(day_of_month)
- {
- if (month < 1 || month > 12)
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month));
- /* We can't validate day_of_month here, because we don't know if
- * it's a leap year. */
- }
+ /** Convert month and day in Gregorian or Julian calendars to
+ * day of year.
+ */
+ uint16_t dayOfYear(bool is_leap_year) const;
- inline MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
+ uint8_t month() const noexcept
{
- if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365))
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}",
- (is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year));
-
- month_ = 1;
- uint16_t d = day_of_year;
- while (true)
- {
- const auto len = gd::monthLength(is_leap_year, month_);
- if (d <= len)
- break;
- month_++;
- d -= len;
- }
- day_of_month_ = d;
+ return month_;
}
- inline uint16_t MonthDay::dayOfYear(bool is_leap_year) const
+ uint8_t dayOfMonth() const noexcept
{
- if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_))
- {
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
- (is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_));
- }
- const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2;
- return (367 * month_ - 362) / 12 + k + day_of_month_;
+ return day_of_month_;
}
+
+private:
+ uint8_t month_ = 0;
+ uint8_t day_of_month_ = 0;
+};
+
}
diff --git a/src/Functions/currentDatabase.cpp b/src/Functions/currentDatabase.cpp
index b1a3cbf58568..b7fd6c4fecc7 100644
--- a/src/Functions/currentDatabase.cpp
+++ b/src/Functions/currentDatabase.cpp
@@ -54,7 +54,8 @@ class FunctionCurrentDatabase : public IFunction
REGISTER_FUNCTION(CurrentDatabase)
{
factory.registerFunction();
- factory.registerAlias("DATABASE", "currentDatabase", FunctionFactory::CaseInsensitive);
+ factory.registerAlias("DATABASE", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive);
+ factory.registerAlias("current_database", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive);
}
}
diff --git a/src/Functions/currentSchemas.cpp b/src/Functions/currentSchemas.cpp
new file mode 100644
index 000000000000..322e719eb170
--- /dev/null
+++ b/src/Functions/currentSchemas.cpp
@@ -0,0 +1,88 @@
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+namespace
+{
+
+class FunctionCurrentSchemas : public IFunction
+{
+ const String db_name;
+
+public:
+ static constexpr auto name = "currentSchemas";
+ static FunctionPtr create(ContextPtr context)
+ {
+ return std::make_shared(context->getCurrentDatabase());
+ }
+
+ explicit FunctionCurrentSchemas(const String & db_name_) :
+ db_name{db_name_}
+ {
+ }
+
+ String getName() const override
+ {
+ return name;
+ }
+
+ size_t getNumberOfArguments() const override
+ {
+ return 1;
+ }
+
+ DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+ {
+ // For compatibility, this function implements the same signature as PostgreSQL's current_schemas(boolean)
+ const bool argument_is_valid = arguments.size() == 1 && isBool(arguments.front());
+ if (!argument_is_valid)
+ throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must be bool", getName());
+
+ return std::make_shared(std::make_shared());
+ }
+
+ bool isDeterministic() const override { return false; }
+
+ bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+
+ ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
+ {
+ return DataTypeArray(std::make_shared())
+ .createColumnConst(input_rows_count, Array { db_name });
+ }
+};
+
+}
+
+REGISTER_FUNCTION(CurrentSchema)
+{
+ factory.registerFunction(FunctionDocumentation
+ {
+ .description=R"(
+Returns a single-element array with the name of the current database
+
+Requires a boolean parameter, but it is actually ignored. It exists only for compatibility with the implementation of this function in other DB engines.
+
+[example:common]
+)",
+ .examples{
+ {"common", "SELECT current_schemas(true);", "['default']"}
+ }
+ },
+ FunctionFactory::CaseInsensitive);
+ factory.registerAlias("current_schemas", FunctionCurrentSchemas::name, FunctionFactory::CaseInsensitive);
+
+}
+
+}
diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp
index 8e76bb27ff1e..695d1b7d63c1 100644
--- a/src/Functions/fromModifiedJulianDay.cpp
+++ b/src/Functions/fromModifiedJulianDay.cpp
@@ -13,12 +13,12 @@
#include
#include
+
namespace DB
{
namespace ErrorCodes
{
- extern const int CANNOT_FORMAT_DATETIME;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@@ -56,25 +56,14 @@ namespace DB
{
if constexpr (nullOnErrors)
{
- try
- {
- const GregorianDate<> gd(vec_from[i]);
- gd.write(write_buffer);
- (*vec_null_map_to)[i] = false;
- }
- catch (const Exception & e)
- {
- if (e.code() == ErrorCodes::CANNOT_FORMAT_DATETIME)
- (*vec_null_map_to)[i] = true;
- else
- throw;
- }
+ GregorianDate gd;
+ (*vec_null_map_to)[i] = !(gd.tryInit(vec_from[i]) && gd.tryWrite(write_buffer));
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();
}
else
{
- const GregorianDate<> gd(vec_from[i]);
+ GregorianDate gd(vec_from[i]);
gd.write(write_buffer);
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();
diff --git a/src/Functions/getTypeSerializationStreams.cpp b/src/Functions/getTypeSerializationStreams.cpp
index 2b13f0f140d8..da9fce70ee9a 100644
--- a/src/Functions/getTypeSerializationStreams.cpp
+++ b/src/Functions/getTypeSerializationStreams.cpp
@@ -65,15 +65,7 @@ class FunctionGetTypeSerializationStreams : public IFunction
if (!arg_string)
return argument.type;
- try
- {
- DataTypePtr type = DataTypeFactory::instance().get(arg_string->getDataAt(0).toString());
- return type;
- }
- catch (const DB::Exception &)
- {
- return argument.type;
- }
+ return DataTypeFactory::instance().get(arg_string->getDataAt(0).toString());
}
};
diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp
index c3fbc08c4a95..2381def91515 100644
--- a/src/Functions/parseDateTime.cpp
+++ b/src/Functions/parseDateTime.cpp
@@ -398,7 +398,7 @@ namespace
static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_)
{
if (!isDayOfYearValid(year_, day_of_year_))
- throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, year:{} day of year:{}", year_, day_of_year_);
+ throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, out of range (year: {} day of year: {})", year_, day_of_year_);
Int32 res = daysSinceEpochFromDate(year_, 1, 1);
res += day_of_year_ - 1;
@@ -408,7 +408,7 @@ namespace
static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_)
{
if (!isDateValid(year_, month_, day_))
- throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, year:{} month:{} day:{}", year_, month_, day_);
+ throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, out of range (year: {} month: {} day_of_month: {})", year_, month_, day_);
Int32 res = cumulativeYearDays[year_ - 1970];
res += isLeapYear(year_) ? cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1];
diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h
index d19608603081..fba8293e5ffe 100644
--- a/src/Functions/sleep.h
+++ b/src/Functions/sleep.h
@@ -9,7 +9,8 @@
#include
#include
#include
-#include
+#include
+
namespace ProfileEvents
{
@@ -40,11 +41,17 @@ enum class FunctionSleepVariant
template
class FunctionSleep : public IFunction
{
+private:
+ UInt64 max_microseconds;
public:
static constexpr auto name = variant == FunctionSleepVariant::PerBlock ? "sleep" : "sleepEachRow";
- static FunctionPtr create(ContextPtr)
+ static FunctionPtr create(ContextPtr context)
+ {
+ return std::make_shared>(context->getSettingsRef().function_sleep_max_microseconds_per_block);
+ }
+
+ FunctionSleep(UInt64 max_microseconds_) : max_microseconds(max_microseconds_)
{
- return std::make_shared>();
}
/// Get the name of the function.
@@ -105,13 +112,19 @@ class FunctionSleep : public IFunction
if (size > 0)
{
/// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time.
- if (seconds > 3.0) /// The choice is arbitrary
- throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds));
+ if (max_microseconds && seconds * 1e6 > max_microseconds)
+ throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is {} microseconds. Requested: {}", max_microseconds, seconds);
if (!dry_run)
{
UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size);
UInt64 microseconds = static_cast(seconds * count * 1e6);
+
+ if (max_microseconds && microseconds > max_microseconds)
+ throw Exception(ErrorCodes::TOO_SLOW,
+ "The maximum sleep time is {} microseconds. Requested: {} microseconds per block (of size {})",
+ max_microseconds, microseconds, size);
+
sleepForMicroseconds(microseconds);
ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);
diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp
new file mode 100644
index 000000000000..5f3f054b6240
--- /dev/null
+++ b/src/Functions/substringIndex.cpp
@@ -0,0 +1,302 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_COLUMN;
+ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+ extern const int BAD_ARGUMENTS;
+}
+
+namespace
+{
+
+ template
+ class FunctionSubstringIndex : public IFunction
+ {
+ public:
+ static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex";
+
+
+ static FunctionPtr create(ContextPtr) { return std::make_shared(); }
+
+ String getName() const override { return name; }
+
+ size_t getNumberOfArguments() const override { return 3; }
+
+ bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+
+ bool useDefaultImplementationForConstants() const override { return true; }
+ ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
+ DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+ {
+ if (!isString(arguments[0]))
+ throw Exception(
+ ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+ "Illegal type {} of first argument of function {}, String expected",
+ arguments[0]->getName(),
+ getName());
+
+ if (!isString(arguments[1]))
+ throw Exception(
+ ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+ "Illegal type {} of second argument of function {}, String expected",
+ arguments[1]->getName(),
+ getName());
+
+ if (!isNativeInteger(arguments[2]))
+ throw Exception(
+ ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+ "Illegal type {} of third argument of function {}, Integer expected",
+ arguments[2]->getName(),
+ getName());
+
+ return std::make_shared();
+ }
+
+ ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+ {
+ ColumnPtr column_string = arguments[0].column;
+ ColumnPtr column_delim = arguments[1].column;
+ ColumnPtr column_count = arguments[2].column;
+
+ const ColumnConst * column_delim_const = checkAndGetColumnConst(column_delim.get());
+ if (!column_delim_const)
+ throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument to {} must be a constant String", getName());
+
+ String delim = column_delim_const->getValue();
+ if constexpr (!is_utf8)
+ {
+ if (delim.size() != 1)
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName());
+ }
+ else
+ {
+ if (UTF8::countCodePoints(reinterpret_cast(delim.data()), delim.size()) != 1)
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName());
+ }
+
+ auto column_res = ColumnString::create();
+ ColumnString::Chars & vec_res = column_res->getChars();
+ ColumnString::Offsets & offsets_res = column_res->getOffsets();
+
+ const ColumnConst * column_string_const = checkAndGetColumnConst(column_string.get());
+ if (column_string_const)
+ {
+ String str = column_string_const->getValue();
+ constantVector(str, delim, column_count.get(), vec_res, offsets_res);
+ }
+ else
+ {
+ const auto * col_str = checkAndGetColumn(column_string.get());
+ if (!col_str)
+ throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName());
+
+ bool is_count_const = isColumnConst(*column_count);
+ if (is_count_const)
+ {
+ Int64 count = column_count->getInt(0);
+ vectorConstant(col_str, delim, count, vec_res, offsets_res);
+ }
+ else
+ vectorVector(col_str, delim, column_count.get(), vec_res, offsets_res);
+ }
+ return column_res;
+ }
+
+ protected:
+ static void vectorVector(
+ const ColumnString * str_column,
+ const String & delim,
+ const IColumn * count_column,
+ ColumnString::Chars & res_data,
+ ColumnString::Offsets & res_offsets)
+ {
+ size_t rows = str_column->size();
+ res_data.reserve(str_column->getChars().size() / 2);
+ res_offsets.reserve(rows);
+
+ std::unique_ptr searcher
+ = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size());
+
+ for (size_t i = 0; i < rows; ++i)
+ {
+ StringRef str_ref = str_column->getDataAt(i);
+ Int64 count = count_column->getInt(i);
+
+ StringRef res_ref;
+ if constexpr (!is_utf8)
+ res_ref = substringIndex(str_ref, delim[0], count);
+ else
+ res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count);
+
+ appendToResultColumn(res_ref, res_data, res_offsets);
+ }
+ }
+
+ static void vectorConstant(
+ const ColumnString * str_column,
+ const String & delim,
+ Int64 count,
+ ColumnString::Chars & res_data,
+ ColumnString::Offsets & res_offsets)
+ {
+ size_t rows = str_column->size();
+ res_data.reserve(str_column->getChars().size() / 2);
+ res_offsets.reserve(rows);
+
+ std::unique_ptr searcher
+ = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size());
+
+ for (size_t i = 0; i < rows; ++i)
+ {
+ StringRef str_ref = str_column->getDataAt(i);
+
+ StringRef res_ref;
+ if constexpr (!is_utf8)
+ res_ref = substringIndex(str_ref, delim[0], count);
+ else
+ res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count);
+
+ appendToResultColumn(res_ref, res_data, res_offsets);
+ }
+ }
+
+ static void constantVector(
+ const String & str,
+ const String & delim,
+ const IColumn * count_column,
+ ColumnString::Chars & res_data,
+ ColumnString::Offsets & res_offsets)
+ {
+ size_t rows = count_column->size();
+ res_data.reserve(str.size() * rows / 2);
+ res_offsets.reserve(rows);
+
+ std::unique_ptr searcher
+ = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size());
+
+ StringRef str_ref{str.data(), str.size()};
+ for (size_t i = 0; i < rows; ++i)
+ {
+ Int64 count = count_column->getInt(i);
+
+ StringRef res_ref;
+ if constexpr (!is_utf8)
+ res_ref = substringIndex(str_ref, delim[0], count);
+ else
+ res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count);
+
+ appendToResultColumn(res_ref, res_data, res_offsets);
+ }
+ }
+
+ static void appendToResultColumn(const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
+ {
+ size_t res_offset = res_data.size();
+ res_data.resize(res_offset + res_ref.size + 1);
+ memcpy(&res_data[res_offset], res_ref.data, res_ref.size);
+ res_offset += res_ref.size;
+ res_data[res_offset] = 0;
+ ++res_offset;
+
+ res_offsets.emplace_back(res_offset);
+ }
+
+ static StringRef substringIndexUTF8(
+ const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 count)
+ {
+ if (count == 0)
+ return {str_ref.data, 0};
+
+ const auto * begin = reinterpret_cast(str_ref.data);
+ const auto * end = reinterpret_cast(str_ref.data + str_ref.size);
+ const auto * pos = begin;
+ if (count > 0)
+ {
+ Int64 i = 0;
+ while (i < count)
+ {
+ pos = searcher->search(pos, end - pos);
+
+ if (pos != end)
+ {
+ pos += delim.size();
+ ++i;
+ }
+ else
+ return str_ref;
+ }
+ return {begin, static_cast(pos - begin - delim.size())};
+ }
+ else
+ {
+ Int64 total = 0;
+ while (pos < end && end != (pos = searcher->search(pos, end - pos)))
+ {
+ pos += delim.size();
+ ++total;
+ }
+
+ if (total + count < 0)
+ return str_ref;
+
+ pos = begin;
+ Int64 i = 0;
+ Int64 count_from_left = total + 1 + count;
+ while (i < count_from_left && pos < end && end != (pos = searcher->search(pos, end - pos)))
+ {
+ pos += delim.size();
+ ++i;
+ }
+ return {pos, static_cast(end - pos)};
+ }
+ }
+
+ static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 count)
+ {
+ if (count == 0)
+ return {str_ref.data, 0};
+
+ const auto * pos = count > 0 ? str_ref.data : str_ref.data + str_ref.size - 1;
+ const auto * end = count > 0 ? str_ref.data + str_ref.size : str_ref.data - 1;
+ int d = count > 0 ? 1 : -1;
+
+ for (; count; pos += d)
+ {
+ if (pos == end)
+ return str_ref;
+ if (*pos == delim)
+ count -= d;
+ }
+ pos -= d;
+ return {
+ d > 0 ? str_ref.data : pos + 1, static_cast(d > 0 ? pos - str_ref.data : str_ref.data + str_ref.size - pos - 1)};
+ }
+ };
+}
+
+
+REGISTER_FUNCTION(SubstringIndex)
+{
+ factory.registerFunction>(); /// substringIndex
+ factory.registerFunction>(); /// substringIndexUTF8
+
+ factory.registerAlias("SUBSTRING_INDEX", "substringIndex", FunctionFactory::CaseInsensitive);
+}
+
+
+}
diff --git a/src/Functions/toModifiedJulianDay.cpp b/src/Functions/toModifiedJulianDay.cpp
index 0d854bcc1106..907c7570ce2d 100644
--- a/src/Functions/toModifiedJulianDay.cpp
+++ b/src/Functions/toModifiedJulianDay.cpp
@@ -17,8 +17,6 @@ namespace DB
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
- extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
- extern const int CANNOT_PARSE_DATE;
}
template
@@ -78,27 +76,18 @@ namespace DB
if constexpr (nullOnErrors)
{
- try
- {
- const GregorianDate<> date(read_buffer);
- vec_to[i] = date.toModifiedJulianDay();
- vec_null_map_to[i] = false;
- }
- catch (const Exception & e)
- {
- if (e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || e.code() == ErrorCodes::CANNOT_PARSE_DATE)
- {
- vec_to[i] = static_cast(0);
- vec_null_map_to[i] = true;
- }
- else
- throw;
- }
+ GregorianDate date;
+
+ int64_t res = 0;
+ bool success = date.tryInit(read_buffer) && date.tryToModifiedJulianDay(res);
+
+ vec_to[i] = static_cast(res);
+ vec_null_map_to[i] = !success;
}
else
{
- const GregorianDate<> date(read_buffer);
- vec_to[i] = date.toModifiedJulianDay();
+ const GregorianDate date(read_buffer);
+ vec_to[i] = static_cast(date.toModifiedJulianDay());
}
}
diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp
index 1fc0e3adf96c..e03701327b13 100644
--- a/src/Functions/transform.cpp
+++ b/src/Functions/transform.cpp
@@ -156,7 +156,7 @@ namespace
{
initialize(arguments, result_type);
- const auto * in = arguments.front().column.get();
+ const auto * in = arguments[0].column.get();
if (isColumnConst(*in))
return executeConst(arguments, result_type, input_rows_count);
@@ -165,6 +165,10 @@ namespace
if (!cache.default_column && arguments.size() == 4)
default_non_const = castColumn(arguments[3], result_type);
+ ColumnPtr in_casted = arguments[0].column;
+ if (arguments.size() == 3)
+ in_casted = castColumn(arguments[0], result_type);
+
auto column_result = result_type->createColumn();
if (cache.is_empty)
{
@@ -174,30 +178,30 @@ namespace
}
else if (cache.table_num_to_idx)
{
- if (!executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const)
- && !executeNum>(in, *column_result, default_non_const))
+ if (!executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted)
+ && !executeNum>(in, *column_result, default_non_const, *in_casted))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName());
}
}
else if (cache.table_string_to_idx)
{
- if (!executeString(in, *column_result, default_non_const))
- executeContiguous(in, *column_result, default_non_const);
+ if (!executeString(in, *column_result, default_non_const, *in_casted))
+ executeContiguous(in, *column_result, default_non_const, *in_casted);
}
else if (cache.table_anything_to_idx)
{
- executeAnything(in, *column_result, default_non_const);
+ executeAnything(in, *column_result, default_non_const, *in_casted);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized");
@@ -218,7 +222,7 @@ namespace
return impl->execute(args, result_type, input_rows_count);
}
- void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const
+ void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{
const size_t size = in->size();
const auto & table = *cache.table_anything_to_idx;
@@ -236,11 +240,11 @@ namespace
else if (default_non_const)
column_result.insertFrom(*default_non_const, i);
else
- column_result.insertFrom(*in, i);
+ column_result.insertFrom(in_casted, i);
}
}
- void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const
+ void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{
const size_t size = in->size();
const auto & table = *cache.table_string_to_idx;
@@ -255,12 +259,12 @@ namespace
else if (default_non_const)
column_result.insertFrom(*default_non_const, i);
else
- column_result.insertFrom(*in, i);
+ column_result.insertFrom(in_casted, i);
}
}
template
- bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const
+ bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{
const auto * const in = checkAndGetColumn(in_untyped);
if (!in)
@@ -297,7 +301,7 @@ namespace
else if (default_non_const)
column_result.insertFrom(*default_non_const, i);
else
- column_result.insertFrom(*in, i);
+ column_result.insertFrom(in_casted, i);
}
}
return true;
@@ -451,7 +455,7 @@ namespace
}
}
- bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const
+ bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{
const auto * const in = checkAndGetColumn(in_untyped);
if (!in)
@@ -488,7 +492,7 @@ namespace
else if (default_non_const)
column_result.insertFrom(*default_non_const, 0);
else
- column_result.insertFrom(*in, i);
+ column_result.insertFrom(in_casted, i);
}
}
return true;
@@ -654,13 +658,13 @@ namespace
std::unique_ptr table_string_to_idx;
std::unique_ptr table_anything_to_idx;
- bool is_empty = false;
-
ColumnPtr from_column;
ColumnPtr to_column;
ColumnPtr default_column;
- std::atomic initialized{false};
+ bool is_empty = false;
+ bool initialized = false;
+
std::mutex mutex;
};
@@ -693,13 +697,12 @@ namespace
/// Can be called from different threads. It works only on the first call.
void initialize(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
{
+ std::lock_guard lock(cache.mutex);
if (cache.initialized)
return;
const DataTypePtr & from_type = arguments[0].type;
- std::lock_guard lock(cache.mutex);
-
if (from_type->onlyNull())
{
cache.is_empty = true;
diff --git a/src/Functions/tupleHammingDistance.cpp b/src/Functions/tupleHammingDistance.cpp
index adc063bfa816..ffdf8c93f159 100644
--- a/src/Functions/tupleHammingDistance.cpp
+++ b/src/Functions/tupleHammingDistance.cpp
@@ -1,5 +1,4 @@
#include
-#include
#include
#include
#include
@@ -86,7 +85,7 @@ class FunctionTupleHammingDistance : public ITupleFunction
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp
index db907af972d6..d53d39e2f3be 100644
--- a/src/Functions/vectorFunctions.cpp
+++ b/src/Functions/vectorFunctions.cpp
@@ -95,7 +95,7 @@ class FunctionTupleOperator : public ITupleFunction
auto elem_func = func->build(ColumnsWithTypeAndName{left, right});
types[i] = elem_func->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@@ -181,7 +181,7 @@ class FunctionTupleNegate : public ITupleFunction
auto elem_negate = negate->build(ColumnsWithTypeAndName{cur});
types[i] = elem_negate->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@@ -258,7 +258,7 @@ class FunctionTupleOperatorByNumber : public ITupleFunction
auto elem_func = func->build(ColumnsWithTypeAndName{cur, p_column});
types[i] = elem_func->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@@ -363,7 +363,7 @@ class FunctionDotProduct : public ITupleFunction
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@@ -467,7 +467,7 @@ class FunctionDateOrDateTimeOperationTupleOfIntervals : public ITupleFunction
auto plus_elem = plus->build({left, right});
res_type = plus_elem->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@@ -740,7 +740,7 @@ class FunctionLNorm : public ITupleFunction
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@@ -842,7 +842,7 @@ class FunctionLNorm : public ITupleFunction
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@@ -993,7 +993,7 @@ class FunctionLNorm : public ITupleFunction
auto max_elem = max->build({left_type, right_type});
res_type = max_elem->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@@ -1103,7 +1103,7 @@ class FunctionLNorm : public ITupleFunction
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
- catch (DB::Exception & e)
+ catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
diff --git a/src/IO/Progress.cpp b/src/IO/Progress.cpp
index bf42cdf91d63..620d2f0f7622 100644
--- a/src/IO/Progress.cpp
+++ b/src/IO/Progress.cpp
@@ -69,12 +69,14 @@ void ProgressValues::write(WriteBuffer & out, UInt64 client_revision) const
}
}
-void ProgressValues::writeJSON(WriteBuffer & out) const
+void ProgressValues::writeJSON(WriteBuffer & out, bool add_braces) const
{
/// Numbers are written in double quotes (as strings) to avoid loss of precision
/// of 64-bit integers after interpretation by JavaScript.
- writeCString("{\"read_rows\":\"", out);
+ if (add_braces)
+ writeCString("{", out);
+ writeCString("\"read_rows\":\"", out);
writeText(read_rows, out);
writeCString("\",\"read_bytes\":\"", out);
writeText(read_bytes, out);
@@ -88,7 +90,9 @@ void ProgressValues::writeJSON(WriteBuffer & out) const
writeText(result_rows, out);
writeCString("\",\"result_bytes\":\"", out);
writeText(result_bytes, out);
- writeCString("\"}", out);
+ writeCString("\"", out);
+ if (add_braces)
+ writeCString("}", out);
}
bool Progress::incrementPiecewiseAtomically(const Progress & rhs)
@@ -230,9 +234,9 @@ void Progress::write(WriteBuffer & out, UInt64 client_revision) const
getValues().write(out, client_revision);
}
-void Progress::writeJSON(WriteBuffer & out) const
+void Progress::writeJSON(WriteBuffer & out, bool add_braces) const
{
- getValues().writeJSON(out);
+ getValues().writeJSON(out, add_braces);
}
}
diff --git a/src/IO/Progress.h b/src/IO/Progress.h
index c21b1b854b0b..936ed5ea3421 100644
--- a/src/IO/Progress.h
+++ b/src/IO/Progress.h
@@ -32,7 +32,7 @@ struct ProgressValues
void read(ReadBuffer & in, UInt64 server_revision);
void write(WriteBuffer & out, UInt64 client_revision) const;
- void writeJSON(WriteBuffer & out) const;
+ void writeJSON(WriteBuffer & out, bool add_braces = true) const;
};
struct ReadProgress
@@ -40,9 +40,10 @@ struct ReadProgress
UInt64 read_rows = 0;
UInt64 read_bytes = 0;
UInt64 total_rows_to_read = 0;
+ UInt64 total_bytes_to_read = 0;
- ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0)
- : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {}
+ ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
+ : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {}
};
struct WriteProgress
@@ -98,8 +99,8 @@ struct Progress
Progress() = default;
- Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0)
- : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {}
+ Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
+ : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {}
explicit Progress(ReadProgress read_progress)
: read_rows(read_progress.read_rows), read_bytes(read_progress.read_bytes), total_rows_to_read(read_progress.total_rows_to_read) {}
@@ -118,7 +119,7 @@ struct Progress
void write(WriteBuffer & out, UInt64 client_revision) const;
/// Progress in JSON format (single line, without whitespaces) is used in HTTP headers.
- void writeJSON(WriteBuffer & out) const;
+ void writeJSON(WriteBuffer & out, bool add_braces = true) const;
/// Each value separately is changed atomically (but not whole object).
bool incrementPiecewiseAtomically(const Progress & rhs);
diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp
index 4181615bc52f..4ac3f984f788 100644
--- a/src/IO/ReadBufferFromFileBase.cpp
+++ b/src/IO/ReadBufferFromFileBase.cpp
@@ -42,7 +42,7 @@ void ReadBufferFromFileBase::setProgressCallback(ContextPtr context)
setProfileCallback([file_progress_callback](const ProfileInfo & progress)
{
- file_progress_callback(FileProgress(progress.bytes_read, 0));
+ file_progress_callback(FileProgress(progress.bytes_read));
});
}
diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp
index 6d1c0f7aafa1..eea801ce65ed 100644
--- a/src/IO/ReadWriteBufferFromHTTP.cpp
+++ b/src/IO/ReadWriteBufferFromHTTP.cpp
@@ -305,12 +305,12 @@ void ReadWriteBufferFromHTTPBase::callWithRedirects(Poco::N
current_session = session;
call(current_session, response, method_, throw_on_all_errors, for_object_info);
- Poco::URI prev_uri = uri;
+ saved_uri_redirect = uri;
while (isRedirect(response.getStatus()))
{
- Poco::URI uri_redirect = getUriAfterRedirect(prev_uri, response);
- prev_uri = uri_redirect;
+ Poco::URI uri_redirect = getUriAfterRedirect(*saved_uri_redirect, response);
+ saved_uri_redirect = uri_redirect;
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp
index 1a367a8199d6..fd825720ac95 100644
--- a/src/IO/S3/PocoHTTPClient.cpp
+++ b/src/IO/S3/PocoHTTPClient.cpp
@@ -258,7 +258,7 @@ void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricT
void PocoHTTPClient::makeRequestInternal(
Aws::Http::HttpRequest & request,
std::shared_ptr & response,
- Aws::Utils::RateLimits::RateLimiterInterface * readLimiter ,
+ Aws::Utils::RateLimits::RateLimiterInterface * readLimiter,
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const
{
/// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session.
diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp
index c7d4b87694be..36cd32910b51 100644
--- a/src/Interpreters/Aggregator.cpp
+++ b/src/Interpreters/Aggregator.cpp
@@ -2020,7 +2020,8 @@ template NO_INLINE
Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const
{
- const size_t max_block_size = params.max_block_size;
+ /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated
+ const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1;
const bool final = true;
ConvertToBlockRes res;
@@ -2097,7 +2098,8 @@ template
Aggregator::ConvertToBlockRes NO_INLINE
Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t) const
{
- const size_t max_block_size = params.max_block_size;
+ /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated
+ const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1;
const bool final = false;
ConvertToBlockRes res;
diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h
index 05b34e8460fc..29096a38be62 100644
--- a/src/Interpreters/Aggregator.h
+++ b/src/Interpreters/Aggregator.h
@@ -292,7 +292,7 @@ struct AggregationMethodStringNoCache
{
}
- using State = ColumnsHashing::HashMethodString;
+ using State = ColumnsHashing::HashMethodString;
static const bool low_cardinality_optimization = false;
static const bool one_key_nullable_optimization = nullable;
diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp
index 8d0f18cc3054..93239bfa1fcc 100644
--- a/src/Interpreters/AsynchronousInsertQueue.cpp
+++ b/src/Interpreters/AsynchronousInsertQueue.cpp
@@ -149,9 +149,10 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep
}
}
-AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_)
+AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_)
: WithContext(context_)
, pool_size(pool_size_)
+ , flush_on_shutdown(flush_on_shutdown_)
, queue_shards(pool_size)
, pool(CurrentMetrics::AsynchronousInsertThreads, CurrentMetrics::AsynchronousInsertThreadsActive, pool_size)
{
@@ -164,8 +165,6 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo
AsynchronousInsertQueue::~AsynchronousInsertQueue()
{
- /// TODO: add a setting for graceful shutdown.
-
LOG_TRACE(log, "Shutting down the asynchronous insertion queue");
shutdown = true;
@@ -177,17 +176,18 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue()
assert(dump_by_first_update_threads[i].joinable());
dump_by_first_update_threads[i].join();
+ if (flush_on_shutdown)
+ {
+ for (auto & [_, elem] : shard.queue)
+ scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext());
+ }
+ else
{
- std::lock_guard lock(shard.mutex);
for (auto & [_, elem] : shard.queue)
- {
for (const auto & entry : elem.data->entries)
- {
entry->finish(std::make_exception_ptr(Exception(
ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded)")));
- }
- }
}
}
@@ -232,7 +232,10 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
/// to avoid buffering of huge amount of data in memory.
auto read_buf = getReadBufferFromASTInsertQuery(query);
- LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* throw_exception */ false, /* exact_limit */ {});
+
+ LimitReadBuffer limit_buf(
+ *read_buf, settings.async_insert_max_data_size,
+ /*throw_exception=*/ false, /*exact_limit=*/ {});
WriteBufferFromString write_buf(bytes);
copyData(limit_buf, write_buf);
@@ -284,18 +287,19 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
assert(data);
data->size_in_bytes += entry_data_size;
- ++data->query_number;
data->entries.emplace_back(entry);
insert_future = entry->getFuture();
LOG_TRACE(log, "Have {} pending inserts with total {} bytes of data for query '{}'",
data->entries.size(), data->size_in_bytes, key.query_str);
+ bool has_enough_bytes = data->size_in_bytes >= key.settings.async_insert_max_data_size;
+ bool has_enough_queries = data->entries.size() >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate;
+
/// Here we check whether we hit the limit on maximum data size in the buffer.
/// And use setting from query context.
/// It works, because queries with the same set of settings are already grouped together.
- if (data->size_in_bytes >= key.settings.async_insert_max_data_size
- || (data->query_number >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate))
+ if (!flush_stopped && (has_enough_bytes || has_enough_queries))
{
data_to_process = std::move(data);
shard.iterators.erase(it);
@@ -319,6 +323,51 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
};
}
+void AsynchronousInsertQueue::flushAll()
+{
+ std::lock_guard flush_lock(flush_mutex);
+
+ LOG_DEBUG(log, "Requested to flush asynchronous insert queue");
+
+ /// Disable background flushes to avoid adding new elements to the queue.
+ flush_stopped = true;
+ std::vector queues_to_flush(pool_size);
+
+ for (size_t i = 0; i < pool_size; ++i)
+ {
+ std::lock_guard lock(queue_shards[i].mutex);
+ queues_to_flush[i] = std::move(queue_shards[i].queue);
+ queue_shards[i].iterators.clear();
+ }
+
+ size_t total_queries = 0;
+ size_t total_bytes = 0;
+ size_t total_entries = 0;
+
+ for (auto & queue : queues_to_flush)
+ {
+ total_queries += queue.size();
+ for (auto & [_, entry] : queue)
+ {
+ total_bytes += entry.data->size_in_bytes;
+ total_entries += entry.data->entries.size();
+ scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext());
+ }
+ }
+
+ /// Note that jobs scheduled before the call of 'flushAll' are not counted here.
+ LOG_DEBUG(log,
+ "Will wait for finishing of {} flushing jobs (about {} inserts, {} bytes, {} distinct queries)",
+ pool.active(), total_entries, total_bytes, total_queries);
+
+ /// Wait until all jobs are finished. That includes also jobs
+ /// that were scheduled before the call of 'flushAll'.
+ pool.wait();
+
+ LOG_DEBUG(log, "Finished flushing of asynchronous insert queue");
+ flush_stopped = false;
+}
+
void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num)
{
auto & shard = queue_shards[shard_num];
@@ -344,6 +393,9 @@ void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num)
if (shutdown)
return;
+ if (flush_stopped)
+ continue;
+
const auto now = std::chrono::steady_clock::now();
while (true)
diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h
index 8530a453cd61..577752af45a0 100644
--- a/src/Interpreters/AsynchronousInsertQueue.h
+++ b/src/Interpreters/AsynchronousInsertQueue.h
@@ -19,7 +19,7 @@ class AsynchronousInsertQueue : public WithContext
public:
using Milliseconds = std::chrono::milliseconds;
- AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_);
+ AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_);
~AsynchronousInsertQueue();
struct PushResult
@@ -40,6 +40,8 @@ class AsynchronousInsertQueue : public WithContext
std::unique_ptr insert_data_buffer;
};
+ /// Force flush the whole queue.
+ void flushAll();
PushResult push(ASTPtr query, ContextPtr query_context);
size_t getPoolSize() const { return pool_size; }
@@ -100,9 +102,7 @@ class AsynchronousInsertQueue : public WithContext
using EntryPtr = std::shared_ptr;
std::list entries;
-
size_t size_in_bytes = 0;
- size_t query_number = 0;
};
using InsertDataPtr = std::unique_ptr;
@@ -130,6 +130,8 @@ class AsynchronousInsertQueue : public WithContext
};
const size_t pool_size;
+ const bool flush_on_shutdown;
+
std::vector queue_shards;
/// Logic and events behind queue are as follows:
@@ -141,6 +143,10 @@ class AsynchronousInsertQueue : public WithContext
/// (async_insert_max_data_size setting). If so, then again we dump the data.
std::atomic shutdown{false};
+ std::atomic flush_stopped{false};
+
+ /// A mutex that prevents concurrent forced flushes of queue.
+ mutable std::mutex flush_mutex;
/// Dump the data only inside this pool.
ThreadPool pool;
diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h
index de10a445d010..b90acd1d576d 100644
--- a/src/Interpreters/Cluster.h
+++ b/src/Interpreters/Cluster.h
@@ -144,12 +144,6 @@ class Cluster
UInt32 shard_index_ = 0,
UInt32 replica_index_ = 0);
- Address(
- const String & host_port_,
- const ClusterConnectionParameters & params,
- UInt32 shard_index_,
- UInt32 replica_index_);
-
Address(
const DatabaseReplicaInfo & info,
const ClusterConnectionParameters & params,
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index 0cf3f3609944..953e38d56cd5 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -124,6 +124,7 @@ void SelectStreamFactory::createForShard(
{
remote_shards.emplace_back(Shard{
.query = query_ast,
+ .main_table = main_table,
.header = header,
.shard_info = shard_info,
.lazy = lazy,
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
index 030c0b77dd52..1cc5a3b1a77e 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
@@ -50,6 +50,8 @@ class SelectStreamFactory
{
/// Query and header may be changed depending on shard.
ASTPtr query;
+ /// Used to check the table existence on remote node
+ StorageID main_table;
Block header;
Cluster::ShardInfo shard_info;
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 3dea52faf461..2fed626ffb7e 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -35,7 +35,12 @@ namespace ErrorCodes
namespace ClusterProxy
{
-ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log)
+ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
+ ContextPtr context,
+ const Settings & settings,
+ const StorageID & main_table,
+ const SelectQueryInfo * query_info,
+ Poco::Logger * log)
{
Settings new_settings = settings;
new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time);
@@ -43,7 +48,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c
/// If "secret" (in remote_servers) is not in use,
/// user on the shard is not the same as the user on the initiator,
/// hence per-user limits should not be applied.
- if (cluster.getSecret().empty())
+ if (!interserver_mode)
{
/// Does not matter on remote servers, because queries are sent under different user.
new_settings.max_concurrent_queries_for_user = 0;
@@ -170,17 +175,15 @@ void executeQuery(
std::vector plans;
SelectStreamFactory::Shards remote_shards;
- auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log);
+ auto new_context = updateSettingsForCluster(!query_info.getCluster()->getSecret().empty(), context, settings, main_table, &query_info, log);
new_context->increaseDistributedDepth();
size_t shards = query_info.getCluster()->getShardCount();
for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
{
- ASTPtr query_ast_for_shard;
- if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
+ ASTPtr query_ast_for_shard = query_ast->clone();
+ if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
{
- query_ast_for_shard = query_ast->clone();
-
OptimizeShardingKeyRewriteInVisitor::Data visitor_data{
sharding_key_expr,
sharding_key_expr->getSampleBlock().getByPosition(0).type,
@@ -191,8 +194,6 @@ void executeQuery(
OptimizeShardingKeyRewriteInVisitor visitor(visitor_data);
visitor.visit(query_ast_for_shard);
}
- else
- query_ast_for_shard = query_ast->clone();
if (shard_filter_generator)
{
diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h
index 41f6da55686b..511914e99e47 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/src/Interpreters/ClusterProxy/executeQuery.h
@@ -34,8 +34,12 @@ class SelectStreamFactory;
/// - optimize_skip_unused_shards_nesting
///
/// @return new Context with adjusted settings
-ContextMutablePtr updateSettingsForCluster(
- const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr);
+ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
+ ContextPtr context,
+ const Settings & settings,
+ const StorageID & main_table,
+ const SelectQueryInfo * query_info = nullptr,
+ Poco::Logger * log = nullptr);
using AdditionalShardFilterGenerator = std::function;
/// Execute a distributed query, creating a query plan, from which the query pipeline can be built.
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 193bb5b6ab0f..92e6bcb326cd 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -551,7 +551,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
chassert(!task.completely_processed);
/// Setup tracing context on current thread for current DDL
- OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ ,
+ OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__,
task.entry.tracing_context,
this->context->getOpenTelemetrySpanLog());
tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER;
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 616cf80a446d..ed927d550a84 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -361,7 +361,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
std::vector> tables_to_drop;
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
{
- iterator->table()->flush();
+ iterator->table()->flushAndPrepareForShutdown();
tables_to_drop.push_back({iterator->name(), iterator->table()->isDictionary()});
}
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 75d43b541e15..ae79b3f932ec 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -193,7 +193,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename
required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.to.getDatabase(), elem.to.getTable());
if (rename.exchange)
{
- required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT , elem.from.getDatabase(), elem.from.getTable());
+ required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.from.getDatabase(), elem.from.getTable());
required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.to.getDatabase(), elem.to.getTable());
}
}
diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index d07a65215445..fc3ea3a13ca2 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2274,8 +2274,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle
&& !settings.allow_experimental_query_deduplication
&& !settings.empty_result_for_aggregation_by_empty_set
&& storage
- && storage->getName() != "MaterializedMySQL"
- && !storage->hasLightweightDeletedMask()
+ && storage->supportsTrivialCountOptimization()
&& query_info.filter_asts.empty()
&& query_analyzer->hasAggregation()
&& (query_analyzer->aggregates().size() == 1)
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 02cdeb0154e5..1bd30e068888 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -38,6 +38,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -555,6 +556,17 @@ BlockIO InterpreterSystemQuery::execute()
);
break;
}
+ case Type::FLUSH_ASYNC_INSERT_QUEUE:
+ {
+ getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE);
+ auto * queue = getContext()->getAsynchronousInsertQueue();
+ if (!queue)
+ throw Exception(ErrorCodes::BAD_ARGUMENTS,
+ "Cannot flush asynchronous insert queue because it is not initialized");
+
+ queue->flushAll();
+ break;
+ }
case Type::STOP_LISTEN_QUERIES:
case Type::START_LISTEN_QUERIES:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type);
@@ -1149,6 +1161,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
required_access.emplace_back(AccessType::SYSTEM_FLUSH_LOGS);
break;
}
+ case Type::FLUSH_ASYNC_INSERT_QUEUE:
+ {
+ required_access.emplace_back(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE);
+ break;
+ }
case Type::RESTART_DISK:
{
required_access.emplace_back(AccessType::SYSTEM_RESTART_DISK);
diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp
index bf8d060bd3cb..bd421ae8e337 100644
--- a/src/Interpreters/ProfileEventsExt.cpp
+++ b/src/Interpreters/ProfileEventsExt.cpp
@@ -86,9 +86,16 @@ static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::Mutabl
columns[i++]->insert(static_cast(snapshot.current_time));
columns[i++]->insert(static_cast(snapshot.thread_id));
columns[i++]->insert(Type::GAUGE);
-
columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME));
- columns[i++]->insert(snapshot.memory_usage);
+ columns[i]->insert(snapshot.memory_usage);
+
+ i = 0;
+ columns[i++]->insertData(host_name.data(), host_name.size());
+ columns[i++]->insert(static_cast(snapshot.current_time));
+ columns[i++]->insert(static_cast(snapshot.thread_id));
+ columns[i++]->insert(Type::GAUGE);
+ columns[i++]->insertData(MemoryTracker::PEAK_USAGE_EVENT_NAME, strlen(MemoryTracker::PEAK_USAGE_EVENT_NAME));
+ columns[i]->insert(snapshot.peak_memory_usage);
}
void getProfileEvents(
@@ -121,6 +128,7 @@ void getProfileEvents(
group_snapshot.thread_id = 0;
group_snapshot.current_time = time(nullptr);
group_snapshot.memory_usage = thread_group->memory_tracker.get();
+ group_snapshot.peak_memory_usage = thread_group->memory_tracker.getPeak();
auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot();
auto prev_group_snapshot = last_sent_snapshots.find(0);
group_snapshot.counters =
diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h
index 7d9fc512d15b..cc338530510d 100644
--- a/src/Interpreters/ProfileEventsExt.h
+++ b/src/Interpreters/ProfileEventsExt.h
@@ -16,6 +16,7 @@ struct ProfileEventsSnapshot
UInt64 thread_id;
CountersIncrement counters;
Int64 memory_usage;
+ Int64 peak_memory_usage;
time_t current_time;
};
diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp
index 6257e617d4ac..2ef4f4d62183 100644
--- a/src/Interpreters/TransactionLog.cpp
+++ b/src/Interpreters/TransactionLog.cpp
@@ -482,7 +482,7 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN
bool removed = running_list.erase(txn->tid.getHash());
if (!removed)
{
- LOG_ERROR(log , "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid);
+ LOG_ERROR(log, "It's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid);
abort();
}
}
diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp
index fd4d2c9d8461..54ae939dbd1f 100644
--- a/src/Interpreters/TreeOptimizer.cpp
+++ b/src/Interpreters/TreeOptimizer.cpp
@@ -289,13 +289,6 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query)
elems = std::move(unique_elems);
}
-/// Optimize duplicate ORDER BY
-void optimizeDuplicateOrderBy(ASTPtr & query, ContextPtr context)
-{
- DuplicateOrderByVisitor::Data order_by_data{context};
- DuplicateOrderByVisitor(order_by_data).visit(query);
-}
-
/// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any
const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select)
{
@@ -379,41 +372,6 @@ std::unordered_set getDistinctNames(const ASTSelectQuery & select)
return names;
}
-/// Remove DISTINCT from query if columns are known as DISTINCT from subquery
-void optimizeDuplicateDistinct(ASTSelectQuery & select)
-{
- if (!select.select() || select.select()->children.empty())
- return;
-
- const ASTSelectQuery * subselect = getSimpleSubselect(select);
- if (!subselect)
- return;
-
- std::unordered_set distinct_names = getDistinctNames(*subselect);
- std::unordered_set selected_names;
-
- /// Check source column names from select list (ignore aliases and table names)
- for (const auto & id : select.select()->children)
- {
- const auto * identifier = id->as();
- if (!identifier)
- return;
-
- const String & name = identifier->shortName();
- if (!distinct_names.contains(name))
- return; /// Not a distinct column, keep DISTINCT for it.
-
- selected_names.emplace(name);
- }
-
- /// select columns list != distinct columns list
- /// SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM ...)) -- cannot remove DISTINCT
- if (selected_names.size() != distinct_names.size())
- return;
-
- select.distinct = false;
-}
-
/// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression,
/// has a single argument and not an aggregate functions.
void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context,
@@ -830,17 +788,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
&& !select_query->group_by_with_cube)
optimizeAggregateFunctionsOfGroupByKeys(select_query, query);
- /// Remove duplicate ORDER BY and DISTINCT from subqueries.
- if (settings.optimize_duplicate_order_by_and_distinct)
- {
- optimizeDuplicateOrderBy(query, context);
-
- /// DISTINCT has special meaning in Distributed query with enabled distributed_group_by_no_merge
- /// TODO: disable Distributed/remote() tables only
- if (!settings.distributed_group_by_no_merge)
- optimizeDuplicateDistinct(*select_query);
- }
-
/// Remove functions from ORDER BY if its argument is also in ORDER BY
if (settings.optimize_redundant_functions_in_order_by)
optimizeRedundantFunctionsInOrderBy(select_query, context);
diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h
index 52b3b79b16ea..b18f8fc7b07b 100644
--- a/src/Parsers/ASTSystemQuery.h
+++ b/src/Parsers/ASTSystemQuery.h
@@ -71,6 +71,7 @@ class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster
START_REPLICATION_QUEUES,
FLUSH_LOGS,
FLUSH_DISTRIBUTED,
+ FLUSH_ASYNC_INSERT_QUEUE,
STOP_DISTRIBUTED_SENDS,
START_DISTRIBUTED_SENDS,
START_THREAD_FUZZER,
diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h
index 9796ae10c07c..72e25cc3cf90 100644
--- a/src/Parsers/Kusto/ParserKQLOperators.h
+++ b/src/Parsers/Kusto/ParserKQLOperators.h
@@ -31,10 +31,10 @@ class KQLOperators
not_endswith,
endswith_cs,
not_endswith_cs,
- equal, //=~
- not_equal,//!~
- equal_cs, //=
- not_equal_cs,//!=
+ equal, /// =~
+ not_equal, /// !~
+ equal_cs, /// ==
+ not_equal_cs, /// !=
has,
not_has,
has_all,
@@ -49,10 +49,10 @@ class KQLOperators
not_hassuffix,
hassuffix_cs,
not_hassuffix_cs,
- in_cs, //in
- not_in_cs, //!in
- in, //in~
- not_in ,//!in~
+ in_cs, /// in
+ not_in_cs, /// !in
+ in, /// in~
+ not_in, /// !in~
matches_regex,
startswith,
not_startswith,
diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp
index 1c48f7738234..341c1ef60b49 100644
--- a/src/Parsers/ParserSelectQuery.cpp
+++ b/src/Parsers/ParserSelectQuery.cpp
@@ -292,6 +292,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
/// This is needed for TOP expression, because it can also use WITH TIES.
bool limit_with_ties_occured = false;
+ bool has_offset_clause = false;
+ bool offset_clause_has_sql_standard_row_or_rows = false; /// OFFSET offset_row_count {ROW | ROWS}
+
/// LIMIT length | LIMIT offset, length | LIMIT count BY expr-list | LIMIT offset, length BY expr-list
if (s_limit.ignore(pos, expected))
{
@@ -316,6 +319,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
if (!exp_elem.parse(pos, limit_offset, expected))
return false;
+
+ has_offset_clause = true;
}
else if (s_with_ties.ignore(pos, expected))
{
@@ -351,60 +356,65 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
}
else if (s_offset.ignore(pos, expected))
{
- /// OFFSET offset_row_count {ROW | ROWS} FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}
- bool offset_with_fetch_maybe = false;
+ /// OFFSET without LIMIT
+
+ has_offset_clause = true;
if (!exp_elem.parse(pos, limit_offset, expected))
return false;
+ /// SQL standard OFFSET N ROW[S] ...
+
if (s_row.ignore(pos, expected))
+ offset_clause_has_sql_standard_row_or_rows = true;
+
+ if (s_rows.ignore(pos, expected))
{
- if (s_rows.ignore(pos, expected))
+ if (offset_clause_has_sql_standard_row_or_rows)
throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together");
- offset_with_fetch_maybe = true;
- }
- else if (s_rows.ignore(pos, expected))
- {
- offset_with_fetch_maybe = true;
+
+ offset_clause_has_sql_standard_row_or_rows = true;
}
+ }
- if (offset_with_fetch_maybe && s_fetch.ignore(pos, expected))
- {
- /// OFFSET FETCH clause must exists with "ORDER BY"
- if (!order_expression_list)
- throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY");
+ /// SQL standard FETCH (either following SQL standard OFFSET or following ORDER BY)
+ if ((!has_offset_clause || offset_clause_has_sql_standard_row_or_rows)
+ && s_fetch.ignore(pos, expected))
+ {
+ /// FETCH clause must exist with "ORDER BY"
+ if (!order_expression_list)
+ throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY");
- if (s_first.ignore(pos, expected))
- {
- if (s_next.ignore(pos, expected))
- throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together");
- }
- else if (!s_next.ignore(pos, expected))
- return false;
+ if (s_first.ignore(pos, expected))
+ {
+ if (s_next.ignore(pos, expected))
+ throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together");
+ }
+ else if (!s_next.ignore(pos, expected))
+ return false;
- if (!exp_elem.parse(pos, limit_length, expected))
- return false;
+ if (!exp_elem.parse(pos, limit_length, expected))
+ return false;
- if (s_row.ignore(pos, expected))
- {
- if (s_rows.ignore(pos, expected))
- throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together");
- }
- else if (!s_rows.ignore(pos, expected))
- return false;
+ if (s_row.ignore(pos, expected))
+ {
+ if (s_rows.ignore(pos, expected))
+ throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together");
+ }
+ else if (!s_rows.ignore(pos, expected))
+ return false;
- if (s_with_ties.ignore(pos, expected))
- {
- select_query->limit_with_ties = true;
- }
- else if (s_only.ignore(pos, expected))
- {
- select_query->limit_with_ties = false;
- }
- else
- {
- return false;
- }
+ if (s_with_ties.ignore(pos, expected))
+ {
+ select_query->limit_with_ties = true;
+ }
+ else if (s_only.ignore(pos, expected))
+ {
+ select_query->limit_with_ties = false;
+ }
+ else
+ {
+ return false;
}
}
diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp
index d77ae8d3a275..18e91c533e02 100644
--- a/src/Parsers/tests/gtest_Parser.cpp
+++ b/src/Parsers/tests/gtest_Parser.cpp
@@ -359,11 +359,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest,
"SELECT *\nFROM Customers\nORDER BY LastName DESC"
},
{
- "Customers | order by Age desc , FirstName asc ",
+ "Customers | order by Age desc, FirstName asc ",
"SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC"
},
{
- "Customers | order by Age asc , FirstName desc",
+ "Customers | order by Age asc, FirstName desc",
"SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC"
},
{
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 5d8f8ca8741f..c118fccded43 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -182,6 +182,9 @@ bool applyTrivialCountIfPossible(
return false;
const auto & storage = table_node.getStorage();
+ if (!storage->supportsTrivialCountOptimization())
+ return false;
+
auto storage_id = storage->getStorageID();
auto row_policy_filter = query_context->getRowPolicyFilter(storage_id.getDatabaseName(),
storage_id.getTableName(),
diff --git a/src/Processors/Executors/ExecutionThreadContext.cpp b/src/Processors/Executors/ExecutionThreadContext.cpp
index 794f478b272b..0fa7e0b552f9 100644
--- a/src/Processors/Executors/ExecutionThreadContext.cpp
+++ b/src/Processors/Executors/ExecutionThreadContext.cpp
@@ -56,6 +56,9 @@ static void executeJob(ExecutingGraph::Node * node, ReadProgressCallback * read_
if (read_progress->counters.total_rows_approx)
read_progress_callback->addTotalRowsApprox(read_progress->counters.total_rows_approx);
+ if (read_progress->counters.total_bytes)
+ read_progress_callback->addTotalBytes(read_progress->counters.total_bytes);
+
if (!read_progress_callback->onProgress(read_progress->counters.read_rows, read_progress->counters.read_bytes, read_progress->limits))
node->processor->cancel();
}
diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp
index f523e7b7cf9a..1508d834592f 100644
--- a/src/Processors/Executors/PipelineExecutor.cpp
+++ b/src/Processors/Executors/PipelineExecutor.cpp
@@ -272,7 +272,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie
/// Prepare processor after execution.
if (!graph->updateNode(context.getProcessorID(), queue, async_queue))
- finish();
+ cancel();
/// Push other tasks to global queue.
tasks.pushTasks(queue, async_queue, context);
diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h
index b7b1b0b29a67..00888cfa5e9c 100644
--- a/src/Processors/Formats/IRowInputFormat.h
+++ b/src/Processors/Formats/IRowInputFormat.h
@@ -85,7 +85,7 @@ class IRowInputFormat : public IInputFormat
size_t num_errors = 0;
BlockMissingValues block_missing_values;
- size_t approx_bytes_read_for_chunk;
+ size_t approx_bytes_read_for_chunk = 0;
};
}
diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h
index df77994c3d58..2db8bd6c59c9 100644
--- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h
@@ -50,7 +50,7 @@ class ArrowBlockInputFormat : public IInputFormat
int record_batch_current = 0;
BlockMissingValues block_missing_values;
- size_t approx_bytes_read_for_chunk;
+ size_t approx_bytes_read_for_chunk = 0;
const FormatSettings format_settings;
diff --git a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h
index b7adaa35335f..676ce50d04ff 100644
--- a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h
+++ b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h
@@ -35,7 +35,7 @@ class ArrowFieldIndexUtil
/// - key: field name with full path. eg. a struct field's name is like a.x.i
/// - value: a pair, first value refers to this field's start index, second value refers to how many
/// indices this field take. eg.
- /// For a parquet schema {x: int , y: {i: int, j: int}}, the return will be
+ /// For a parquet schema {x: int, y: {i: int, j: int}}, the return will be
/// - x: (0, 1)
/// - y: (1, 2)
/// - y.i: (1, 1)
diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
index a7efc823fbbb..b2c75db0e540 100644
--- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
@@ -369,14 +369,25 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
break;
case avro::AVRO_UNION:
{
- if (root_node->leaves() == 2
+ if (root_node->leaves() == 1)
+ {
+ auto nested_deserialize = createDeserializeFn(root_node->leafAt(0), target_type);
+ return [nested_deserialize](IColumn & column, avro::Decoder & decoder)
+ {
+ decoder.decodeUnionIndex();
+ nested_deserialize(column, decoder);
+ return true;
+ };
+ }
+ /// FIXME Support UNION with more than two datatypes.
+ else if (
+ root_node->leaves() == 2
&& (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL))
{
int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0;
if (target.isNullable())
{
- auto nested_deserialize = this->createDeserializeFn(
- root_node->leafAt(non_null_union_index), removeNullable(target_type));
+ auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type));
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
ColumnNullable & col = assert_cast(column);
@@ -395,7 +406,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
}
else if (null_as_default)
{
- auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
+ auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
int union_index = static_cast(decoder.decodeUnionIndex());
@@ -1192,12 +1203,19 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node)
case avro::Type::AVRO_NULL:
return std::make_shared();
case avro::Type::AVRO_UNION:
- if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
+ if (node->leaves() == 1)
+ {
+ return avroNodeToDataType(node->leafAt(0));
+ }
+ else if (
+ node->leaves() == 2
+ && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
{
int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0;
auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index));
return nested_type->canBeInsideNullable() ? makeNullable(nested_type) : nested_type;
}
+ /// FIXME Support UNION with more than two datatypes.
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type UNION is not supported for inserting.");
case avro::Type::AVRO_SYMBOLIC:
return avroNodeToDataType(avro::resolveSymbol(node));
diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
index 899b84cc1324..f688efa3290c 100644
--- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
+++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
@@ -233,6 +233,8 @@ namespace DB
checkStatus(components_status, nested_column->getName(), format_name);
/// Pass null null_map, because fillArrowArray will decide whether nested_type is nullable, if nullable, it will create a new null_map from nested_column
+ /// Note that it is only needed by gluten(https://github.com/oap-project/gluten), because array type in gluten is by default nullable.
+ /// And it does not influence the original ClickHouse logic, because null_map passed to fillArrowArrayWithArrayColumnData is always nullptr for ClickHouse doesn't allow nullable complex types including array type.
fillArrowArray(column_name, nested_column, nested_type, nullptr, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], output_string_as_string, output_fixed_string_as_fixed_byte_array, dictionary_values);
}
}
diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h
index 5ab20c796ea3..bb52e2aa516c 100644
--- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h
+++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h
@@ -67,7 +67,7 @@ class JSONColumnsBlockInputFormatBase : public IInputFormat
Serializations serializations;
std::unique_ptr reader;
BlockMissingValues block_missing_values;
- size_t approx_bytes_read_for_chunk;
+ size_t approx_bytes_read_for_chunk = 0;
};
diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
index e5f52936021c..b1b08cdf256e 100644
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
@@ -236,10 +236,10 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi
bool JSONEachRowRowInputFormat::checkEndOfData(bool is_first_row)
{
- /// We consume , or \n before scanning a new row, instead scanning to next row at the end.
+ /// We consume ',' or '\n' before scanning a new row, instead of scanning to the next row at the end.
/// The reason is that if we want an exact number of rows read with LIMIT x
/// from a streaming table engine with text data format, like File or Kafka
- /// then seeking to next ;, or \n would trigger reading of an extra row at the end.
+ /// then seeking to the next ';', ',' or '\n' would trigger reading of an extra row at the end.
/// Semicolon is added for convenience as it could be used at end of INSERT query.
if (!in->eof())
diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp
index f8c9a39eedf1..65ea87479a34 100644
--- a/src/Processors/Formats/Impl/NativeFormat.cpp
+++ b/src/Processors/Formats/Impl/NativeFormat.cpp
@@ -66,7 +66,7 @@ class NativeInputFormat final : public IInputFormat
std::unique_ptr reader;
Block header;
BlockMissingValues block_missing_values;
- size_t approx_bytes_read_for_chunk;
+ size_t approx_bytes_read_for_chunk = 0;
};
class NativeOutputFormat final : public IOutputFormat
diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h
index 98561e72e611..7097ea3ac080 100644
--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h
@@ -52,7 +52,7 @@ class ORCBlockInputFormat : public IInputFormat
std::vector include_indices;
BlockMissingValues block_missing_values;
- size_t approx_bytes_read_for_chunk;
+ size_t approx_bytes_read_for_chunk = 0;
const FormatSettings format_settings;
const std::unordered_set & skip_stripes;
diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h
index 4495680f5b29..f61dc3fbc780 100644
--- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h
+++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h
@@ -202,7 +202,7 @@ class ParallelParsingInputFormat : public IInputFormat
const size_t max_block_size;
BlockMissingValues last_block_missing_values;
- size_t last_approx_bytes_read_for_chunk;
+ size_t last_approx_bytes_read_for_chunk = 0;
/// Non-atomic because it is used in one thread.
std::optional