From eeb6d3bdb5ba64ccb16c3ef17b6c95749fc9c705 Mon Sep 17 00:00:00 2001 From: Dorota Wojcik Date: Wed, 18 Dec 2024 11:39:01 +0100 Subject: [PATCH] draft --- .../concepts/clickhouse-tiered-storage.md | 14 ++++- .../howto/local-cache-tiered-storage.md | 62 +++++++++++++++++++ sidebars.ts | 1 + 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 docs/products/clickhouse/howto/local-cache-tiered-storage.md diff --git a/docs/products/clickhouse/concepts/clickhouse-tiered-storage.md b/docs/products/clickhouse/concepts/clickhouse-tiered-storage.md index c42af4ee..384a66d6 100644 --- a/docs/products/clickhouse/concepts/clickhouse-tiered-storage.md +++ b/docs/products/clickhouse/concepts/clickhouse-tiered-storage.md @@ -10,6 +10,8 @@ The tiered storage feature introduces a method of organizing and storing data in On top of this default data allocation mechanism, you can control the tier your data is stored in using custom data retention periods. +## Tiered storage architecture + The tiered storage in Aiven for ClickHouse® consists of the following two layers: @@ -18,12 +20,22 @@ layers: - Object storage - the second tier: Affordable storage device with unlimited capability, better suited for historical and more rarely queried data, relatively slower +Aiven for ClickHouse's tiered storage supports +[local on-disk cache for remote files](/docs/products/clickhouse/howto/local-cache-tiered-storage), +which is enabled by default. You can +[disable the cache](/docs/products/clickhouse/howto/local-cache-tiered-storage#disable-the-cache) +or +[drop it](/docs/products/clickhouse/howto/local-cache-tiered-storage#free-up-space) to free +up the space it occupies. 
+ +## Supported cloud platforms + On the Aiven tenant (in non-[BYOC](/docs/platform/concepts/byoc) environments), Aiven for ClickHouse tiered storage is supported on the following cloud platforms: - Microsoft Azure - Amazon Web Services (AWS) -- Google Cloud Platform (GCP) +- Google Cloud ## Why use it diff --git a/docs/products/clickhouse/howto/local-cache-tiered-storage.md b/docs/products/clickhouse/howto/local-cache-tiered-storage.md new file mode 100644 index 00000000..6dccb30d --- /dev/null +++ b/docs/products/clickhouse/howto/local-cache-tiered-storage.md @@ -0,0 +1,62 @@ +--- +title: Local on-disk cache for remote files in Aiven for ClickHouse®'s tiered storage +sidebar_label: Local on-disk cache for remote files +--- + +When data is not found in the local storage tier, Aiven for ClickHouse fetches it from the remote tier. To avoid repeated remote fetches, it uses an on-disk cache. + +To manage data, Aiven for ClickHouse's tiered storage uses local storage and remote storage. +When remote storage is used, Aiven for ClickHouse leverages a local on-disk cache to avoid +repeated remote fetches. This improves query performance and reduces latency and costs. + +## How it works + +When a query requires parts of a table stored in the remote tier, Aiven for ClickHouse +fetches the required parts from the remote storage. The fetched parts are automatically +stored in a local cache directory on the disk to avoid repeated downloads for subsequent +queries. For future queries, Aiven for ClickHouse checks the local cache first: + +- If the data is found in the cache, it is read directly from the local disk. +- If the data is not found in the cache, it is fetched from the remote storage and stored + in the local cache. + +Local on-disk cache for remote files is enabled by default for Aiven for ClickHouse's +tiered storage. 
You can +[disable the cache](/docs/products/clickhouse/howto/local-cache-tiered-storage#disable-the-cache) +or +[drop it](/docs/products/clickhouse/howto/local-cache-tiered-storage#free-up-space) to +free up the space it occupies. + +## Prerequisites + +- At least one Aiven for ClickHouse service +- Command line tool + ([ClickHouse client](/docs/products/clickhouse/howto/connect-with-clickhouse-cli)) + installed + +## Disable the cache + +To disable the local cache for a query, set the `enable_filesystem_cache` setting for the +query to `false`. +You can achieve this by appending `SETTINGS enable_filesystem_cache = false` to the end of +your query using an SQL client (for example, the +[ClickHouse client](/docs/products/clickhouse/howto/connect-with-clickhouse-cli)): + +```sql +SELECT 1 +SETTINGS enable_filesystem_cache = false; +``` + +## Free up space + +To drop the local cache and free up the space it uses, run the following SQL statement: + +```sql +SYSTEM DROP FILESYSTEM CACHE 'remote_cache' +``` + +## Related pages + +- [About tiered storage in Aiven for ClickHouse](/docs/products/clickhouse/concepts/clickhouse-tiered-storage) +- [Check data distribution between SSD and object storage](/docs/products/clickhouse/howto/check-data-tiered-storage) +- [Configure data retention thresholds for tiered storage](/docs/products/clickhouse/howto/configure-tiered-storage) diff --git a/sidebars.ts b/sidebars.ts index 2fd50292..2d8b3354 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -1301,6 +1301,7 @@ const sidebars: SidebarsConfig = { 'products/clickhouse/howto/configure-tiered-storage', 'products/clickhouse/howto/check-data-tiered-storage', 'products/clickhouse/howto/transfer-data-tiered-storage', + 'products/clickhouse/howto/local-cache-tiered-storage', ], }, ],