From d77ab7a0c283996c98d51423e151a3f1f39dfc5d Mon Sep 17 00:00:00 2001 From: Yiran Date: Thu, 19 Sep 2024 16:08:58 +0800 Subject: [PATCH] move remote WAL under the disaster recovery --- docs/user-guide/operations/admin.md | 2 +- .../operations/disaster-recovery/overview.md | 3 ++- .../remote-wal/cluster-deployment.md | 0 .../remote-wal/quick-start.md | 0 docs/user-guide/operations/overview.md | 1 - i18n/zh/docusaurus-plugin-content-docs/current.json | 2 +- .../current/user-guide/operations/admin.md | 2 +- .../operations/disaster-recovery/overview.md | 3 ++- .../remote-wal/cluster-deployment.md | 0 .../remote-wal/quick-start.md | 4 ++-- .../current/user-guide/operations/overview.md | 1 - sidebars.ts | 12 +++++++----- 12 files changed, 16 insertions(+), 14 deletions(-) rename docs/user-guide/operations/{ => disaster-recovery}/remote-wal/cluster-deployment.md (100%) rename docs/user-guide/operations/{ => disaster-recovery}/remote-wal/quick-start.md (100%) rename i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/{ => disaster-recovery}/remote-wal/cluster-deployment.md (100%) rename i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/{ => disaster-recovery}/remote-wal/quick-start.md (98%) diff --git a/docs/user-guide/operations/admin.md b/docs/user-guide/operations/admin.md index 9ca956d67..1cc8f1bdb 100644 --- a/docs/user-guide/operations/admin.md +++ b/docs/user-guide/operations/admin.md @@ -36,7 +36,7 @@ The `INFORMATION_SCHEMA` database provides access to system metadata, such as th ## Data management * [The Storage Location](/user-guide/concepts/storage-location.md). -* Cluster Failover for GreptimeDB by [Setting Remote WAL](./remote-wal/quick-start.md). +* Cluster Failover for GreptimeDB by [Setting Remote WAL](/user-guide/operations/disaster-recovery/remote-wal/quick-start.md). * [Flush and Compaction for Table & Region](/reference/sql/admin.md#admin-functions). 
* Partition the table by regions, read the [Table Sharding](/contributor-guide/frontend/table-sharding.md) reference. * [Migrate the Region](./region-migration.md) for Load Balance. diff --git a/docs/user-guide/operations/disaster-recovery/overview.md b/docs/user-guide/operations/disaster-recovery/overview.md index 5f50ab910..1345f06ab 100644 --- a/docs/user-guide/operations/disaster-recovery/overview.md +++ b/docs/user-guide/operations/disaster-recovery/overview.md @@ -36,7 +36,7 @@ Before digging into the specific DR solution, let's explain the architecture of GreptimeDB is designed with a cloud-native architecture based on storage-compute separation: * **Frontend**: the ingestion and query service layer, which forwards requests to Datanode and processes, and merges responses from Datanode. * **Datanode**: the storage layer of GreptimeDB, and is an LSM storage engine. Region is the basic unit for storing and scheduling data in Datanode. A region is a table partition, a collection of data rows. The data in region is saved into Object Storage (such as AWS S3). Unflushed Memtable data is written into WAL and can be recovered in DR. -* **WAL**: persists the unflushed Memtable data in memory. It will be truncated when the Memtable is flushed into SSTable files. It can be local disk-based (local WAL) or Kafka cluster-based (remote WAL). +* **WAL**: persists the unflushed Memtable data in memory. It will be truncated when the Memtable is flushed into SSTable files. It can be local disk-based (local WAL) or [Kafka cluster-based (remote WAL)](./remote-wal/quick-start.md). * **Object Storage**: persists the SSTable data and index. The GreptimeDB stores data in object storage such as [AWS S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/DataDurability.html) or its compatible services, which is designed to provide 99.999999999% durability and 99.99% availability of objects over a given year. 
And services such as S3 provide [replications in Single-Region or Cross-Region](https://docs.aws.amazon.com/AmazonS3/latest/userguide/replication.html), which is naturally capable of DR. @@ -118,6 +118,7 @@ By comparing these DR solutions, you can decide on the final option based on the ## References +* [Remote WAL](./remote-wal/quick-start.md) * [Backup & restore data](./back-up-&-restore-data.md) * [DR solution for GreptimeDB Standalone](./dr-solution-for-standalone.md) * [DR solution based on Active-Active Failover ](./dr-solution-based-on-active-active-failover.md) diff --git a/docs/user-guide/operations/remote-wal/cluster-deployment.md b/docs/user-guide/operations/disaster-recovery/remote-wal/cluster-deployment.md similarity index 100% rename from docs/user-guide/operations/remote-wal/cluster-deployment.md rename to docs/user-guide/operations/disaster-recovery/remote-wal/cluster-deployment.md diff --git a/docs/user-guide/operations/remote-wal/quick-start.md b/docs/user-guide/operations/disaster-recovery/remote-wal/quick-start.md similarity index 100% rename from docs/user-guide/operations/remote-wal/quick-start.md rename to docs/user-guide/operations/disaster-recovery/remote-wal/quick-start.md diff --git a/docs/user-guide/operations/overview.md b/docs/user-guide/operations/overview.md index e13f12f68..b5e8c68be 100644 --- a/docs/user-guide/operations/overview.md +++ b/docs/user-guide/operations/overview.md @@ -5,5 +5,4 @@ * [Disaster Recovery](./disaster-recovery/overview.md) * [Monitoring](./monitoring.md) * [Tracing](./tracing.md) -* [Remote WAL](./remote-wal/quick-start.md) * [Region Migration](./region-migration.md) diff --git a/i18n/zh/docusaurus-plugin-content-docs/current.json b/i18n/zh/docusaurus-plugin-content-docs/current.json index ba5fc9818..2fac0de3d 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current.json +++ b/i18n/zh/docusaurus-plugin-content-docs/current.json @@ -64,7 +64,7 @@ "description": "The label for category Disaster Recovery in 
sidebar docs" }, "sidebar.docs.category.Remote WAL": { - "message": "Remote WAL", + "message": "远程 WAL", "description": "The label for category Remote WAL in sidebar docs" }, "sidebar.docs.category.GreptimeCloud": { diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/admin.md b/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/admin.md index e73f895b0..ada8c7b06 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/admin.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/admin.md @@ -36,7 +36,7 @@ ORDER BY datanode_id ASC ## 数据管理 * [存储位置说明](/user-guide/concepts/storage-location.md)。 -* 通过 [设置Remote WAL](./remote-wal/quick-start.md) 实现 GreptimeDB 的集群容灾。 +* 通过 [设置 Remote WAL](/user-guide/operations/disaster-recovery/remote-wal/quick-start.md) 实现 GreptimeDB 的集群容灾。 * [Table 和 Region 的 Flush 和 Compaction](/reference/sql/admin.md##管理函数)。 * 通过 Region 对表进行分区,请阅读 [表的分片](/contributor-guide/frontend/table-sharding.md) 参考。 * [迁移 Region](./region-migration.md) 以实现负载均衡。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/disaster-recovery/overview.md b/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/disaster-recovery/overview.md index 837884495..5357de3f7 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/disaster-recovery/overview.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/disaster-recovery/overview.md @@ -42,7 +42,7 @@ GreptimeDB 基于存储计算分离的云原生架构设计: * **Frontend**:数据插入和查询的服务层,将请求转发到 Datanode 并处理和合并 Datanode 的响应。 * **Datanode**:GreptimeDB 的存储层,是一个 LSM 存储引擎。Region 是在 Datanode 中存储和调度数据的基本单元。Region 是一个表分区,是一组数据行的集合。Region 中的数据保存在对象存储中(例如 AWS S3)。未刷新的 Memtable 数据被写入 WAL,并可以在灾难发生时恢复。 -* **WAL**:持久化内存中未刷新的 Memtable 数据。当 Memtable 被刷新到 SSTable 文件时,WAL 将被截断。它可以是基于本地磁盘的(本地 WAL)或基于 Kafka 集群的(远程 WAL)。 +* **WAL**:持久化内存中未刷新的 Memtable 数据。当 Memtable 被刷新到 SSTable 文件时,WAL 
将被截断。它可以是基于本地磁盘的(本地 WAL)或[基于 Kafka 集群的(远程 WAL)](./remote-wal/quick-start.md)。 * **对象存储**:持久化 SSTable 数据和索引。 GreptimeDB 将数据存储在对象存储(如 [AWS S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/DataDurability.html))或兼容的服务中,这些服务在年度范围内提供了 99.999999999% 的持久性和 99.99% 的可用性。像 S3 这样的服务提供了[单区域或跨区域的复制](https://docs.aws.amazon.com/AmazonS3/latest/userguide/replication.html),天然具备灾难恢复能力。 @@ -135,6 +135,7 @@ BR 进程持续定期将数据从 Cluster 1 备份到 Region 2。 ## 参考资料 +* [远程 WAL](./remote-wal/quick-start.md) * [备份与恢复数据](./back-up-&-restore-data.md) * [GreptimeDB Standalone 的 DR 解决方案](./dr-solution-for-standalone.md) * [基于双活-备份的 DR 解决方案](./dr-solution-based-on-active-active-failover.md) diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/remote-wal/cluster-deployment.md b/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/disaster-recovery/remote-wal/cluster-deployment.md similarity index 100% rename from i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/remote-wal/cluster-deployment.md rename to i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/disaster-recovery/remote-wal/cluster-deployment.md diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/remote-wal/quick-start.md b/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/disaster-recovery/remote-wal/quick-start.md similarity index 98% rename from i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/remote-wal/quick-start.md rename to i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/disaster-recovery/remote-wal/quick-start.md index 652eeff56..174ccff34 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/remote-wal/quick-start.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/disaster-recovery/remote-wal/quick-start.md @@ -1,13 +1,13 @@ # 快速开始 -## 什么是 Remote WAL +## 什么是远程 WAL(Remote WAL) 
[WAL](/contributor-guide/datanode/wal.md#introduction)(Write-Ahead Logging) 是 GreptimeDB 中的一个关键组件,它持久记录每一次数据修改,以确保不会丢失缓存在内存中的数据。我们在 [Datanode](/user-guide/concepts/why-greptimedb.md) 服务中用持久的嵌入式存储引擎 [raft-engine](https://github.com/tikv/raft-engine) 将 WAL 实现为一个模块。在公共云中部署 GreptimeDB 时,我们可以在云存储(AWS EBS、GCP 持久盘等)中持久存储 WAL 数据,以实现 0 RPO。然而,由于 WAL 与 Datanode 紧密耦合,导致部署过程中的 RTO(Recovery Time Objective)较长。此外,由于 raft-engine 无法支持多日志订阅,这使得实现 region 热备份和 region 迁移变得困难。 为了解决上述问题,我们决定设计并实现一个远程 WAL。远程 WAL 将 WAL 从 Datanode 分离到远程服务,我们选择了 Apache Kafka 作为远程服务。Apache Kafka 在流处理中被广泛采用,展现出卓越的分布式容错能力和基于主题的订阅机制。在发布 v0.5.0 版本时,我们引入了 Apache Kafka 作为 WAL 的可选存储引擎。 -## 运行带有 Remote WAL 的 Standalone GreptimeDB +## 运行带有远程 WAL 的 Standalone GreptimeDB 通过以下步骤使用 Docker 体验远程 WAL 非常简单。在这个快速开始中,我们将创建一个采用 KRaft 模式的 Kafka 集群,并将其作为独立 GreptimeDB 的远程 WAL。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/overview.md b/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/overview.md index 2305cb643..2d5c50c00 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/overview.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/user-guide/operations/overview.md @@ -5,5 +5,4 @@ * [灾难恢复](./disaster-recovery/overview.md) * [监控](./monitoring.md) * [Tracing](./tracing.md) -* [Remote WAL](./remote-wal/quick-start.md) * [Region 迁移](./region-migration.md) diff --git a/sidebars.ts b/sidebars.ts index 7a25bb4ca..e201fa143 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -185,16 +185,18 @@ const sidebars: SidebarsConfig = { label: 'Disaster Recovery', items: [ 'user-guide/operations/disaster-recovery/overview', + { + type: 'category', + label: 'Remote WAL', + items: [ + 'user-guide/operations/disaster-recovery/remote-wal/quick-start', + 'user-guide/operations/disaster-recovery/remote-wal/cluster-deployment'], + }, 'user-guide/operations/disaster-recovery/back-up-&-restore-data', 
'user-guide/operations/disaster-recovery/dr-solution-based-on-active-active-failover', 'user-guide/operations/disaster-recovery/dr-solution-based-on-cross-region-deployment-in-single-cluster', ], }, - { - type: 'category', - label: 'Remote WAL', - items: ['user-guide/operations/remote-wal/quick-start', 'user-guide/operations/remote-wal/cluster-deployment'], - }, 'user-guide/operations/region-migration', 'user-guide/operations/region-failover', 'user-guide/operations/compaction',