diff --git a/guides/locality-groups/index.html b/guides/locality-groups/index.html index a20c6a5..64a6bb2 100644 --- a/guides/locality-groups/index.html +++ b/guides/locality-groups/index.html @@ -55,13 +55,13 @@

Setup

All data is stored in the _dat_scan-example partition.

Ingest data

Let’s ingest some data and query it (body is truncated for brevity):

-
Terminal window
curl --request POST \
--url http://localhost:9876/v1/table/scan-example/write \
--header 'content-type: application/json' \
--data '{
"items": [
{
"row_key": "org.apache.spark",
"cells": [
{
"column_key": "title:",
"value": {
"String": "Apache Spark™ - Unified Engine for large-scale data analytics"
}
},
{
"column_key": "language:",
"value": {
"String": "EN"
}
}
]
},
{
"row_key": "org.apache.solr",
"cells": [
{
"column_key": "title:",
"value": {
"String": "Welcome to Apache Solr - Apache Solr"
}
},
{
"column_key": "language:",
"value": {
"String": "EN"
}
}
]
}
]
}'
+
Terminal window
curl --request POST \
--url http://localhost:9876/v1/table/scan-example/write \
--header 'content-type: application/json' \
--data '{
"items": [
{
"row_key": "org.apache.spark",
"cells": [
{
"column_key": "title:",
"type": "string",
"value": "Apache Spark™ - Unified Engine for large-scale data analytics"
},
{
"column_key": "language:",
"type": "string",
"value": "EN"
}
]
},
{
"row_key": "org.apache.solr",
"cells": [
{
"column_key": "title:",
"type": "string",
"value": "Welcome to Apache Solr - Apache Solr"
},
{
"column_key": "language:",
"type": "string",
"value": "EN"
}
]
}
]
}'

Query data

Let’s query our entire table using a scan with an empty prefix, returning only the "title:" column:

Terminal window
curl --request POST \
--url http://localhost:9876/v1/table/scan-example/scan \
--header 'content-type: application/json' \
--data '{
"row": {
"prefix": ""
},
"column": {
"key": "title:"
}
}'

Smoltable returns (again, body truncated for brevity):

-
{
"message": "Query successful",
"result": {
"affected_locality_groups": 2,
"bytes_scanned": 1141,
"cell_count": 8,
"cells_scanned": 16,
"micros_per_row": 18,
"row_count": 8,
"rows": [
{
"columns": {
"title": {
"": [
{
"timestamp": 1706197595375136143,
"value": {
"String": "Apache Cassandra | Apache Cassandra Documentation"
}
}
]
}
},
"row_key": "org.apache.cassandra"
}
],
"rows_scanned": 8
},
"status": 200,
"time_ms": 0
}
+
{
"message": "Query successful",
"result": {
"affected_locality_groups": 2,
"bytes_scanned": 1141,
"cell_count": 8,
"cells_scanned": 16,
"micros_per_row": 18,
"row_count": 8,
"rows": [
{
"columns": {
"title": {
"": [
{
"time": 1706197595375136143,
"type": "string",
"value": "Apache Cassandra | Apache Cassandra Documentation"
}
]
}
},
"row_key": "org.apache.cassandra"
}
],
"rows_scanned": 8
},
"status": 200,
"time_ms": 0
}

Note how we scanned about 1 KB of data and 16 cells, but only returned 8 cells (because we filtered by the title column family). That means we have a read amplification of about 2.

Example: With locality groups

Setup

@@ -71,14 +71,14 @@

Setup

Terminal window
curl --request POST \
--url http://localhost:9876/v1/table/locality-example/column-family \
--header 'content-type: application/json' \
--data '{
"column_families": [
{
"name": "language"
}
]
}'
Terminal window
curl --request POST \
--url http://localhost:9876/v1/table/locality-example/column-family \
--header 'content-type: application/json' \
--data '{
"column_families": [
{
"name": "title"
}
],
"locality_group": true
}'

By listing our table, we can see the column families have been created, and title is moved into a locality group:

-
{
"message": "Tables retrieved successfully",
"result": {
"tables": {
"count": 1,
"items": [
{
"column_families": [
{
"gc_settings": {
"ttl_secs": null,
"version_limit": null
},
"name": "language"
},
{
"gc_settings": {
"ttl_secs": null,
"version_limit": null
},
"name": "title"
}
],
"disk_space_in_bytes": 0,
"locality_groups": [
{
"column_families": [
"title"
],
"id": "ur_pSQZ2QAYR6XsF9Xz0o"
}
],
"name": "locality-example",
"partitions": [
{
"name": "_man_locality-example",
"path": ".smoltable_data/partitions/_man_locality-example"
},
{
"name": "_dat_locality-example",
"path": ".smoltable_data/partitions/_dat_locality-example"
},
{
"name": "_lg_ur_pSQZ2QAYR6XsF9Xz0o",
"path": ".smoltable_data/partitions/_lg_ur_pSQZ2QAYR6XsF9Xz0o"
}
]
}
]
}
},
"status": 200,
"time_ms": 0
}
+
{
"message": "Tables retrieved successfully",
"result": {
"tables": {
"count": 1,
"items": [
{
"column_families": [
{
"gc_settings": {
"ttl_secs": null,
"version_limit": null
},
"name": "language"
},
{
"gc_settings": {
"ttl_secs": null,
"version_limit": null
},
"name": "title"
}
],
"disk_space_in_bytes": 0,
"locality_groups": [
{
"column_families": ["title"],
"id": "ur_pSQZ2QAYR6XsF9Xz0o"
}
],
"name": "locality-example",
"partitions": [
{
"name": "_man_locality-example",
"path": ".smoltable_data/partitions/_man_locality-example"
},
{
"name": "_dat_locality-example",
"path": ".smoltable_data/partitions/_dat_locality-example"
},
{
"name": "_lg_ur_pSQZ2QAYR6XsF9Xz0o",
"path": ".smoltable_data/partitions/_lg_ur_pSQZ2QAYR6XsF9Xz0o"
}
]
}
]
}
},
"status": 200,
"time_ms": 0
}

Column families that are not title are stored in the _dat_locality-example partition, and title data is moved into the _lg_ur_pSQZ2QAYR6XsF9Xz0o partition.

Ingest data

Ingest the same data as before into locality-example.

Query data

Terminal window
curl --request POST \
--url http://localhost:9876/v1/table/locality-example/scan \
--header 'content-type: application/json' \
--data '{
"row": {
"prefix": ""
},
"column": {
"key": "title:"
}
}'

which returns (truncated):

-
{
"message": "Query successful",
"result": {
"affected_locality_groups": 1,
"bytes_scanned": 681,
"cell_count": 8,
"cells_scanned": 8,
"micros_per_row": 18,
"row_count": 8,
"rows": [
{
"columns": {
"title": {
"": [
{
"timestamp": 1706198298766257607,
"value": {
"String": "Apache Cassandra | Apache Cassandra Documentation"
}
}
]
}
},
"row_key": "org.apache.cassandra"
}
],
"rows_scanned": 8
},
"status": 200,
"time_ms": 0
}
+
{
"message": "Query successful",
"result": {
"affected_locality_groups": 1,
"bytes_scanned": 681,
"cell_count": 8,
"cells_scanned": 8,
"micros_per_row": 18,
"row_count": 8,
"rows": [
{
"columns": {
"title": {
"": [
{
"time": 1706198298766257607,
"type": "string",
"value": "Apache Cassandra | Apache Cassandra Documentation"
}
]
}
},
"row_key": "org.apache.cassandra"
}
],
"rows_scanned": 8
},
"status": 200,
"time_ms": 0
}

We get the exact same result; however, we reduced scanned bytes down to 681 bytes and halved the scanned cells, achieving a read amplification of 1!

Example: Scanning another column family

Let’s scan the language column family instead, which is still stored in the default partition.

diff --git a/guides/wide-column-intro/index.html b/guides/wide-column-intro/index.html index f2d2870..16ae8d4 100644 --- a/guides/wide-column-intro/index.html +++ b/guides/wide-column-intro/index.html @@ -38,13 +38,13 @@

which maps to some value, the cell value. The cell value, unlike in Bigtable, can be a certain type:

The timestamp allows storing multiple versions of the same cell.

diff --git a/pagefind/fragment/en_3e964c9.pf_fragment b/pagefind/fragment/en_3e964c9.pf_fragment new file mode 100644 index 0000000..01e5bb9 Binary files /dev/null and b/pagefind/fragment/en_3e964c9.pf_fragment differ diff --git a/pagefind/fragment/en_4341d75.pf_fragment b/pagefind/fragment/en_4341d75.pf_fragment new file mode 100644 index 0000000..c333a97 Binary files /dev/null and b/pagefind/fragment/en_4341d75.pf_fragment differ diff --git a/pagefind/fragment/en_5983553.pf_fragment b/pagefind/fragment/en_5983553.pf_fragment deleted file mode 100644 index 5e4fe91..0000000 Binary files a/pagefind/fragment/en_5983553.pf_fragment and /dev/null differ diff --git a/pagefind/fragment/en_63d921c.pf_fragment b/pagefind/fragment/en_63d921c.pf_fragment new file mode 100644 index 0000000..9dd92b4 Binary files /dev/null and b/pagefind/fragment/en_63d921c.pf_fragment differ diff --git a/pagefind/fragment/en_7f3284a.pf_fragment b/pagefind/fragment/en_7f3284a.pf_fragment new file mode 100644 index 0000000..440a774 Binary files /dev/null and b/pagefind/fragment/en_7f3284a.pf_fragment differ diff --git a/pagefind/fragment/en_be1582c.pf_fragment b/pagefind/fragment/en_be1582c.pf_fragment new file mode 100644 index 0000000..fa144a9 Binary files /dev/null and b/pagefind/fragment/en_be1582c.pf_fragment differ diff --git a/pagefind/fragment/en_c27e9e9.pf_fragment b/pagefind/fragment/en_c27e9e9.pf_fragment deleted file mode 100644 index 4d0f7cd..0000000 Binary files a/pagefind/fragment/en_c27e9e9.pf_fragment and /dev/null differ diff --git a/pagefind/fragment/en_c2e277c.pf_fragment b/pagefind/fragment/en_c2e277c.pf_fragment deleted file mode 100644 index a549583..0000000 Binary files a/pagefind/fragment/en_c2e277c.pf_fragment and /dev/null differ diff --git a/pagefind/fragment/en_f4feb32.pf_fragment b/pagefind/fragment/en_f4feb32.pf_fragment deleted file mode 100644 index c76802b..0000000 Binary files a/pagefind/fragment/en_f4feb32.pf_fragment and /dev/null differ diff --git 
a/pagefind/fragment/en_fd5bf06.pf_fragment b/pagefind/fragment/en_fd5bf06.pf_fragment deleted file mode 100644 index 74f6b97..0000000 Binary files a/pagefind/fragment/en_fd5bf06.pf_fragment and /dev/null differ diff --git a/pagefind/index/en_595d159.pf_index b/pagefind/index/en_595d159.pf_index deleted file mode 100644 index 74772e0..0000000 Binary files a/pagefind/index/en_595d159.pf_index and /dev/null differ diff --git a/pagefind/index/en_a0a4322.pf_index b/pagefind/index/en_a0a4322.pf_index new file mode 100644 index 0000000..5d90922 Binary files /dev/null and b/pagefind/index/en_a0a4322.pf_index differ diff --git a/pagefind/pagefind-entry.json b/pagefind/pagefind-entry.json index c0d46ff..698fa5c 100644 --- a/pagefind/pagefind-entry.json +++ b/pagefind/pagefind-entry.json @@ -1 +1 @@ -{"version":"1.0.4","languages":{"en":{"hash":"en_36c09d1723","wasm":"en","page_count":11}}} \ No newline at end of file +{"version":"1.0.4","languages":{"en":{"hash":"en_5a5433fd8e","wasm":"en","page_count":11}}} \ No newline at end of file diff --git a/pagefind/pagefind.en_36c09d1723.pf_meta b/pagefind/pagefind.en_36c09d1723.pf_meta deleted file mode 100644 index b6201f1..0000000 Binary files a/pagefind/pagefind.en_36c09d1723.pf_meta and /dev/null differ diff --git a/pagefind/pagefind.en_5a5433fd8e.pf_meta b/pagefind/pagefind.en_5a5433fd8e.pf_meta new file mode 100644 index 0000000..a9d5ed4 Binary files /dev/null and b/pagefind/pagefind.en_5a5433fd8e.pf_meta differ diff --git a/reference/json-api/ingest-data/index.html b/reference/json-api/ingest-data/index.html index 6ae244d..67690cb 100644 --- a/reference/json-api/ingest-data/index.html +++ b/reference/json-api/ingest-data/index.html @@ -32,6 +32,6 @@

Ingest data

URL

POST http://smoltable:9876/v1/table/[name]/write

Example body

-
{
"items": [
{
"row_key": "org.apache.spark",
"cells": [
{
"column_key": "title:",
"value": {
"String": "Apache Spark™ - Unified Engine for large-scale data analytics"
}
},
{
"column_key": "anchor:org.apache.hbase",
"value": {
"String": "Visit Apache Spark"
}
},
{
"column_key": "meta:size",
"value": {
"I64": 152014
}
},
]
}
]
}
+
{
"items": [
{
"row_key": "org.apache.spark",
"cells": [
{
"column_key": "title:",
"type": "string",
"value": "Apache Spark™ - Unified Engine for large-scale data analytics"
},
{
"column_key": "anchor:org.apache.hbase",
"type": "string",
"value": "Visit Apache Spark"
},
{
"column_key": "meta:size",
"type": "i64",
"value": 152014
}
]
}
]
}

Example response

{
"message": "Data ingestion successful",
"result": {
"items": {
"cell_count": 3,
"row_count": 1
},
"micros_per_item": 5
},
"status": 200,
"time_ms": 0
}
\ No newline at end of file diff --git a/reference/json-api/retrieve-rows/index.html b/reference/json-api/retrieve-rows/index.html index b02fc2d..d35272d 100644 --- a/reference/json-api/retrieve-rows/index.html +++ b/reference/json-api/retrieve-rows/index.html @@ -34,13 +34,13 @@

Example body

{
"items": [
{
"row": {
"key": "org.apache.spark"
}
}
]
}

Example response

-
{
"message": "Query successful",
"result": {
"bytes_scanned": 124,
"cells_scanned": 1,
"micros": 23,
"micros_per_row": 23,
"rows": [
{
"columns": {
"title": {
"": [
{
"timestamp": 0,
"value": {
"String": "Apache Spark"
}
}
]
}
},
"row_key": "org.apache.spark"
}
],
"rows_scanned": 1
},
"status": 200,
"time_ms": 0
}
+
{
"message": "Query successful",
"result": {
"bytes_scanned": 124,
"cells_scanned": 1,
"micros": 23,
"micros_per_row": 23,
"rows": [
{
"columns": {
"title": {
"": [
{
"time": 0,
"type": "string",
"value": "Apache Spark"
}
]
}
},
"row_key": "org.apache.spark"
}
],
"rows_scanned": 1
},
"status": 200,
"time_ms": 0
}

Filter by column family

{
"items": [
{
"row": {
"key": "org.apache.spark"
},
"column": {
"key": "anchor:"
}
}
]
}

Filter by column

{
"items": [
{
"row": {
"key": "org.apache.spark"
},
"column": {
"key": "anchor:com.apache.solr"
}
}
]
}

Filter by multiple columns

-
{
"items": [
{
"row": {
"key": "org.apache.spark"
},
"column": {
"multi_key": [
"anchor:com.apache.solr",
"anchor:com.apache.hbase"
]
}
}
]
}
+
{
"items": [
{
"row": {
"key": "org.apache.spark"
},
"column": {
"multi_key": ["anchor:com.apache.solr", "anchor:com.apache.hbase"]
}
}
]
}

Filter by column qualifier prefix

{
"items": [
{
"row": {
"key": "org.apache.spark"
},
"column": {
"prefix": "anchor:com."
}
}
]
}

Limit returned cell versions per column

diff --git a/reference/json-api/scan-rows/index.html b/reference/json-api/scan-rows/index.html index d013540..22937c3 100644 --- a/reference/json-api/scan-rows/index.html +++ b/reference/json-api/scan-rows/index.html @@ -34,13 +34,13 @@

Example body

{
"items": [
{
"row": {
"prefix": "org."
}
}
]
}

Example response

-
{
"message": "Query successful",
"result": {
"bytes_scanned": 124124,
"cells_scanned": 2,
"micros": 100,
"micros_per_row": 50,
"rows": [
{
"columns": {
"title": {
"": [
{
"timestamp": 0,
"value": {
"String": "Apache Solr"
}
}
]
}
},
"row_key": "org.apache.solr"
},
{
"columns": {
"title": {
"": [
{
"timestamp": 0,
"value": {
"String": "Apache Spark"
}
}
]
}
},
"row_key": "org.apache.spark"
}
],
"rows_scanned": 2
},
"status": 200,
"time_ms": 0
}
+
{
"message": "Query successful",
"result": {
"bytes_scanned": 124124,
"cells_scanned": 2,
"micros": 100,
"micros_per_row": 50,
"rows": [
{
"columns": {
"title": {
"": [
{
"time": 0,
"type": "string",
"value": "Apache Solr"
}
]
}
},
"row_key": "org.apache.solr"
},
{
"columns": {
"title": {
"": [
{
"time": 0,
"type": "string",
"value": "Apache Spark"
}
]
}
},
"row_key": "org.apache.spark"
}
],
"rows_scanned": 2
},
"status": 200,
"time_ms": 0
}

Filter by column family

{
"items": [
{
"row": {
"prefix": "org.apache."
},
"column": {
"key": "anchor:"
}
}
]
}

Filter by column

{
"items": [
{
"row": {
"prefix": "org.apache."
},
"column": {
"key": "anchor:com.apache.solr"
}
}
]
}

Filter by multiple columns

-
{
"items": [
{
"row": {
"prefix": "org.apache."
},
"column": {
"multi_key": [
"anchor:com.apache.solr",
"anchor:com.apache.hbase"
]
}
}
]
}
+
{
"items": [
{
"row": {
"prefix": "org.apache."
},
"column": {
"multi_key": ["anchor:com.apache.solr", "anchor:com.apache.hbase"]
}
}
]
}

Filter by column qualifier prefix

{
"items": [
{
"row": {
"prefix": "org.apache."
},
"column": {
"prefix": "anchor:com."
}
}
]
}

Limit returned cell versions per column