diff --git a/guides/locality-groups/index.html b/guides/locality-groups/index.html
index a20c6a5..64a6bb2 100644
--- a/guides/locality-groups/index.html
+++ b/guides/locality-groups/index.html
@@ -55,13 +55,13 @@
Setup
All data is stored in the _dat_scan-example
partition.
Ingest data
Let’s ingest some data and query it (body is truncated for brevity):
-curl --request POST \
--url http://localhost:9876/v1/table/scan-example/write \
--header ' content-type: application/json ' \
--data ' {
"items": [
{
"row_key": "org.apache.spark",
"cells": [
{
"column_key": "title:",
"value": {
"String": "Apache Spark™ - Unified Engine for large-scale data analytics"
}
},
{
"column_key": "language:",
"value": {
"String": "EN"
}
}
]
},
{
"row_key": "org.apache.solr",
"cells": [
{
"column_key": "title:",
"value": {
"String": "Welcome to Apache Solr - Apache Solr"
}
},
{
"column_key": "language:",
"value": {
"String": "EN"
}
}
]
}
]
} '
+curl --request POST \
--url http://localhost:9876/v1/table/scan-example/write \
--header ' content-type: application/json ' \
--data ' {
"items": [
{
"row_key": "org.apache.spark",
"cells": [
{
"column_key": "title:",
"type": "string",
"value": "Apache Spark™ - Unified Engine for large-scale data analytics"
},
{
"column_key": "language:",
"type": "string",
"value": "EN"
}
]
},
{
"row_key": "org.apache.solr",
"cells": [
{
"column_key": "title:",
"type": "string",
"value": "Welcome to Apache Solr - Apache Solr"
},
{
"column_key": "language:",
"type": "string",
"value": "EN"
}
]
}
]
} '
Query data
Let’s query our entire table using a scan with an empty prefix, but
only return the column title:
:
curl --request POST \
--url http://localhost:9876/v1/table/scan-example/scan \
--header ' content-type: application/json ' \
--data ' {
"row": {
"prefix": ""
},
"column": {
"key": "title:"
}
} '
Smoltable returns (again, body truncated for brevity):
-{
"message" : " Query successful " ,
"result" : {
"affected_locality_groups" : 2 ,
"bytes_scanned" : 1141 ,
"cell_count" : 8 ,
"cells_scanned" : 16 ,
"micros_per_row" : 18 ,
"row_count" : 8 ,
"rows" : [
{
"columns" : {
"title" : {
"" : [
{
"timestamp" : 1706197595375136143 ,
"value" : {
"String" : " Apache Cassandra | Apache Cassandra Documentation "
}
}
]
}
},
"row_key" : " org.apache.cassandra "
}
],
"rows_scanned" : 8
},
"status" : 200 ,
"time_ms" : 0
}
+{
"message" : " Query successful " ,
"result" : {
"affected_locality_groups" : 2 ,
"bytes_scanned" : 1141 ,
"cell_count" : 8 ,
"cells_scanned" : 16 ,
"micros_per_row" : 18 ,
"row_count" : 8 ,
"rows" : [
{
"columns" : {
"title" : {
"" : [
{
"time" : 1706197595375136143 ,
"type" : " string " ,
"value" : " Apache Cassandra | Apache Cassandra Documentation "
}
]
}
},
"row_key" : " org.apache.cassandra "
}
],
"rows_scanned" : 8
},
"status" : 200 ,
"time_ms" : 0
}
Note how we scanned about 1 KB of data and 16 cells, but only returned 8 cells (because we filtered by the title
column family). That means we have a read amplification of about 2
.
Example: With locality groups
Setup
@@ -71,14 +71,14 @@ Setup
curl --request POST \
--url http://localhost:9876/v1/table/locality-example/column-family \
--header ' content-type: application/json ' \
--data ' {
"column_families": [
{
"name": "language"
}
]
} '
curl --request POST \
--url http://localhost:9876/v1/table/locality-example/column-family \
--header ' content-type: application/json ' \
--data ' {
"column_families": [
{
"name": "title"
}
],
"locality_group": true
} '
By listing our table, we can see the column families have been created, and title
is moved into a locality group:
-{
"message" : " Tables retrieved successfully " ,
"result" : {
"tables" : {
"count" : 1 ,
"items" : [
{
"column_families" : [
{
"gc_settings" : {
"ttl_secs" : null ,
"version_limit" : null
},
"name" : " language "
},
{
"gc_settings" : {
"ttl_secs" : null ,
"version_limit" : null
},
"name" : " title "
}
],
"disk_space_in_bytes" : 0 ,
"locality_groups" : [
{
"column_families" : [
" title "
],
"id" : " ur_pSQZ2QAYR6XsF9Xz0o "
}
],
"name" : " locality-example " ,
"partitions" : [
{
"name" : " _man_locality-example " ,
"path" : " .smoltable_data/partitions/_man_locality-example "
},
{
"name" : " _dat_locality-example " ,
"path" : " .smoltable_data/partitions/_dat_locality-example "
},
{
"name" : " _lg_ur_pSQZ2QAYR6XsF9Xz0o " ,
"path" : " .smoltable_data/partitions/_lg_ur_pSQZ2QAYR6XsF9Xz0o "
}
]
}
]
}
},
"status" : 200 ,
"time_ms" : 0
}
+{
"message" : " Tables retrieved successfully " ,
"result" : {
"tables" : {
"count" : 1 ,
"items" : [
{
"column_families" : [
{
"gc_settings" : {
"ttl_secs" : null ,
"version_limit" : null
},
"name" : " language "
},
{
"gc_settings" : {
"ttl_secs" : null ,
"version_limit" : null
},
"name" : " title "
}
],
"disk_space_in_bytes" : 0 ,
"locality_groups" : [
{
"column_families" : [ " title " ],
"id" : " ur_pSQZ2QAYR6XsF9Xz0o "
}
],
"name" : " locality-example " ,
"partitions" : [
{
"name" : " _man_locality-example " ,
"path" : " .smoltable_data/partitions/_man_locality-example "
},
{
"name" : " _dat_locality-example " ,
"path" : " .smoltable_data/partitions/_dat_locality-example "
},
{
"name" : " _lg_ur_pSQZ2QAYR6XsF9Xz0o " ,
"path" : " .smoltable_data/partitions/_lg_ur_pSQZ2QAYR6XsF9Xz0o "
}
]
}
]
}
},
"status" : 200 ,
"time_ms" : 0
}
Column families that are not title
are stored in the _dat_locality-example
partition, and title
data is moved into the _lg_ur_pSQZ2QAYR6XsF9Xz0o
partition.
Ingest data
Ingest the same data as before into locality-example
.
Query data
curl --request POST \
--url http://localhost:9876/v1/table/locality-example/scan \
--header ' content-type: application/json ' \
--data ' {
"row": {
"prefix": ""
},
"column": {
"key": "title:"
}
} '
which returns (truncated):
-{
"message" : " Query successful " ,
"result" : {
"affected_locality_groups" : 1 ,
"bytes_scanned" : 681 ,
"cell_count" : 8 ,
"cells_scanned" : 8 ,
"micros_per_row" : 18 ,
"row_count" : 8 ,
"rows" : [
{
"columns" : {
"title" : {
"" : [
{
"timestamp" : 1706198298766257607 ,
"value" : {
"String" : " Apache Cassandra | Apache Cassandra Documentation "
}
}
]
}
},
"row_key" : " org.apache.cassandra "
}
],
"rows_scanned" : 8
},
"status" : 200 ,
"time_ms" : 0
}
+{
"message" : " Query successful " ,
"result" : {
"affected_locality_groups" : 1 ,
"bytes_scanned" : 681 ,
"cell_count" : 8 ,
"cells_scanned" : 8 ,
"micros_per_row" : 18 ,
"row_count" : 8 ,
"rows" : [
{
"columns" : {
"title" : {
"" : [
{
"time" : 1706198298766257607 ,
"type" : " string " ,
"value" : " Apache Cassandra | Apache Cassandra Documentation "
}
]
}
},
"row_key" : " org.apache.cassandra "
}
],
"rows_scanned" : 8
},
"status" : 200 ,
"time_ms" : 0
}
We get the exact same result; however, we reduced the scanned bytes to 681 and halved the scanned cells, achieving a read amplification of 1
!
Example: Scanning another column family
Let’s scan the language
column instead, which is still stored in the default partition.
diff --git a/guides/wide-column-intro/index.html b/guides/wide-column-intro/index.html
index f2d2870..16ae8d4 100644
--- a/guides/wide-column-intro/index.html
+++ b/guides/wide-column-intro/index.html
@@ -38,13 +38,13 @@
which maps to some value, the cell value
. The cell value, unlike in Bigtable, can be a certain type:
-String (UTF-8 encoded string)
-Boolean (like Byte, but is unmarshalled as boolean)
-Byte (unsigned integer, 1 byte)
-I32 (signed integer, 4 bytes)
-I64 (signed integer, 8 bytes)
-F32 (floating point, 4 bytes)
-F64 (floating point, 8 bytes)
+string (UTF-8 encoded string)
+boolean (like byte, but is unmarshalled as boolean)
+byte (unsigned integer, 1 byte)
+i32 (signed integer, 4 bytes)
+i64 (signed integer, 8 bytes)
+f32 (floating point, 4 bytes)
+f64 (floating point, 8 bytes)
The timestamp allows storing multiple versions of the same cell.
diff --git a/pagefind/fragment/en_3e964c9.pf_fragment b/pagefind/fragment/en_3e964c9.pf_fragment
new file mode 100644
index 0000000..01e5bb9
Binary files /dev/null and b/pagefind/fragment/en_3e964c9.pf_fragment differ
diff --git a/pagefind/fragment/en_4341d75.pf_fragment b/pagefind/fragment/en_4341d75.pf_fragment
new file mode 100644
index 0000000..c333a97
Binary files /dev/null and b/pagefind/fragment/en_4341d75.pf_fragment differ
diff --git a/pagefind/fragment/en_5983553.pf_fragment b/pagefind/fragment/en_5983553.pf_fragment
deleted file mode 100644
index 5e4fe91..0000000
Binary files a/pagefind/fragment/en_5983553.pf_fragment and /dev/null differ
diff --git a/pagefind/fragment/en_63d921c.pf_fragment b/pagefind/fragment/en_63d921c.pf_fragment
new file mode 100644
index 0000000..9dd92b4
Binary files /dev/null and b/pagefind/fragment/en_63d921c.pf_fragment differ
diff --git a/pagefind/fragment/en_7f3284a.pf_fragment b/pagefind/fragment/en_7f3284a.pf_fragment
new file mode 100644
index 0000000..440a774
Binary files /dev/null and b/pagefind/fragment/en_7f3284a.pf_fragment differ
diff --git a/pagefind/fragment/en_be1582c.pf_fragment b/pagefind/fragment/en_be1582c.pf_fragment
new file mode 100644
index 0000000..fa144a9
Binary files /dev/null and b/pagefind/fragment/en_be1582c.pf_fragment differ
diff --git a/pagefind/fragment/en_c27e9e9.pf_fragment b/pagefind/fragment/en_c27e9e9.pf_fragment
deleted file mode 100644
index 4d0f7cd..0000000
Binary files a/pagefind/fragment/en_c27e9e9.pf_fragment and /dev/null differ
diff --git a/pagefind/fragment/en_c2e277c.pf_fragment b/pagefind/fragment/en_c2e277c.pf_fragment
deleted file mode 100644
index a549583..0000000
Binary files a/pagefind/fragment/en_c2e277c.pf_fragment and /dev/null differ
diff --git a/pagefind/fragment/en_f4feb32.pf_fragment b/pagefind/fragment/en_f4feb32.pf_fragment
deleted file mode 100644
index c76802b..0000000
Binary files a/pagefind/fragment/en_f4feb32.pf_fragment and /dev/null differ
diff --git a/pagefind/fragment/en_fd5bf06.pf_fragment b/pagefind/fragment/en_fd5bf06.pf_fragment
deleted file mode 100644
index 74f6b97..0000000
Binary files a/pagefind/fragment/en_fd5bf06.pf_fragment and /dev/null differ
diff --git a/pagefind/index/en_595d159.pf_index b/pagefind/index/en_595d159.pf_index
deleted file mode 100644
index 74772e0..0000000
Binary files a/pagefind/index/en_595d159.pf_index and /dev/null differ
diff --git a/pagefind/index/en_a0a4322.pf_index b/pagefind/index/en_a0a4322.pf_index
new file mode 100644
index 0000000..5d90922
Binary files /dev/null and b/pagefind/index/en_a0a4322.pf_index differ
diff --git a/pagefind/pagefind-entry.json b/pagefind/pagefind-entry.json
index c0d46ff..698fa5c 100644
--- a/pagefind/pagefind-entry.json
+++ b/pagefind/pagefind-entry.json
@@ -1 +1 @@
-{"version":"1.0.4","languages":{"en":{"hash":"en_36c09d1723","wasm":"en","page_count":11}}}
\ No newline at end of file
+{"version":"1.0.4","languages":{"en":{"hash":"en_5a5433fd8e","wasm":"en","page_count":11}}}
\ No newline at end of file
diff --git a/pagefind/pagefind.en_36c09d1723.pf_meta b/pagefind/pagefind.en_36c09d1723.pf_meta
deleted file mode 100644
index b6201f1..0000000
Binary files a/pagefind/pagefind.en_36c09d1723.pf_meta and /dev/null differ
diff --git a/pagefind/pagefind.en_5a5433fd8e.pf_meta b/pagefind/pagefind.en_5a5433fd8e.pf_meta
new file mode 100644
index 0000000..a9d5ed4
Binary files /dev/null and b/pagefind/pagefind.en_5a5433fd8e.pf_meta differ
diff --git a/reference/json-api/ingest-data/index.html b/reference/json-api/ingest-data/index.html
index 6ae244d..67690cb 100644
--- a/reference/json-api/ingest-data/index.html
+++ b/reference/json-api/ingest-data/index.html
@@ -32,6 +32,6 @@
URL
POST http://smoltable:9876/v1/table/[name]/write
Example body
-
{
"items" : [
{
"row_key" : " org.apache.spark " ,
"cells" : [
{
"column_key" : " title: " ,
"value" : {
"String" : " Apache Spark™ - Unified Engine for large-scale data analytics "
}
},
{
"column_key" : " anchor:org.apache.hbase " ,
"value" : {
"String" : " Visit Apache Spark "
}
},
{
"column_key" : " meta:size " ,
"value" : {
"I64" : 152014
}
},
]
}
]
}
+
{
"items" : [
{
"row_key" : " org.apache.spark " ,
"cells" : [
{
"column_key" : " title: " ,
"type" : " string " ,
"value" : " Apache Spark™ - Unified Engine for large-scale data analytics "
},
{
"column_key" : " anchor:org.apache.hbase " ,
"type" : " string " ,
"value" : " Visit Apache Spark "
},
{
"column_key" : " meta:size " ,
"type" : " i64 " ,
"value" : 152014
}
]
}
]
}
Example response
{
"message" : " Data ingestion successful " ,
"result" : {
"items" : {
"cell_count" : 3 ,
"row_count" : 1
},
"micros_per_item" : 5
},
"status" : 200 ,
"time_ms" : 0
}