diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index caa7ebee053d..820848b4fb1b 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -404,7 +404,7 @@ jobs: uses: ./.github/actions/save-coverage-data regress-tests: - needs: [ check-permissions, build-neon ] + needs: [ check-permissions, build-neon, tag ] runs-on: [ self-hosted, gen3, large ] container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned @@ -436,6 +436,7 @@ jobs: env: TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} CHECK_ONDISK_DATA_COMPATIBILITY: nonempty + BUILD_TAG: ${{ needs.tag.outputs.build-tag }} - name: Merge and upload coverage data if: matrix.build_type == 'debug' && matrix.pg_version == 'v14' diff --git a/Cargo.lock b/Cargo.lock index 48c7bf1795b6..56396657586e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -256,19 +256,21 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "aws-config" -version = "0.56.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3d533e0263bf453cc80af4c8bcc4d64e2aca293bd16f81633a36f1bf4a97cb" +checksum = "80c950a809d39bc9480207cb1cfc879ace88ea7e3a4392a8e9999e45d6e5692e" dependencies = [ "aws-credential-types", "aws-http", + "aws-runtime", "aws-sdk-sso", + "aws-sdk-ssooidc", "aws-sdk-sts", "aws-smithy-async", - "aws-smithy-client", "aws-smithy-http", - "aws-smithy-http-tower", "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", "bytes", @@ -276,52 +278,46 @@ dependencies = [ "hex", "http", "hyper", - "ring", + "ring 0.17.6", "time", "tokio", - "tower", "tracing", "zeroize", ] [[package]] name = "aws-credential-types" -version = "0.56.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4834ba01c5ad1ed9740aa222de62190e3c565d11ab7e72cc68314a258994567" +checksum = "8c1317e1a3514b103cf7d5828bbab3b4d30f56bd22d684f8568bc51b6cfbbb1c" dependencies = [ "aws-smithy-async", + "aws-smithy-runtime-api", "aws-smithy-types", - "fastrand 2.0.0", - "tokio", - "tracing", "zeroize", ] [[package]] name = "aws-http" -version = "0.56.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72badf9de83cc7d66b21b004f09241836823b8302afb25a24708769e576a8d8f" +checksum = "361c4310fdce94328cc2d1ca0c8a48c13f43009c61d3367585685a50ca8c66b6" dependencies = [ - "aws-credential-types", - "aws-smithy-http", + "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", "bytes", "http", "http-body", - "lazy_static", - "percent-encoding", "pin-project-lite", "tracing", ] [[package]] name = "aws-runtime" -version = "0.56.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf832f522111225c02547e1e1c28137e840e4b082399d93a236e4b29193a4667" +checksum = "1ed7ef604a15fd0d4d9e43701295161ea6b504b63c44990ead352afea2bc15e9" dependencies = [ "aws-credential-types", "aws-http", @@ -341,9 +337,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "0.29.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e30370b61599168d38190ad272bb91842cd81870a6ca035c05dd5726d22832c" +checksum = "9dcafc2fe52cc30b2d56685e2fa6a879ba50d79704594852112337a472ddbd24" dependencies = [ "aws-credential-types", "aws-http", @@ -351,7 +347,6 @@ dependencies = [ "aws-sigv4", "aws-smithy-async", 
"aws-smithy-checksums", - "aws-smithy-client", "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-json", @@ -366,22 +361,42 @@ dependencies = [ "once_cell", "percent-encoding", "regex", - "tokio-stream", "tracing", "url", ] [[package]] name = "aws-sdk-sso" -version = "0.29.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f41bf2c28d32dbb9894a8fcfcb148265d034d3f4a170552a47553a09de890895" +checksum = "0619ab97a5ca8982e7de073cdc66f93e5f6a1b05afc09e696bec1cb3607cd4df" +dependencies = [ + "aws-credential-types", + "aws-http", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "http", + "regex", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f04b9f5474cc0f35d829510b2ec8c21e352309b46bf9633c5a81fb9321e9b1c7" dependencies = [ "aws-credential-types", "aws-http", "aws-runtime", "aws-smithy-async", - "aws-smithy-client", "aws-smithy-http", "aws-smithy-json", "aws-smithy-runtime", @@ -391,21 +406,19 @@ dependencies = [ "bytes", "http", "regex", - "tokio-stream", "tracing", ] [[package]] name = "aws-sdk-sts" -version = "0.29.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e21aa1a5b0853969a1ef96ccfaa8ff5d57c761549786a4d5f86c1902b2586a" +checksum = "5700da387716ccfc30b27f44b008f457e1baca5b0f05b6b95455778005e3432a" dependencies = [ "aws-credential-types", "aws-http", "aws-runtime", "aws-smithy-async", - "aws-smithy-client", "aws-smithy-http", "aws-smithy-json", "aws-smithy-query", @@ -421,42 +434,49 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "0.56.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cb40a93429794065f41f0581734fc56a345f6a38d8e2e3c25c7448d930cd132" +checksum = "380adcc8134ad8bbdfeb2ace7626a869914ee266322965276cbc54066186d236" dependencies = [ + "aws-credential-types", "aws-smithy-eventstream", "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", "bytes", + "crypto-bigint 0.5.5", "form_urlencoded", "hex", "hmac", "http", "once_cell", + "p256", "percent-encoding", "regex", + "ring 0.17.6", "sha2", + "subtle", "time", "tracing", + "zeroize", ] [[package]] name = "aws-smithy-async" -version = "0.56.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee6d17d487c8b579423067718b3580c0908d0f01d7461813f94ec4323bad623" +checksum = "3e37ca17d25fe1e210b6d4bdf59b81caebfe99f986201a1228cb5061233b4b13" dependencies = [ "futures-util", "pin-project-lite", "tokio", - "tokio-stream", ] [[package]] name = "aws-smithy-checksums" -version = "0.56.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d1849fd5916904513fb0862543b36f8faab43c07984dbc476132b7da1aed056" +checksum = "c5a373ec01aede3dd066ec018c1bc4e8f5dd11b2c11c59c8eef1a5c68101f397" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -473,35 +493,11 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws-smithy-client" -version = "0.56.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdbe0a3ad15283cc5f863a68cb6adc8e256e7c109c43c01bdd09be407219a1e9" -dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", - "bytes", - "fastrand 2.0.0", - 
"http", - "http-body", - "hyper", - "hyper-rustls", - "lazy_static", - "pin-project-lite", - "rustls", - "tokio", - "tower", - "tracing", -] - [[package]] name = "aws-smithy-eventstream" -version = "0.56.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56afef1aa766f512b4970b4c3150b9bf2df8035939723830df4b30267e2d7cb" +checksum = "1c669e1e5fc0d79561bf7a122b118bd50c898758354fe2c53eb8f2d31507cbc3" dependencies = [ "aws-smithy-types", "bytes", @@ -510,57 +506,39 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.56.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34dc313472d727f5ef44fdda93e668ebfe17380c99dee512c403e3ca51863bb9" +checksum = "5b1de8aee22f67de467b2e3d0dd0fb30859dc53f579a63bd5381766b987db644" dependencies = [ "aws-smithy-eventstream", + "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", "http", "http-body", - "hyper", "once_cell", "percent-encoding", "pin-project-lite", "pin-utils", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "aws-smithy-http-tower" -version = "0.56.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd50fca5a4ea4ec3771689ee93bf06b32de02a80af01ed93a8f8a4ed90e8483" -dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "bytes", - "http", - "http-body", - "pin-project-lite", - "tower", "tracing", ] [[package]] name = "aws-smithy-json" -version = "0.56.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3591dd7c2fe01ab8025e4847a0a0f6d0c2b2269714688ffb856f9cf6c6d465cf" +checksum = "6a46dd338dc9576d6a6a5b5a19bd678dcad018ececee11cf28ecd7588bd1a55c" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-query" -version = "0.56.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbabb1145e65dd57ae72d91a2619d3f5fba40b68a5f40ba009c30571dfd60aff" +checksum = "feb5b8c7a86d4b6399169670723b7e6f21a39fc833a30f5c5a2f997608178129" dependencies = [ "aws-smithy-types", "urlencoding", @@ -568,74 +546,86 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "0.56.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3687fb838d4ad1c883b62eb59115bc9fb02c4f308aac49a7df89627067f6eb0d" +checksum = "273479291efc55e7b0bce985b139d86b6031adb8e50f65c1f712f20ba38f6388" dependencies = [ "aws-smithy-async", - "aws-smithy-client", "aws-smithy-http", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "fastrand 2.0.0", + "h2", "http", "http-body", + "hyper", + "hyper-rustls", "once_cell", "pin-project-lite", "pin-utils", + "rustls", "tokio", "tracing", ] [[package]] name = "aws-smithy-runtime-api" -version = "0.56.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cfbf1e5c2108b41f5ca607cde40dd5109fecc448f5d30c8e614b61f36dce704" +checksum = "c6cebff0d977b6b6feed2fd07db52aac58ba3ccaf26cdd49f1af4add5061bef9" dependencies = [ "aws-smithy-async", - "aws-smithy-http", "aws-smithy-types", "bytes", "http", + "pin-project-lite", "tokio", "tracing", + "zeroize", ] [[package]] name = "aws-smithy-types" -version = "0.56.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed0a94eefd845a2a78677f1b72f02fa75802d38f7f59be675add140279aa8bf" +checksum = "d7f48b3f27ddb40ab19892a5abda331f403e3cb877965e4e51171447807104af" 
dependencies = [ "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http", + "http-body", "itoa", "num-integer", + "pin-project-lite", + "pin-utils", "ryu", "serde", "time", + "tokio", + "tokio-util", ] [[package]] name = "aws-smithy-xml" -version = "0.56.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c88052c812f696143ad7ba729c63535209ff0e0f49e31a6d2b1205208ea6ea79" +checksum = "0ec40d74a67fd395bc3f6b4ccbdf1543672622d905ef3f979689aea5b730cb95" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "0.56.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bceb8cf724ad057ad7f327d0d256d7147b3eac777b39849a26189e003dc9782" +checksum = "8403fc56b1f3761e8efe45771ddc1165e47ec3417c68e68a4519b5cb030159ca" dependencies = [ "aws-credential-types", "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", + "aws-smithy-runtime-api", "aws-smithy-types", "http", "rustc_version", @@ -651,7 +641,7 @@ dependencies = [ "async-trait", "axum-core", "base64 0.21.1", - "bitflags", + "bitflags 1.3.2", "bytes", "futures-util", "http", @@ -705,7 +695,7 @@ dependencies = [ "bytes", "dyn-clone", "futures", - "getrandom 0.2.9", + "getrandom 0.2.11", "http-types", "log", "paste", @@ -799,6 +789,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + [[package]] name = "base64" version = "0.13.1" @@ -848,7 +844,7 @@ version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cexpr", "clang-sys", "lazy_static", @@ -871,6 +867,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + [[package]] name = "block-buffer" version = "0.10.4" @@ -947,11 +949,12 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.79" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", + "libc", ] [[package]] @@ -1054,7 +1057,7 @@ checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990" dependencies = [ "anstream", "anstyle", - "bitflags", + "bitflags 1.3.2", "clap_lex", "strsim", ] @@ -1126,6 +1129,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-compression", + "bytes", "cfg-if", "chrono", "clap", @@ -1167,6 +1171,12 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "const-oid" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28c122c3980598d243d63d9a704629a2d748d101f278052ff068be5a4423ab6f" + [[package]] name = "const_fn" version = "0.4.9" @@ -1375,7 +1385,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67" dependencies = [ - "bitflags", + "bitflags 1.3.2", "crossterm_winapi", "libc", "mio", @@ -1394,6 +1404,28 @@ dependencies = [ "winapi", ] +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -1468,6 +1500,16 @@ dependencies = [ "uuid", ] +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" +dependencies = [ + "const-oid", + "zeroize", +] + [[package]] name = "der-parser" version = "8.2.0" @@ -1510,12 +1552,44 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23d2f3407d9a573d666de4b5bdf10569d73ca9478087346697dcbae6244bfbcd" +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der", + "elliptic-curve", + "rfc6979", + "signature", +] + [[package]] name = "either" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" +dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der", + "digest", + "ff", + "generic-array", + "group", + "pkcs8", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "encoding_rs" version = "0.8.32" @@ -1638,6 +1712,16 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" +[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "filetime" version = "0.2.21" @@ -1848,9 +1932,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", "js-sys", @@ -1893,6 +1977,17 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "h2" 
version = "0.3.19" @@ -2235,7 +2330,7 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" dependencies = [ - "bitflags", + "bitflags 1.3.2", "inotify-sys", "libc", ] @@ -2246,7 +2341,7 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd168d97690d0b8c412d6b6c10360277f4d7ee495c5d0d5d5fe0854923255cc" dependencies = [ - "bitflags", + "bitflags 1.3.2", "futures-core", "inotify-sys", "libc", @@ -2287,9 +2382,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.7.2" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "is-terminal" @@ -2344,7 +2439,7 @@ checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" dependencies = [ "base64 0.21.1", "pem 1.1.1", - "ring", + "ring 0.16.20", "serde", "serde_json", "simple_asn1", @@ -2366,7 +2461,7 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8367585489f01bc55dd27404dcf56b95e6da061a256a666ab23be9ba96a2e587" dependencies = [ - "bitflags", + "bitflags 1.3.2", "libc", ] @@ -2384,9 +2479,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.144" +version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] name = "libloading" @@ -2580,7 +2675,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" dependencies = [ "autocfg", - "bitflags", + "bitflags 1.3.2", "cfg-if", "libc", ] @@ -2591,7 +2686,7 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cfg-if", "libc", "memoffset 0.7.1", @@ -2615,7 +2710,7 @@ version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "729f63e1ca555a43fe3efa4f3efdf4801c479da85b432242a7b726f353c88486" dependencies = [ - "bitflags", + "bitflags 1.3.2", "crossbeam-channel", "filetime", "fsevent-sys", @@ -2693,7 +2788,7 @@ checksum = "c38841cdd844847e3e7c8d29cef9dcfed8877f8f56f9071f77843ecf3baf937f" dependencies = [ "base64 0.13.1", "chrono", - "getrandom 0.2.9", + "getrandom 0.2.11", "http", "rand 0.8.5", "serde", @@ -2736,11 +2831,11 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "openssl" -version = "0.10.55" +version = "0.10.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" +checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800" dependencies = [ - "bitflags", + "bitflags 2.4.1", "cfg-if", "foreign-types", "libc", @@ -2768,9 +2863,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.90" +version = "0.9.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6" +checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f" dependencies = [ "cc", "libc", @@ -2896,6 +2991,17 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2", +] + [[package]] name = "pagectl" version = "0.1.0" @@ -2970,6 +3076,7 @@ dependencies = [ "scopeguard", "serde", "serde_json", + "serde_path_to_error", "serde_with", "signal-hook", "smallvec", @@ -3010,6 +3117,7 @@ dependencies = [ "serde_with", "strum", "strum_macros", + "thiserror", "utils", "workspace_hack", ] @@ -3188,6 +3296,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.27" @@ -3394,7 +3512,7 @@ version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1de8dacb0873f77e6aefc6d71e044761fcc68060290f5b1089fcdf84626bb69" dependencies = [ - "bitflags", + "bitflags 1.3.2", "byteorder", "hex", "lazy_static", @@ -3494,6 +3612,7 @@ dependencies = [ "humantime", "hyper", "hyper-tungstenite", + "ipnet", "itertools", "md5", "metrics", @@ -3504,6 +3623,7 @@ dependencies = [ "pbkdf2", "pin-project-lite", "postgres-native-tls", + "postgres-protocol", "postgres_backend", "pq_proto", "prometheus", @@ -3523,6 +3643,7 @@ dependencies = [ "serde", "serde_json", "sha2", + "smol_str", "socket2 0.5.3", "sync_wrapper", "task-local-extensions", @@ -3623,7 +3744,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.9", + "getrandom 0.2.11", ] [[package]] @@ -3664,7 +3785,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4954fbc00dcd4d8282c987710e50ba513d351400dbdd00e803a05172a90d8976" dependencies = [ "pem 2.0.1", - "ring", + "ring 0.16.20", "time", "yasna", ] @@ -3675,7 +3796,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -3684,7 +3805,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -3735,8 +3856,7 @@ dependencies = [ "aws-credential-types", "aws-sdk-s3", "aws-smithy-async", - "aws-smithy-http", - "aws-types", + "aws-smithy-types", "azure_core", "azure_identity", "azure_storage", @@ -3834,7 +3954,7 @@ dependencies = [ "async-trait", "chrono", "futures", - "getrandom 0.2.9", + "getrandom 0.2.11", "http", "hyper", "parking_lot 0.11.2", @@ -3855,7 +3975,7 @@ checksum = "1b97ad83c2fc18113346b7158d79732242002427c30f620fa817c1f32901e0a8" 
dependencies = [ "anyhow", "async-trait", - "getrandom 0.2.9", + "getrandom 0.2.11", "matchit", "opentelemetry", "reqwest", @@ -3876,6 +3996,17 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + "crypto-bigint 0.4.9", + "hmac", + "zeroize", +] + [[package]] name = "ring" version = "0.16.20" @@ -3886,11 +4017,25 @@ dependencies = [ "libc", "once_cell", "spin 0.5.2", - "untrusted", + "untrusted 0.7.1", "web-sys", "winapi", ] +[[package]] +name = "ring" +version = "0.17.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "684d5e6e18f669ccebf64a92236bb7db9a34f07be010e3627368182027180866" +dependencies = [ + "cc", + "getrandom 0.2.11", + "libc", + "spin 0.9.8", + "untrusted 0.9.0", + "windows-sys 0.48.0", +] + [[package]] name = "routerify" version = "3.0.0" @@ -3978,7 +4123,7 @@ version = "0.36.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6da3636faa25820d8648e0e31c5d519bbb01f72fdf57131f0f5f7da5fed36eab" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", @@ -3992,7 +4137,7 @@ version = "0.37.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4eb579851244c2c03e7c24f501c3432bed80b8f720af1d6e5b0e0f01555a035" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", @@ -4002,13 +4147,13 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.6" +version = "0.21.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" +checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" dependencies = [ "log", - "ring", - "rustls-webpki 0.101.4", + "ring 0.17.6", + "rustls-webpki 0.101.7", "sct", ] @@ -4039,18 +4184,18 @@ version = "0.100.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e98ff011474fa39949b7e5c0428f9b4937eda7da7848bbb947786b7be0b27dab" dependencies = [ - "ring", - "untrusted", + "ring 0.16.20", + "untrusted 0.7.1", ] [[package]] name = "rustls-webpki" -version = "0.101.4" +version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d93931baf2d282fff8d3a532bbfd7653f734643161b87e3e01e59a04439bf0d" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring", - "untrusted", + "ring 0.17.6", + "untrusted 0.9.0", ] [[package]] @@ -4073,8 +4218,6 @@ dependencies = [ "async-stream", "aws-config", "aws-sdk-s3", - "aws-smithy-http", - "aws-types", "bincode", "bytes", "chrono", @@ -4196,8 +4339,8 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" dependencies = [ - "ring", - "untrusted", + "ring 0.16.20", + "untrusted 0.7.1", ] [[package]] @@ -4206,13 +4349,27 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "621e3680f3e07db4c9c2c3fb07c6223ab2fab2e54bd3c04c3ae037990f428c32" +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + 
[[package]] name = "security-framework" version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8" dependencies = [ - "bitflags", + "bitflags 1.3.2", "core-foundation", "core-foundation-sys", "libc", @@ -4322,7 +4479,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99dc599bd6646884fc403d593cdcb9816dd67c50cff3271c01ff123617908dcd" dependencies = [ "debugid", - "getrandom 0.2.9", + "getrandom 0.2.11", "hex", "serde", "serde_json", @@ -4510,6 +4667,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simple_asn1" version = "0.6.2" @@ -4543,6 +4710,15 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +[[package]] +name = "smol_str" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74212e6bbe9a4352329b2f68ba3130c15a3f26fe88ff22dbdc6cdd58fa85e99c" +dependencies = [ + "serde", +] + [[package]] name = "socket2" version = "0.4.9" @@ -4578,6 +4754,16 @@ dependencies = [ "lock_api", ] +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -4971,7 +5157,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd5831152cb0d3f79ef5523b357319ba154795d64c7078b2daa95a803b54057f" dependencies = [ "futures", - "ring", + "ring 0.16.20", "rustls", "tokio", "tokio-postgres", @@ -5427,6 +5613,12 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "ureq" version = "2.7.1" @@ -5528,7 +5720,7 @@ version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" dependencies = [ - "getrandom 0.2.9", + "getrandom 0.2.11", "serde", ] @@ -5987,9 +6179,13 @@ dependencies = [ "aws-config", "aws-runtime", "aws-sigv4", + "aws-smithy-async", "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", "axum", "base64 0.21.1", + "base64ct", "bytes", "cc", "chrono", @@ -6007,6 +6203,7 @@ dependencies = [ "futures-sink", "futures-util", "hex", + "hmac", "hyper", "itertools", "libc", @@ -6021,12 +6218,13 @@ dependencies = [ "regex", "regex-syntax 0.7.2", "reqwest", - "ring", + "ring 0.16.20", "rustls", "scopeguard", "serde", "serde_json", "smallvec", + "subtle", "syn 1.0.109", "syn 2.0.28", "time", diff --git a/Cargo.toml b/Cargo.toml index 6df48ffc55f8..ba8b49c0e010 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,12 +45,11 @@ azure_storage_blobs = "0.16" flate2 = "1.0.26" async-stream = "0.3" async-trait = "0.1" -aws-config = { version = "0.56", default-features = false, features=["rustls"] } 
-aws-sdk-s3 = "0.29" -aws-smithy-http = "0.56" -aws-smithy-async = { version = "0.56", default-features = false, features=["rt-tokio"] } -aws-credential-types = "0.56" -aws-types = "0.56" +aws-config = { version = "1.0", default-features = false, features=["rustls"] } +aws-sdk-s3 = "1.0" +aws-smithy-async = { version = "1.0", default-features = false, features=["rt-tokio"] } +aws-smithy-types = "1.0" +aws-credential-types = "1.0" axum = { version = "0.6.20", features = ["ws"] } base64 = "0.13.0" bincode = "1.3" @@ -89,6 +88,7 @@ humantime-serde = "1.1.1" hyper = "0.14" hyper-tungstenite = "0.11" inotify = "0.10.2" +ipnet = "2.9.0" itertools = "0.10" jsonwebtoken = "8" libc = "0.2" @@ -126,11 +126,13 @@ sd-notify = "0.4.1" sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } serde = { version = "1.0", features = ["derive"] } serde_json = "1" +serde_path_to_error = "0.1" serde_with = "2.0" serde_assert = "0.5.0" sha2 = "0.10.2" signal-hook = "0.3" smallvec = "1.11" +smol_str = { version = "0.2.0", features = ["serde"] } socket2 = "0.5" strum = "0.24" strum_macros = "0.24" diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 36c3f874d481..a3772265c060 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -387,18 +387,10 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ ARG PG_VERSION ENV PATH "/usr/local/pgsql/bin:$PATH" -RUN case "${PG_VERSION}" in \ - "v14" | "v15") \ - export TIMESCALEDB_VERSION=2.10.1 \ - export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \ - ;; \ - *) \ - echo "TimescaleDB not supported on this PostgreSQL version. See https://github.com/timescale/timescaledb/issues/5752" && exit 0;; \ - esac && \ - apt-get update && \ +RUN apt-get update && \ apt-get install -y cmake && \ - wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \ - echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \ + wget https://github.com/timescale/timescaledb/archive/refs/tags/2.13.0.tar.gz -O timescaledb.tar.gz && \ + echo "584a351c7775f0e067eaa0e7277ea88cab9077cc4c455cbbf09a5d9723dce95d timescaledb.tar.gz" | sha256sum --check && \ mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \ ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \ cd build && \ @@ -714,6 +706,23 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz - cargo pgrx install --release && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control +######################################################################################### +# +# Layer "wal2json-build" +# Compile "wal2json" extension +# +######################################################################################### + +FROM build-deps AS wal2json-pg-build +COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ + +ENV PATH "/usr/local/pgsql/bin/:$PATH" +RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \ + echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \ + mkdir wal2json-src && cd wal2json-src && tar xvzf ../wal2json_2_5.tar.gz --strip-components=1 -C . 
&& \ + make -j $(getconf _NPROCESSORS_ONLN) && \ + make -j $(getconf _NPROCESSORS_ONLN) install + ######################################################################################### # # Layer "neon-pg-ext-build" @@ -750,6 +759,7 @@ COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=wal2json-pg-build /usr/local/pgsql /usr/local/pgsql COPY pgxn/ pgxn/ RUN make -j $(getconf _NPROCESSORS_ONLN) \ diff --git a/README.md b/README.md index 75fad605c59b..3e3123f5eeb4 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,9 @@ tenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one +# create postgres compute node +> cargo neon endpoint create main + # start postgres compute node > cargo neon endpoint start main Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ... @@ -185,8 +188,11 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: (L) main [de200bd42b49cc1814412c7e592dd6e9] (L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601] +# create postgres on that branch +> cargo neon endpoint create migration_check --branch-name migration_check + # start postgres on that branch -> cargo neon endpoint start migration_check --branch-name migration_check +> cargo neon endpoint start migration_check Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ... Starting postgres at 'postgresql://cloud_admin@127.0.0.1:55434/postgres' diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index 6c93befaa319..47378f1910ab 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -38,3 +38,4 @@ toml_edit.workspace = true remote_storage = { version = "0.1", path = "../libs/remote_storage/" } vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" } zstd = "0.12.4" +bytes = "1.0" diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 7f22bda13ea5..36e9ca0731ae 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -31,7 +31,7 @@ //! -C 'postgresql://cloud_admin@localhost/postgres' \ //! -S /var/db/postgres/specs/current.json \ //! -b /usr/local/bin/postgres \ -//! -r {"bucket": "neon-dev-extensions-eu-central-1", "region": "eu-central-1"} +//! -r http://pg-ext-s3-gateway //! ``` //! use std::collections::HashMap; @@ -51,7 +51,7 @@ use compute_api::responses::ComputeStatus; use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec}; use compute_tools::configurator::launch_configurator; -use compute_tools::extension_server::{get_pg_version, init_remote_storage}; +use compute_tools::extension_server::get_pg_version; use compute_tools::http::api::launch_http_server; use compute_tools::logger::*; use compute_tools::monitor::launch_monitor; @@ -60,7 +60,7 @@ use compute_tools::spec::*; // this is an arbitrary build tag. 
Fine as a default / for testing purposes // in-case of not-set environment var -const BUILD_TAG_DEFAULT: &str = "5670669815"; +const BUILD_TAG_DEFAULT: &str = "latest"; fn main() -> Result<()> { init_tracing_and_logging(DEFAULT_LOG_LEVEL)?; @@ -74,10 +74,18 @@ fn main() -> Result<()> { let pgbin_default = String::from("postgres"); let pgbin = matches.get_one::("pgbin").unwrap_or(&pgbin_default); - let remote_ext_config = matches.get_one::("remote-ext-config"); - let ext_remote_storage = remote_ext_config.map(|x| { - init_remote_storage(x).expect("cannot initialize remote extension storage from config") - }); + let ext_remote_storage = matches + .get_one::("remote-ext-config") + // Compatibility hack: if the control plane specified any remote-ext-config + // use the default value for extension storage proxy gateway. + // Remove this once the control plane is updated to pass the gateway URL + .map(|conf| { + if conf.starts_with("http") { + conf.trim_end_matches('/') + } else { + "http://pg-ext-s3-gateway" + } + }); let http_port = *matches .get_one::("http-port") @@ -198,7 +206,7 @@ fn main() -> Result<()> { live_config_allowed, state: Mutex::new(new_state), state_changed: Condvar::new(), - ext_remote_storage, + ext_remote_storage: ext_remote_storage.map(|s| s.to_string()), ext_download_progress: RwLock::new(HashMap::new()), build_tag, }; diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 5ace8ca1d24c..28770acdcdcf 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -25,7 +25,7 @@ use compute_api::responses::{ComputeMetrics, ComputeStatus}; use compute_api::spec::{ComputeMode, ComputeSpec}; use utils::measured_stream::MeasuredReader; -use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath}; +use remote_storage::{DownloadError, RemotePath}; use crate::checker::create_availability_check_data; use crate::pg_helpers::*; @@ -59,8 +59,8 @@ pub struct ComputeNode { pub state: Mutex, /// `Condvar` to allow notifying waiters about state changes. pub state_changed: Condvar, - /// the S3 bucket that we search for extensions in - pub ext_remote_storage: Option, + /// the address of extension storage proxy gateway + pub ext_remote_storage: Option, // key: ext_archive_name, value: started download time, download_completed? pub ext_download_progress: RwLock, bool)>>, pub build_tag: String, @@ -728,7 +728,12 @@ impl ComputeNode { // Write new config let pgdata_path = Path::new(&self.pgdata); - config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec, None)?; + let postgresql_conf_path = pgdata_path.join("postgresql.conf"); + config::write_postgres_conf(&postgresql_conf_path, &spec, None)?; + // temporarily reset max_cluster_size in config + // to avoid the possibility of hitting the limit, while we are reconfiguring: + // creating new extensions, roles, etc... 
+ config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?; self.pg_reload_conf()?; let mut client = Client::connect(self.connstr.as_str(), NoTls)?; @@ -749,6 +754,10 @@ impl ComputeNode { // 'Close' connection drop(client); + // reset max_cluster_size in config back to original value and reload config + config::compute_ctl_temp_override_remove(pgdata_path)?; + self.pg_reload_conf()?; + let unknown_op = "unknown".to_string(); let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op); info!( @@ -809,7 +818,17 @@ impl ComputeNode { let config_time = Utc::now(); if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates { + let pgdata_path = Path::new(&self.pgdata); + // temporarily reset max_cluster_size in config + // to avoid the possibility of hitting the limit, while we are applying config: + // creating new extensions, roles, etc... + config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?; + self.pg_reload_conf()?; + self.apply_config(&compute_state)?; + + config::compute_ctl_temp_override_remove(pgdata_path)?; + self.pg_reload_conf()?; } let startup_end_time = Utc::now(); @@ -957,12 +976,12 @@ LIMIT 100", real_ext_name: String, ext_path: RemotePath, ) -> Result { - let remote_storage = self - .ext_remote_storage - .as_ref() - .ok_or(DownloadError::BadInput(anyhow::anyhow!( - "Remote extensions storage is not configured", - )))?; + let ext_remote_storage = + self.ext_remote_storage + .as_ref() + .ok_or(DownloadError::BadInput(anyhow::anyhow!( + "Remote extensions storage is not configured", + )))?; let ext_archive_name = ext_path.object_name().expect("bad path"); @@ -1018,7 +1037,7 @@ LIMIT 100", let download_size = extension_server::download_extension( &real_ext_name, &ext_path, - remote_storage, + ext_remote_storage, &self.pgbin, ) .await diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index bc48a2110dfd..a7ef8cea9289 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -93,5 +93,25 @@ pub fn write_postgres_conf( writeln!(file, "neon.extension_server_port={}", port)?; } + // This is essential to keep this line at the end of the file, + // because it is intended to override any settings above. 
+ writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?; + + Ok(()) +} + +/// create file compute_ctl_temp_override.conf in pgdata_dir +/// add provided options to this file +pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> { + let path = pgdata_path.join("compute_ctl_temp_override.conf"); + let mut file = File::create(path)?; + write!(file, "{}", options)?; + Ok(()) +} + +/// remove file compute_ctl_temp_override.conf in pgdata_dir +pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> { + let path = pgdata_path.join("compute_ctl_temp_override.conf"); + std::fs::remove_file(path)?; Ok(()) } diff --git a/compute_tools/src/extension_server.rs b/compute_tools/src/extension_server.rs index 9732d8adeace..2cec12119f79 100644 --- a/compute_tools/src/extension_server.rs +++ b/compute_tools/src/extension_server.rs @@ -71,18 +71,16 @@ More specifically, here is an example ext_index.json } } */ -use anyhow::Context; use anyhow::{self, Result}; +use anyhow::{bail, Context}; +use bytes::Bytes; use compute_api::spec::RemoteExtSpec; use regex::Regex; use remote_storage::*; -use serde_json; -use std::io::Read; -use std::num::NonZeroUsize; +use reqwest::StatusCode; use std::path::Path; use std::str; use tar::Archive; -use tokio::io::AsyncReadExt; use tracing::info; use tracing::log::warn; use zstd::stream::read::Decoder; @@ -138,23 +136,31 @@ fn parse_pg_version(human_version: &str) -> &str { pub async fn download_extension( ext_name: &str, ext_path: &RemotePath, - remote_storage: &GenericRemoteStorage, + ext_remote_storage: &str, pgbin: &str, ) -> Result { info!("Download extension {:?} from {:?}", ext_name, ext_path); - let mut download = remote_storage.download(ext_path).await?; - let mut download_buffer = Vec::new(); - download - .download_stream - .read_to_end(&mut download_buffer) - .await?; + + // TODO add retry logic + let download_buffer = + match download_extension_tar(ext_remote_storage, &ext_path.to_string()).await { + Ok(buffer) => buffer, + Err(error_message) => { + return Err(anyhow::anyhow!( + "error downloading extension {:?}: {:?}", + ext_name, + error_message + )); + } + }; + let download_size = download_buffer.len() as u64; + info!("Download size {:?}", download_size); // it's unclear whether it is more performant to decompress into memory or not // TODO: decompressing into memory can be avoided - let mut decoder = Decoder::new(download_buffer.as_slice())?; - let mut decompress_buffer = Vec::new(); - decoder.read_to_end(&mut decompress_buffer)?; - let mut archive = Archive::new(decompress_buffer.as_slice()); + let decoder = Decoder::new(download_buffer.as_ref())?; + let mut archive = Archive::new(decoder); + let unzip_dest = pgbin .strip_suffix("/bin/postgres") .expect("bad pgbin") @@ -222,29 +228,32 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) { } } -// This function initializes the necessary structs to use remote storage -pub fn init_remote_storage(remote_ext_config: &str) -> anyhow::Result { - #[derive(Debug, serde::Deserialize)] - struct RemoteExtJson { - bucket: String, - region: String, - endpoint: Option, - prefix: Option, - } - let remote_ext_json = serde_json::from_str::(remote_ext_config)?; +// Do request to extension storage proxy, i.e. 
+// curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst +// using HHTP GET +// and return the response body as bytes +// +async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result { + let uri = format!("{}/{}", ext_remote_storage, ext_path); - let config = S3Config { - bucket_name: remote_ext_json.bucket, - bucket_region: remote_ext_json.region, - prefix_in_bucket: remote_ext_json.prefix, - endpoint: remote_ext_json.endpoint, - concurrency_limit: NonZeroUsize::new(100).expect("100 != 0"), - max_keys_per_list_response: None, - }; - let config = RemoteStorageConfig { - storage: RemoteStorageKind::AwsS3(config), - }; - GenericRemoteStorage::from_config(&config) + info!("Download extension {:?} from uri {:?}", ext_path, uri); + + let resp = reqwest::get(uri).await?; + + match resp.status() { + StatusCode::OK => match resp.bytes().await { + Ok(resp) => { + info!("Download extension {:?} completed successfully", ext_path); + Ok(resp) + } + Err(e) => bail!("could not deserialize remote extension response: {}", e), + }, + StatusCode::SERVICE_UNAVAILABLE => bail!("remote extension is temporarily unavailable"), + _ => bail!( + "unexpected remote extension response status code: {}", + resp.status() + ), + } } #[cfg(test)] diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index 8851be1ec109..fa2c4cff28d9 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -123,7 +123,7 @@ async fn routes(req: Request, compute: &Arc) -> Response { info!("serving {:?} POST request", route); info!("req.uri {:?}", req.uri()); @@ -227,7 +227,7 @@ async fn handle_configure_request( let parsed_spec = match ParsedSpec::try_from(spec) { Ok(ps) => ps, - Err(msg) => return Err((msg, StatusCode::PRECONDITION_FAILED)), + Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)), }; // XXX: wrap state update under lock in code blocks. Otherwise, diff --git a/compute_tools/src/http/openapi_spec.yaml b/compute_tools/src/http/openapi_spec.yaml index dc26cc63eb8d..cedc6ece8f5e 100644 --- a/compute_tools/src/http/openapi_spec.yaml +++ b/compute_tools/src/http/openapi_spec.yaml @@ -156,17 +156,17 @@ paths: description: Error text or 'OK' if download succeeded. example: "OK" 400: - description: Request is invalid. - content: - application/json: - schema: - $ref: "#/components/schemas/GenericError" + description: Request is invalid. + content: + application/json: + schema: + $ref: "#/components/schemas/GenericError" 500: - description: Extension download request failed. - content: - application/json: - schema: - $ref: "#/components/schemas/GenericError" + description: Extension download request failed. + content: + application/json: + schema: + $ref: "#/components/schemas/GenericError" components: securitySchemes: diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 8c44c6d519f4..f98333d8bf0e 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -118,19 +118,6 @@ pub fn get_spec_from_control_plane( spec } -/// It takes cluster specification and does the following: -/// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file. -/// - Update `pg_hba.conf` to allow external connections. -pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> { - // File `postgresql.conf` is no longer included into `basebackup`, so just - // always write all config into it creating new file. 
- config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec, None)?; - - update_pg_hba(pgdata_path)?; - - Ok(()) -} - /// Check `pg_hba.conf` and update if needed to allow external connections. pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> { // XXX: consider making it a part of spec.json diff --git a/control_plane/src/bin/attachment_service.rs b/control_plane/src/bin/attachment_service.rs index 16577e27d699..be7cff352ca9 100644 --- a/control_plane/src/bin/attachment_service.rs +++ b/control_plane/src/bin/attachment_service.rs @@ -9,6 +9,7 @@ use clap::Parser; use hex::FromHex; use hyper::StatusCode; use hyper::{Body, Request, Response}; +use pageserver_api::shard::TenantShardId; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; use std::{collections::HashMap, sync::Arc}; @@ -173,7 +174,8 @@ async fn handle_re_attach(mut req: Request) -> Result, ApiE if state.pageserver == Some(reattach_req.node_id) { state.generation += 1; response.tenants.push(ReAttachResponseTenant { - id: *t, + // TODO(sharding): make this shard-aware + id: TenantShardId::unsharded(*t), gen: state.generation, }); } @@ -196,7 +198,8 @@ async fn handle_validate(mut req: Request) -> Result, ApiEr }; for req_tenant in validate_req.tenants { - if let Some(tenant_state) = locked.tenants.get(&req_tenant.id) { + // TODO(sharding): make this shard-aware + if let Some(tenant_state) = locked.tenants.get(&req_tenant.id.tenant_id) { let valid = tenant_state.generation == req_tenant.gen; response.tenants.push(ValidateResponseTenant { id: req_tenant.id, diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 384c4ee56d4b..8d53a6a65867 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -415,6 +415,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an None, None, Some(pg_version), + None, )?; let new_timeline_id = timeline_info.timeline_id; let last_record_lsn = timeline_info.last_record_lsn; @@ -495,6 +496,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) - None, None, Some(pg_version), + None, )?; let new_timeline_id = timeline_info.timeline_id; @@ -582,6 +584,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) - start_lsn, Some(ancestor_timeline_id), None, + None, )?; let new_timeline_id = timeline_info.timeline_id; @@ -608,11 +611,9 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( }; let mut cplane = ComputeControlPlane::load(env.clone())?; - // All subcommands take an optional --tenant-id option - let tenant_id = get_tenant_id(sub_args, env)?; - match sub_name { "list" => { + let tenant_id = get_tenant_id(sub_args, env)?; let timeline_infos = get_timeline_infos(env, &tenant_id).unwrap_or_else(|e| { eprintln!("Failed to load timeline info: {}", e); HashMap::new() @@ -672,6 +673,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( println!("{table}"); } "create" => { + let tenant_id = get_tenant_id(sub_args, env)?; let branch_name = sub_args .get_one::("branch-name") .map(|s| s.as_str()) @@ -716,6 +718,18 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"), }; + match (mode, hot_standby) { + (ComputeMode::Static(_), true) => { + bail!("Cannot start a node in hot standby mode when it is already configured as a static replica") + } + 
(ComputeMode::Primary, true) => { + bail!("Cannot start a node as a hot standby replica, it is already configured as primary node") + } + _ => {} + } + + cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?; + cplane.new_endpoint( &endpoint_id, tenant_id, @@ -728,8 +742,6 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( )?; } "start" => { - let pg_port: Option = sub_args.get_one::("pg-port").copied(); - let http_port: Option = sub_args.get_one::("http-port").copied(); let endpoint_id = sub_args .get_one::("endpoint_id") .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?; @@ -758,80 +770,28 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( env.safekeepers.iter().map(|sk| sk.id).collect() }; - let endpoint = cplane.endpoints.get(endpoint_id.as_str()); + let endpoint = cplane + .endpoints + .get(endpoint_id.as_str()) + .ok_or_else(|| anyhow::anyhow!("endpoint {endpoint_id} not found"))?; + + cplane.check_conflicting_endpoints( + endpoint.mode, + endpoint.tenant_id, + endpoint.timeline_id, + )?; let ps_conf = env.get_pageserver_conf(pageserver_id)?; let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) { - let claims = Claims::new(Some(tenant_id), Scope::Tenant); + let claims = Claims::new(Some(endpoint.tenant_id), Scope::Tenant); Some(env.generate_auth_token(&claims)?) } else { None }; - let hot_standby = sub_args - .get_one::("hot-standby") - .copied() - .unwrap_or(false); - - if let Some(endpoint) = endpoint { - match (&endpoint.mode, hot_standby) { - (ComputeMode::Static(_), true) => { - bail!("Cannot start a node in hot standby mode when it is already configured as a static replica") - } - (ComputeMode::Primary, true) => { - bail!("Cannot start a node as a hot standby replica, it is already configured as primary node") - } - _ => {} - } - println!("Starting existing endpoint {endpoint_id}..."); - endpoint.start(&auth_token, safekeepers, remote_ext_config)?; - } else { - let branch_name = sub_args - .get_one::("branch-name") - .map(|s| s.as_str()) - .unwrap_or(DEFAULT_BRANCH_NAME); - let timeline_id = env - .get_branch_timeline_id(branch_name, tenant_id) - .ok_or_else(|| { - anyhow!("Found no timeline id for branch name '{branch_name}'") - })?; - let lsn = sub_args - .get_one::("lsn") - .map(|lsn_str| Lsn::from_str(lsn_str)) - .transpose() - .context("Failed to parse Lsn from the request")?; - let pg_version = sub_args - .get_one::("pg-version") - .copied() - .context("Failed to `pg-version` from the argument string")?; - - let mode = match (lsn, hot_standby) { - (Some(lsn), false) => ComputeMode::Static(lsn), - (None, true) => ComputeMode::Replica, - (None, false) => ComputeMode::Primary, - (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"), - }; - - // when used with custom port this results in non obvious behaviour - // port is remembered from first start command, i e - // start --port X - // stop - // start <-- will also use port X even without explicit port argument - println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ..."); - - let ep = cplane.new_endpoint( - endpoint_id, - tenant_id, - timeline_id, - pg_port, - http_port, - pg_version, - mode, - pageserver_id, - )?; - ep.start(&auth_token, safekeepers, remote_ext_config)?; - } + println!("Starting existing endpoint {endpoint_id}..."); + endpoint.start(&auth_token, safekeepers, remote_ext_config)?; } "reconfigure" => { let endpoint_id = sub_args @@ 
-1252,7 +1212,7 @@ fn cli() -> Command { let remote_ext_config_args = Arg::new("remote-ext-config") .long("remote-ext-config") .num_args(1) - .help("Configure the S3 bucket that we search for extensions in.") + .help("Configure the remote extensions storage proxy gateway to request for extensions.") .required(false); let lsn_arg = Arg::new("lsn") @@ -1437,15 +1397,7 @@ fn cli() -> Command { .subcommand(Command::new("start") .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.") .arg(endpoint_id_arg.clone()) - .arg(tenant_id_arg.clone()) - .arg(branch_name_arg.clone()) - .arg(timeline_id_arg.clone()) - .arg(lsn_arg) - .arg(pg_port_arg) - .arg(http_port_arg) .arg(endpoint_pageserver_id_arg.clone()) - .arg(pg_version_arg) - .arg(hot_standby_arg) .arg(safekeepers_arg) .arg(remote_ext_config_args) ) @@ -1458,7 +1410,6 @@ fn cli() -> Command { .subcommand( Command::new("stop") .arg(endpoint_id_arg) - .arg(tenant_id_arg.clone()) .arg( Arg::new("destroy") .help("Also delete data directory (now optional, should be default in future)") diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 4443fd870432..12b12507647b 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -45,6 +45,7 @@ use std::sync::Arc; use std::time::Duration; use anyhow::{anyhow, bail, Context, Result}; +use compute_api::spec::RemoteExtSpec; use serde::{Deserialize, Serialize}; use utils::id::{NodeId, TenantId, TimelineId}; @@ -124,6 +125,7 @@ impl ComputeControlPlane { let http_port = http_port.unwrap_or_else(|| self.get_port() + 1); let pageserver = PageServerNode::from_env(&self.env, self.env.get_pageserver_conf(pageserver_id)?); + let ep = Arc::new(Endpoint { endpoint_id: endpoint_id.to_owned(), pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port), @@ -168,6 +170,30 @@ impl ComputeControlPlane { Ok(ep) } + + pub fn check_conflicting_endpoints( + &self, + mode: ComputeMode, + tenant_id: TenantId, + timeline_id: TimelineId, + ) -> Result<()> { + if matches!(mode, ComputeMode::Primary) { + // this check is not complete, as you could have a concurrent attempt at + // creating another primary, both reading the state before checking it here, + // but it's better than nothing. + let mut duplicates = self.endpoints.iter().filter(|(_k, v)| { + v.tenant_id == tenant_id + && v.timeline_id == timeline_id + && v.mode == mode + && v.status() != "stopped" + }); + + if let Some((key, _)) = duplicates.next() { + bail!("attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. 
please don't do this, it is not supported."); + } + } + Ok(()) + } } /////////////////////////////////////////////////////////////////////////////// @@ -476,6 +502,18 @@ impl Endpoint { } } + // check for file remote_extensions_spec.json + // if it is present, read it and pass to compute_ctl + let remote_extensions_spec_path = self.endpoint_path().join("remote_extensions_spec.json"); + let remote_extensions_spec = std::fs::File::open(remote_extensions_spec_path); + let remote_extensions: Option; + + if let Ok(spec_file) = remote_extensions_spec { + remote_extensions = serde_json::from_reader(spec_file).ok(); + } else { + remote_extensions = None; + }; + // Create spec file let spec = ComputeSpec { skip_pg_catalog_updates: self.skip_pg_catalog_updates, @@ -497,7 +535,7 @@ impl Endpoint { pageserver_connstring: Some(pageserver_connstring), safekeeper_connstrings, storage_auth_token: auth_token.clone(), - remote_extensions: None, + remote_extensions, }; let spec_path = self.endpoint_path().join("spec.json"); std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?; diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 237df485432c..96a41874fdf0 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -11,6 +11,7 @@ use std::io::{BufReader, Write}; use std::num::NonZeroU64; use std::path::PathBuf; use std::process::{Child, Command}; +use std::time::Duration; use std::{io, result}; use anyhow::{bail, Context}; @@ -522,19 +523,24 @@ impl PageServerNode { &self, tenant_id: TenantId, config: LocationConfig, + flush_ms: Option, ) -> anyhow::Result<()> { let req_body = TenantLocationConfigRequest { tenant_id, config }; - self.http_request( - Method::PUT, - format!( - "{}/tenant/{}/location_config", - self.http_base_url, tenant_id - ), - )? - .json(&req_body) - .send()? - .error_from_body()?; + let path = format!( + "{}/tenant/{}/location_config", + self.http_base_url, tenant_id + ); + let path = if let Some(flush_ms) = flush_ms { + format!("{}?flush_ms={}", path, flush_ms.as_millis()) + } else { + path + }; + + self.http_request(Method::PUT, path)? + .json(&req_body) + .send()? + .error_from_body()?; Ok(()) } @@ -559,6 +565,7 @@ impl PageServerNode { ancestor_start_lsn: Option, ancestor_timeline_id: Option, pg_version: Option, + existing_initdb_timeline_id: Option, ) -> anyhow::Result { // If timeline ID was not specified, generate one let new_timeline_id = new_timeline_id.unwrap_or(TimelineId::generate()); @@ -572,6 +579,7 @@ impl PageServerNode { ancestor_start_lsn, ancestor_timeline_id, pg_version, + existing_initdb_timeline_id, }) .send()? .error_from_body()? 
diff --git a/control_plane/src/tenant_migration.rs b/control_plane/src/tenant_migration.rs index d28d1f9fe846..c0c44e279f32 100644 --- a/control_plane/src/tenant_migration.rs +++ b/control_plane/src/tenant_migration.rs @@ -14,7 +14,6 @@ use pageserver_api::models::{ use std::collections::HashMap; use std::time::Duration; use utils::{ - generation::Generation, id::{TenantId, TimelineId}, lsn::Lsn, }; @@ -93,6 +92,22 @@ pub fn migrate_tenant( // Get a new generation let attachment_service = AttachmentService::from_env(env); + fn build_location_config( + mode: LocationConfigMode, + generation: Option, + secondary_conf: Option, + ) -> LocationConfig { + LocationConfig { + mode, + generation, + secondary_conf, + tenant_conf: TenantConfig::default(), + shard_number: 0, + shard_count: 0, + shard_stripe_size: 0, + } + } + let previous = attachment_service.inspect(tenant_id)?; let mut baseline_lsns = None; if let Some((generation, origin_ps_id)) = &previous { @@ -101,40 +116,26 @@ pub fn migrate_tenant( if origin_ps_id == &dest_ps.conf.id { println!("🔁 Already attached to {origin_ps_id}, freshening..."); let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?; - let dest_conf = LocationConfig { - mode: LocationConfigMode::AttachedSingle, - generation: gen.map(Generation::new), - secondary_conf: None, - tenant_conf: TenantConfig::default(), - }; - dest_ps.location_config(tenant_id, dest_conf)?; + let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None); + dest_ps.location_config(tenant_id, dest_conf, None)?; println!("✅ Migration complete"); return Ok(()); } println!("🔁 Switching origin pageserver {origin_ps_id} to stale mode"); - let stale_conf = LocationConfig { - mode: LocationConfigMode::AttachedStale, - generation: Some(Generation::new(*generation)), - secondary_conf: None, - tenant_conf: TenantConfig::default(), - }; - origin_ps.location_config(tenant_id, stale_conf)?; + let stale_conf = + build_location_config(LocationConfigMode::AttachedStale, Some(*generation), None); + origin_ps.location_config(tenant_id, stale_conf, Some(Duration::from_secs(10)))?; baseline_lsns = Some(get_lsns(tenant_id, &origin_ps)?); } let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?; - let dest_conf = LocationConfig { - mode: LocationConfigMode::AttachedMulti, - generation: gen.map(Generation::new), - secondary_conf: None, - tenant_conf: TenantConfig::default(), - }; + let dest_conf = build_location_config(LocationConfigMode::AttachedMulti, gen, None); println!("🔁 Attaching to pageserver {}", dest_ps.conf.id); - dest_ps.location_config(tenant_id, dest_conf)?; + dest_ps.location_config(tenant_id, dest_conf, None)?; if let Some(baseline) = baseline_lsns { println!("🕑 Waiting for LSN to catch up..."); @@ -170,31 +171,25 @@ pub fn migrate_tenant( } // Downgrade to a secondary location - let secondary_conf = LocationConfig { - mode: LocationConfigMode::Secondary, - generation: None, - secondary_conf: Some(LocationConfigSecondary { warm: true }), - tenant_conf: TenantConfig::default(), - }; + let secondary_conf = build_location_config( + LocationConfigMode::Secondary, + None, + Some(LocationConfigSecondary { warm: true }), + ); println!( "💤 Switching to secondary mode on pageserver {}", other_ps.conf.id ); - other_ps.location_config(tenant_id, secondary_conf)?; + other_ps.location_config(tenant_id, secondary_conf, None)?; } println!( "🔁 Switching to AttachedSingle mode on pageserver {}", dest_ps.conf.id ); - let dest_conf = LocationConfig { - mode: 
LocationConfigMode::AttachedSingle, - generation: gen.map(Generation::new), - secondary_conf: None, - tenant_conf: TenantConfig::default(), - }; - dest_ps.location_config(tenant_id, dest_conf)?; + let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None); + dest_ps.location_config(tenant_id, dest_conf, None)?; println!("✅ Migration complete"); diff --git a/docs/rfcs/029-pageserver-wal-disaster-recovery.md b/docs/rfcs/029-pageserver-wal-disaster-recovery.md new file mode 100644 index 000000000000..15ebd72bfe77 --- /dev/null +++ b/docs/rfcs/029-pageserver-wal-disaster-recovery.md @@ -0,0 +1,205 @@ +# Pageserver WAL disaster recovery + +Created on: 2023-09-08 +Author: Arpad Müller + +## Summary + +Enable the pageserver to recover from data corruption events by implementing +a feature to re-apply historic WAL records in parallel to the already occurring +WAL replay. + +The feature is outside of the user-visible backup and history story, and only +serves as a second-level backup for the case that there is a bug in the +pageservers that corrupted the served pages. + +The RFC proposes the addition of two new features: +* recover a broken branch from WAL (downtime is allowed) +* a test recovery system to recover random branches to make sure recovery works + +## Motivation + +The historic WAL is currently stored in S3 even after it has been replayed by +the pageserver and thus been integrated into the pageserver's storage system. +This is done to defend against data corruption failures inside the pageservers. + +However, application of this WAL in the disaster recovery setting is currently +very manual and we want to automate this to make it easier. + +### Use cases + +There are various use cases for this feature, such as: + +* The main motivation is replaying in the event of pageservers corrupting + data. +* We might want to, beyond the user-visible history features, through our + support channels and upon customer request, in select instances, recover + historic versions beyond the range of history that we officially support. +* Running the recovery process in the background for random tenant timelines + to figure out if there was a corruption of data (we would compare with what + the pageserver stores for the "official" timeline). +* Using the WAL to arrive at historic pages we can then back up to S3 so that + WAL itself can be discarded, or at least not used for future replays. + Again, this sounds a lot like what the pageserver is already doing, but the + point is to provide a fallback to the service provided by the pageserver. + +## Design + +### Design constraints + +The main design constraint is that the feature needs to be *simple* enough that +the number of bugs is as low, and reliability as high as possible: the main +goal of this endeavour is to achieve higher correctness than the pageserver. + +For the background process, we cannot afford a downtime of the timeline that is +being cloned, as we don't want to restrict ourselves to offline tenants only. +In the scenario where we want to recover from disasters or roll back to a +historic LSN through support staff, downtimes are more affordable, and +inevitable if the original had been subject to the corruption. Ideally, the +two code paths would share code, so the solution should be designed to not +require downtimes. + +### API endpoint changes + +This RFC proposes two API endpoint changes in the safekeeper and the +pageserver.
+ +Remember, the pageserver timeline API creation endpoint is to this URL: + +``` +/v1/tenant/{tenant_id}/timeline/ +``` + +Where `{tenant_id}` is the ID of the tenant the timeline is created for, +and is specified as part of the URL. The timeline ID is passed via the POST +request body as the only required parameter `new_timeline_id`. + +This proposal adds one optional parameter called +`existing_initdb_timeline_id` to the request's JSON body. If the parameter +is not specified, the behaviour stays as it is today, so the pageserver runs +initdb. +If the parameter is specified, it is expected to point to a timeline ID. +In fact, that ID might match `new_timeline_id`; what's important is that +S3 storage contains a matching initdb under the URL matching the given +tenant and timeline. + +Having both `ancestor_timeline_id` and `existing_initdb_timeline_id` +specified is illegal and will yield an HTTP error. This feature is +only meant for the "main" branch that doesn't have any ancestors +of its own, as initdb is only relevant there. + +For the safekeeper, we propose the addition of the following copy endpoint: + +``` +/v1/tenant/{tenant_id}/timeline/{source_timeline_id}/copy +``` +It is meant for POST requests with a JSON body and takes the two URL parameters +`tenant_id` and `source_timeline_id`. The JSON request body contains +the two required parameters `target_timeline_id` and `until_lsn`. + +Once invoked, the copy endpoint starts a copy process of the WAL from +the source ID to the target ID. The LSN is updated according to the +progress of the API call. + +### Higher level features + +We want the API changes to support the following higher level features: + +* recovery-after-corruption DR of the main timeline of a tenant. This + feature allows for downtime. +* test DR of the main timeline into a special copy timeline. This feature + is meant to run against selected production tenants in the background, + without the user noticing, so it does not allow for downtime. + +The recovery-after-corruption DR only needs the pageserver changes. +It works as follows: + +* delete the timeline from the pageservers via timeline deletion API +* re-create it via timeline creation API (same ID as before) and set + `existing_initdb_timeline_id` to the same timeline ID + +The test DR also requires the copy primitive and works as follows: + +* copy the WAL of the timeline to a new place +* create a new timeline for the tenant + +## Non Goals + +At the danger of being repetitive, the main goal of this feature is to be a +backup method, so reliability is very important. This implies that other +aspects like performance or space reduction are less important. + +### Corrupt WAL + +The process suggested by this RFC assumes that the WAL is free of corruption. +In some instances, corruption can make it into WAL, for example when +higher level components like postgres or the application first read corrupt +data, and then execute a write with data derived from that earlier read. That +written data might then contain the corruption. + +Common use cases can hit this quite easily. For example, an application reads +some counter, increments it, and then writes the new counter value to the +database. +On a lower level, the compute might put FPIs (Full Page Images) into the WAL, +which have corrupt data for rows unrelated to the write operation at hand.
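To make the proposed endpoints and the two recovery flows above more concrete, here is a rough client-side sketch. Only the URL paths and JSON field names come from the sections above; the `reqwest`-based helper functions, the base-URL parameters, and the string-typed IDs and LSN are assumptions made for illustration, not part of the RFC.

```rust
// Hypothetical client-side sketch of the two proposed calls.
// Assumes the `reqwest` crate (blocking + json features) and `serde_json`;
// base URLs, IDs and the LSN value are placeholders.
use serde_json::json;

/// Recovery-after-corruption flow: re-create a previously deleted main
/// timeline from its uploaded initdb snapshot plus the WAL kept in S3.
fn recreate_timeline_from_wal(
    pageserver_http: &str,
    tenant_id: &str,
    timeline_id: &str,
) -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::new();
    client
        .post(format!("{pageserver_http}/v1/tenant/{tenant_id}/timeline/"))
        .json(&json!({
            "new_timeline_id": timeline_id,
            // Reuse the initdb that was uploaded for this very timeline.
            "existing_initdb_timeline_id": timeline_id,
        }))
        .send()?
        .error_for_status()?;
    Ok(())
}

/// Test-DR flow: ask the safekeeper to copy a timeline's WAL into a shadow
/// timeline, up to (and not beyond) `until_lsn`.
fn copy_wal(
    safekeeper_http: &str,
    tenant_id: &str,
    source_timeline_id: &str,
    target_timeline_id: &str,
    until_lsn: &str,
) -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::new();
    client
        .post(format!(
            "{safekeeper_http}/v1/tenant/{tenant_id}/timeline/{source_timeline_id}/copy"
        ))
        .json(&json!({
            "target_timeline_id": target_timeline_id,
            "until_lsn": until_lsn,
        }))
        .send()?
        .error_for_status()?;
    Ok(())
}
```

In the test-DR flow, the copy call would be followed by a timeline-creation call like the one above, issued for the shadow timeline ID, so that the pageserver replays the copied WAL in the background.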
+ +Separating corrupt writes from non-corrupt ones is a hard problem in general, +and if the application was involved in making the corrupt write, a recovery +would also involve the application. Therefore, corruption that has made it into +the WAL is outside of the scope of this feature. However, the WAL replay can be +issued up to right before the point in time where the corruption occurred. Then the +data loss is isolated to post-corruption writes only. + +## Impacted components (e.g. pageserver, safekeeper, console, etc.) + +Most changes would happen to the pageservers. +For the higher level features, other components like the console might also +be involved. + +We need to make sure that the shadow timelines are not subject to the usual +limits and billing we apply to existing timelines. + +## Proposed implementation + +The first problem to keep in mind is the reproducibility of `initdb`. +So an initial step would be to upload `initdb` snapshots to S3. + +After that, we'd have the endpoint spawn a background process which +performs the replay of the WAL to that new timeline. This process should +follow the existing workflows as closely as possible, just using the +WAL records of a different timeline. + +The timeline created will be in a special state that solely looks for WAL +entries of the timeline it is trying to copy. Once the target LSN is reached, +it turns into a normal timeline that also accepts writes to its own +timeline ID. + +### Scalability + +For now we want to run this entire process on a single node, and as +it is by nature linear, it's hard to parallelize. However, for the +verification workloads, we can easily start the WAL replay in parallel +for different points in time. This is especially valuable for tenants +with large WAL records. + +Compare this with the tricks to make addition circuits execute with +lower latency by making them perform the addition for both possible +values of the carry bit, and then, in a second step, taking the +result for the carry bit that was actually obtained. + +The other scalability dimension to consider is the WAL length, which +is a growing concern as tenants accumulate changes. There are +possible approaches to this, including creating snapshots of the +page files and uploading them to S3, but if we do this for every single +branch, we lose the cheap branching property. + +### Implementation by component + +The proposed changes for the various components of the neon architecture +are written up in this Notion page: + +https://www.notion.so/neondatabase/Pageserver-disaster-recovery-one-pager-4ecfb5df16ce4f6bbfc3817ed1a6cbb2 + +### Unresolved questions + +None known (outside of the ones mentioned above). diff --git a/libs/pageserver_api/Cargo.toml b/libs/pageserver_api/Cargo.toml index df9796b03901..4d08d78e8741 100644 --- a/libs/pageserver_api/Cargo.toml +++ b/libs/pageserver_api/Cargo.toml @@ -18,6 +18,7 @@ enum-map.workspace = true strum.workspace = true strum_macros.workspace = true hex.workspace = true +thiserror.workspace = true workspace_hack.workspace = true diff --git a/libs/pageserver_api/src/control_api.rs b/libs/pageserver_api/src/control_api.rs index 8232e81b9887..0acc3a7bb0ae 100644 --- a/libs/pageserver_api/src/control_api.rs +++ b/libs/pageserver_api/src/control_api.rs @@ -4,7 +4,9 @@ //!
See docs/rfcs/025-generation-numbers.md use serde::{Deserialize, Serialize}; -use utils::id::{NodeId, TenantId}; +use utils::id::NodeId; + +use crate::shard::TenantShardId; #[derive(Serialize, Deserialize)] pub struct ReAttachRequest { @@ -13,7 +15,7 @@ pub struct ReAttachRequest { #[derive(Serialize, Deserialize)] pub struct ReAttachResponseTenant { - pub id: TenantId, + pub id: TenantShardId, pub gen: u32, } @@ -24,7 +26,7 @@ pub struct ReAttachResponse { #[derive(Serialize, Deserialize)] pub struct ValidateRequestTenant { - pub id: TenantId, + pub id: TenantShardId, pub gen: u32, } @@ -40,6 +42,6 @@ pub struct ValidateResponse { #[derive(Serialize, Deserialize)] pub struct ValidateResponseTenant { - pub id: TenantId, + pub id: TenantShardId, pub valid: bool, } diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 71e32e479f20..2234a06501a9 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -10,7 +10,6 @@ use serde_with::serde_as; use strum_macros; use utils::{ completion, - generation::Generation, history_buffer::HistoryBufferWithDropCounter, id::{NodeId, TenantId, TimelineId}, lsn::Lsn, @@ -180,6 +179,8 @@ pub struct TimelineCreateRequest { #[serde(default)] pub ancestor_timeline_id: Option, #[serde(default)] + pub existing_initdb_timeline_id: Option, + #[serde(default)] pub ancestor_start_lsn: Option, pub pg_version: Option, } @@ -262,10 +263,19 @@ pub struct LocationConfig { pub mode: LocationConfigMode, /// If attaching, in what generation? #[serde(default)] - pub generation: Option, + pub generation: Option, #[serde(default)] pub secondary_conf: Option, + // Shard parameters: if shard_count is nonzero, then other shard_* fields + // must be set accurately. + #[serde(default)] + pub shard_number: u8, + #[serde(default)] + pub shard_count: u8, + #[serde(default)] + pub shard_stripe_size: u32, + // If requesting mode `Secondary`, configuration for that. // Custom storage configuration for the tenant, if any pub tenant_conf: TenantConfig, @@ -306,25 +316,7 @@ impl std::ops::Deref for TenantConfigRequest { impl TenantConfigRequest { pub fn new(tenant_id: TenantId) -> TenantConfigRequest { - let config = TenantConfig { - checkpoint_distance: None, - checkpoint_timeout: None, - compaction_target_size: None, - compaction_period: None, - compaction_threshold: None, - gc_horizon: None, - gc_period: None, - image_creation_threshold: None, - pitr_interval: None, - walreceiver_connect_timeout: None, - lagging_wal_timeout: None, - max_lsn_wal_lag: None, - trace_read_requests: None, - eviction_policy: None, - min_resident_size_override: None, - evictions_low_residence_duration_metric_threshold: None, - gc_feedback: None, - }; + let config = TenantConfig::default(); TenantConfigRequest { tenant_id, config } } } @@ -392,7 +384,9 @@ pub struct TimelineInfo { /// The LSN that we are advertizing to safekeepers pub remote_consistent_lsn_visible: Lsn, - pub current_logical_size: Option, // is None when timeline is Unloaded + pub current_logical_size: u64, + pub current_logical_size_is_accurate: bool, + /// Sum of the size of all layer files. /// If a layer is present in both local FS and S3, it counts only once. 
pub current_physical_size: Option, // is None when timeline is Unloaded diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs index 32a834a26a5e..3510b4dbcadd 100644 --- a/libs/pageserver_api/src/shard.rs +++ b/libs/pageserver_api/src/shard.rs @@ -2,12 +2,13 @@ use std::{ops::RangeInclusive, str::FromStr}; use hex::FromHex; use serde::{Deserialize, Serialize}; +use thiserror; use utils::id::TenantId; -#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug)] +#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)] pub struct ShardNumber(pub u8); -#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug)] +#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)] pub struct ShardCount(pub u8); impl ShardCount { @@ -38,7 +39,7 @@ impl ShardNumber { /// Note that the binary encoding is _not_ backward compatible, because /// at the time sharding is introduced, there are no existing binary structures /// containing TenantId that we need to handle. -#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy)] +#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)] pub struct TenantShardId { pub tenant_id: TenantId, pub shard_number: ShardNumber, @@ -139,6 +140,89 @@ impl From<[u8; 18]> for TenantShardId { } } +/// For use within the context of a particular tenant, when we need to know which +/// shard we're dealing with, but do not need to know the full ShardIdentity (because +/// we won't be doing any page->shard mapping), and do not need to know the fully qualified +/// TenantShardId. +#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy)] +pub struct ShardIndex { + pub shard_number: ShardNumber, + pub shard_count: ShardCount, +} + +impl ShardIndex { + pub fn new(number: ShardNumber, count: ShardCount) -> Self { + Self { + shard_number: number, + shard_count: count, + } + } + pub fn unsharded() -> Self { + Self { + shard_number: ShardNumber(0), + shard_count: ShardCount(0), + } + } + + pub fn is_unsharded(&self) -> bool { + self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0) + } + + /// For use in constructing remote storage paths: concatenate this with a TenantId + /// to get a fully qualified TenantShardId. + /// + /// Backward compat: this function returns an empty string if Self::is_unsharded, such + /// that the legacy pre-sharding remote key format is preserved. 
+ pub fn get_suffix(&self) -> String { + if self.is_unsharded() { + "".to_string() + } else { + format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0) + } + } +} + +impl std::fmt::Display for ShardIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0) + } +} + +impl std::fmt::Debug for ShardIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Debug is the same as Display: the compact hex representation + write!(f, "{}", self) + } +} + +impl std::str::FromStr for ShardIndex { + type Err = hex::FromHexError; + + fn from_str(s: &str) -> Result { + // Expect format: 1 byte shard number, 1 byte shard count + if s.len() == 4 { + let bytes = s.as_bytes(); + let mut shard_parts: [u8; 2] = [0u8; 2]; + hex::decode_to_slice(bytes, &mut shard_parts)?; + Ok(Self { + shard_number: ShardNumber(shard_parts[0]), + shard_count: ShardCount(shard_parts[1]), + }) + } else { + Err(hex::FromHexError::InvalidStringLength) + } + } +} + +impl From<[u8; 2]> for ShardIndex { + fn from(b: [u8; 2]) -> Self { + Self { + shard_number: ShardNumber(b[0]), + shard_count: ShardCount(b[1]), + } + } +} + impl Serialize for TenantShardId { fn serialize(&self, serializer: S) -> Result where @@ -209,6 +293,151 @@ impl<'de> Deserialize<'de> for TenantShardId { } } +/// Stripe size in number of pages +#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)] +pub struct ShardStripeSize(pub u32); + +/// Layout version: for future upgrades where we might change how the key->shard mapping works +#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)] +pub struct ShardLayout(u8); + +const LAYOUT_V1: ShardLayout = ShardLayout(1); + +/// Default stripe size in pages: 256MiB divided by 8kiB page size. +const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8); + +/// The ShardIdentity contains the information needed for one member of map +/// to resolve a key to a shard, and then check whether that shard is ==self. +#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)] +pub struct ShardIdentity { + pub layout: ShardLayout, + pub number: ShardNumber, + pub count: ShardCount, + pub stripe_size: ShardStripeSize, +} + +#[derive(thiserror::Error, Debug, PartialEq, Eq)] +pub enum ShardConfigError { + #[error("Invalid shard count")] + InvalidCount, + #[error("Invalid shard number")] + InvalidNumber, + #[error("Invalid stripe size")] + InvalidStripeSize, +} + +impl ShardIdentity { + /// An identity with number=0 count=0 is a "none" identity, which represents legacy + /// tenants. Modern single-shard tenants should not use this: they should + /// have number=0 count=1. + pub fn unsharded() -> Self { + Self { + number: ShardNumber(0), + count: ShardCount(0), + layout: LAYOUT_V1, + stripe_size: DEFAULT_STRIPE_SIZE, + } + } + + pub fn is_unsharded(&self) -> bool { + self.number == ShardNumber(0) && self.count == ShardCount(0) + } + + /// Count must be nonzero, and number must be < count. To construct + /// the legacy case (count==0), use Self::unsharded instead. 
+ pub fn new( + number: ShardNumber, + count: ShardCount, + stripe_size: ShardStripeSize, + ) -> Result { + if count.0 == 0 { + Err(ShardConfigError::InvalidCount) + } else if number.0 > count.0 - 1 { + Err(ShardConfigError::InvalidNumber) + } else if stripe_size.0 == 0 { + Err(ShardConfigError::InvalidStripeSize) + } else { + Ok(Self { + number, + count, + layout: LAYOUT_V1, + stripe_size, + }) + } + } +} + +impl Serialize for ShardIndex { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + if serializer.is_human_readable() { + serializer.collect_str(self) + } else { + // Binary encoding is not used in index_part.json, but is included in anticipation of + // switching various structures (e.g. inter-process communication, remote metadata) to more + // compact binary encodings in future. + let mut packed: [u8; 2] = [0; 2]; + packed[0] = self.shard_number.0; + packed[1] = self.shard_count.0; + packed.serialize(serializer) + } + } +} + +impl<'de> Deserialize<'de> for ShardIndex { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct IdVisitor { + is_human_readable_deserializer: bool, + } + + impl<'de> serde::de::Visitor<'de> for IdVisitor { + type Value = ShardIndex; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + if self.is_human_readable_deserializer { + formatter.write_str("value in form of hex string") + } else { + formatter.write_str("value in form of integer array([u8; 2])") + } + } + + fn visit_seq(self, seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let s = serde::de::value::SeqAccessDeserializer::new(seq); + let id: [u8; 2] = Deserialize::deserialize(s)?; + Ok(ShardIndex::from(id)) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + ShardIndex::from_str(v).map_err(E::custom) + } + } + + if deserializer.is_human_readable() { + deserializer.deserialize_str(IdVisitor { + is_human_readable_deserializer: true, + }) + } else { + deserializer.deserialize_tuple( + 2, + IdVisitor { + is_human_readable_deserializer: false, + }, + ) + } + } +} + #[cfg(test)] mod tests { use std::str::FromStr; @@ -318,4 +547,66 @@ mod tests { Ok(()) } + + #[test] + fn shard_identity_validation() -> Result<(), ShardConfigError> { + // Happy cases + ShardIdentity::new(ShardNumber(0), ShardCount(1), DEFAULT_STRIPE_SIZE)?; + ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(1))?; + ShardIdentity::new(ShardNumber(254), ShardCount(255), ShardStripeSize(1))?; + + assert_eq!( + ShardIdentity::new(ShardNumber(0), ShardCount(0), DEFAULT_STRIPE_SIZE), + Err(ShardConfigError::InvalidCount) + ); + assert_eq!( + ShardIdentity::new(ShardNumber(10), ShardCount(10), DEFAULT_STRIPE_SIZE), + Err(ShardConfigError::InvalidNumber) + ); + assert_eq!( + ShardIdentity::new(ShardNumber(11), ShardCount(10), DEFAULT_STRIPE_SIZE), + Err(ShardConfigError::InvalidNumber) + ); + assert_eq!( + ShardIdentity::new(ShardNumber(255), ShardCount(255), DEFAULT_STRIPE_SIZE), + Err(ShardConfigError::InvalidNumber) + ); + assert_eq!( + ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(0)), + Err(ShardConfigError::InvalidStripeSize) + ); + + Ok(()) + } + + #[test] + fn shard_index_human_encoding() -> Result<(), hex::FromHexError> { + let example = ShardIndex { + shard_number: ShardNumber(13), + shard_count: ShardCount(17), + }; + let expected: String = "0d11".to_string(); + let encoded = format!("{example}"); + assert_eq!(&encoded, &expected); + + let decoded = 
ShardIndex::from_str(&encoded)?; + assert_eq!(example, decoded); + Ok(()) + } + + #[test] + fn shard_index_binary_encoding() -> Result<(), hex::FromHexError> { + let example = ShardIndex { + shard_number: ShardNumber(13), + shard_count: ShardCount(17), + }; + let expected: [u8; 2] = [0x0d, 0x11]; + + let encoded = bincode::serialize(&example).unwrap(); + assert_eq!(Hex(&encoded), Hex(&expected)); + let decoded = bincode::deserialize(&encoded).unwrap(); + assert_eq!(example, decoded); + + Ok(()) + } } diff --git a/libs/remote_storage/Cargo.toml b/libs/remote_storage/Cargo.toml index d7bcce28cb48..e8bfc005d32d 100644 --- a/libs/remote_storage/Cargo.toml +++ b/libs/remote_storage/Cargo.toml @@ -9,8 +9,7 @@ anyhow.workspace = true async-trait.workspace = true once_cell.workspace = true aws-smithy-async.workspace = true -aws-smithy-http.workspace = true -aws-types.workspace = true +aws-smithy-types.workspace = true aws-config.workspace = true aws-sdk-s3.workspace = true aws-credential-types.workspace = true diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs index ab3fd3fe629f..0cb73f73b77d 100644 --- a/libs/remote_storage/src/s3_bucket.rs +++ b/libs/remote_storage/src/s3_bucket.rs @@ -14,18 +14,20 @@ use aws_config::{ provider_config::ProviderConfig, retry::{RetryConfigBuilder, RetryMode}, web_identity_token::WebIdentityTokenCredentialsProvider, + BehaviorVersion, }; -use aws_credential_types::cache::CredentialsCache; +use aws_credential_types::provider::SharedCredentialsProvider; use aws_sdk_s3::{ - config::{AsyncSleep, Config, Region, SharedAsyncSleep}, + config::{AsyncSleep, Builder, IdentityCache, Region, SharedAsyncSleep}, error::SdkError, operation::get_object::GetObjectError, - primitives::ByteStream, types::{Delete, ObjectIdentifier}, Client, }; use aws_smithy_async::rt::sleep::TokioSleep; -use aws_smithy_http::body::SdkBody; + +use aws_smithy_types::body::SdkBody; +use aws_smithy_types::byte_stream::ByteStream; use hyper::Body; use scopeguard::ScopeGuard; use tokio::io::{self, AsyncRead}; @@ -78,7 +80,6 @@ impl S3Bucket { // needed to access remote extensions bucket .or_else("token", { let provider_conf = ProviderConfig::without_region().with_region(region.clone()); - WebIdentityTokenCredentialsProvider::builder() .configure(&provider_conf) .build() @@ -98,18 +99,20 @@ impl S3Bucket { .set_max_attempts(Some(1)) .set_mode(Some(RetryMode::Adaptive)); - let mut config_builder = Config::builder() + let mut config_builder = Builder::default() + .behavior_version(BehaviorVersion::v2023_11_09()) .region(region) - .credentials_cache(CredentialsCache::lazy()) - .credentials_provider(credentials_provider) - .sleep_impl(SharedAsyncSleep::from(sleep_impl)) - .retry_config(retry_config.build()); + .identity_cache(IdentityCache::lazy().build()) + .credentials_provider(SharedCredentialsProvider::new(credentials_provider)) + .retry_config(retry_config.build()) + .sleep_impl(SharedAsyncSleep::from(sleep_impl)); if let Some(custom_endpoint) = aws_config.endpoint.clone() { config_builder = config_builder .endpoint_url(custom_endpoint) .force_path_style(true); } + let client = Client::from_conf(config_builder.build()); let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| { @@ -371,7 +374,7 @@ impl RemoteStorage for S3Bucket { let response = response?; - let keys = response.contents().unwrap_or_default(); + let keys = response.contents(); let empty = Vec::new(); let prefixes = response.common_prefixes.as_ref().unwrap_or(&empty); @@ -411,7 +414,7 
@@ impl RemoteStorage for S3Bucket { let started_at = start_measuring_requests(kind); let body = Body::wrap_stream(ReaderStream::new(from)); - let bytes_stream = ByteStream::new(SdkBody::from(body)); + let bytes_stream = ByteStream::new(SdkBody::from_body_0_4(body)); let res = self .client @@ -474,7 +477,7 @@ impl RemoteStorage for S3Bucket { for path in paths { let obj_id = ObjectIdentifier::builder() .set_key(Some(self.relative_path_to_s3_object(path))) - .build(); + .build()?; delete_objects.push(obj_id); } @@ -485,7 +488,11 @@ impl RemoteStorage for S3Bucket { .client .delete_objects() .bucket(self.bucket_name.clone()) - .delete(Delete::builder().set_objects(Some(chunk.to_vec())).build()) + .delete( + Delete::builder() + .set_objects(Some(chunk.to_vec())) + .build()?, + ) .send() .await; diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 3eb01003dfd2..35c260740c9e 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -51,6 +51,7 @@ regex.workspace = true scopeguard.workspace = true serde.workspace = true serde_json = { workspace = true, features = ["raw_value"] } +serde_path_to_error.workspace = true serde_with.workspace = true signal-hook.workspace = true smallvec = { workspace = true, features = ["write"] } diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 735f358d8b9d..5d05af0c0023 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -3,6 +3,7 @@ use pageserver::repository::Key; use pageserver::tenant::layer_map::LayerMap; use pageserver::tenant::storage_layer::LayerFileName; use pageserver::tenant::storage_layer::PersistentLayerDesc; +use pageserver_api::shard::TenantShardId; use rand::prelude::{SeedableRng, SliceRandom, StdRng}; use std::cmp::{max, min}; use std::fs::File; @@ -211,7 +212,7 @@ fn bench_sequential(c: &mut Criterion) { let i32 = (i as u32) % 100; let zero = Key::from_hex("000000000000000000000000000000000000").unwrap(); let layer = PersistentLayerDesc::new_img( - TenantId::generate(), + TenantShardId::unsharded(TenantId::generate()), TimelineId::generate(), zero.add(10 * i32)..zero.add(10 * i32 + 1), Lsn(i), diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs index 22ebe70b1669..ebf4a4bec3f2 100644 --- a/pageserver/ctl/src/layers.rs +++ b/pageserver/ctl/src/layers.rs @@ -1,13 +1,15 @@ use std::path::{Path, PathBuf}; use anyhow::Result; -use camino::Utf8Path; +use camino::{Utf8Path, Utf8PathBuf}; use clap::Subcommand; use pageserver::context::{DownloadBehavior, RequestContext}; use pageserver::task_mgr::TaskKind; use pageserver::tenant::block_io::BlockCursor; use pageserver::tenant::disk_btree::DiskBtreeReader; use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary}; +use pageserver::tenant::storage_layer::{delta_layer, image_layer}; +use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer}; use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; use pageserver::{page_cache, virtual_file}; use pageserver::{ @@ -20,6 +22,7 @@ use pageserver::{ }; use std::fs; use utils::bin_ser::BeSer; +use utils::id::{TenantId, TimelineId}; use crate::layer_map_analyzer::parse_filename; @@ -45,6 +48,13 @@ pub(crate) enum LayerCmd { /// The id from list-layer command id: usize, }, + RewriteSummary { + layer_file_path: Utf8PathBuf, + #[clap(long)] + new_tenant_id: Option, + #[clap(long)] + new_timeline_id: Option, + }, } async fn read_delta_file(path: impl AsRef, ctx: &RequestContext) -> Result<()> { @@ -100,6 
+110,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { println!("- timeline {}", timeline.file_name().to_string_lossy()); } } + Ok(()) } LayerCmd::ListLayer { path, @@ -128,6 +139,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { idx += 1; } } + Ok(()) } LayerCmd::DumpLayer { path, @@ -168,7 +180,63 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { idx += 1; } } + Ok(()) + } + LayerCmd::RewriteSummary { + layer_file_path, + new_tenant_id, + new_timeline_id, + } => { + pageserver::virtual_file::init(10); + pageserver::page_cache::init(100); + + let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error); + + macro_rules! rewrite_closure { + ($($summary_ty:tt)*) => {{ + |summary| $($summary_ty)* { + tenant_id: new_tenant_id.unwrap_or(summary.tenant_id), + timeline_id: new_timeline_id.unwrap_or(summary.timeline_id), + ..summary + } + }}; + } + + let res = ImageLayer::rewrite_summary( + layer_file_path, + rewrite_closure!(image_layer::Summary), + &ctx, + ) + .await; + match res { + Ok(()) => { + println!("Successfully rewrote summary of image layer {layer_file_path}"); + return Ok(()); + } + Err(image_layer::RewriteSummaryError::MagicMismatch) => (), // fallthrough + Err(image_layer::RewriteSummaryError::Other(e)) => { + return Err(e); + } + } + + let res = DeltaLayer::rewrite_summary( + layer_file_path, + rewrite_closure!(delta_layer::Summary), + &ctx, + ) + .await; + match res { + Ok(()) => { + println!("Successfully rewrote summary of delta layer {layer_file_path}"); + return Ok(()); + } + Err(delta_layer::RewriteSummaryError::MagicMismatch) => (), // fallthrough + Err(delta_layer::RewriteSummaryError::Other(e)) => { + return Err(e); + } + } + + anyhow::bail!("not an image or delta layer: {layer_file_path}"); } } - Ok(()) } diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 87d9cc522e81..13d1fc775bb1 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -5,6 +5,7 @@ //! See also `settings.md` for better description on every parameter. use anyhow::{anyhow, bail, ensure, Context, Result}; +use pageserver_api::shard::TenantShardId; use remote_storage::{RemotePath, RemoteStorageConfig}; use serde::de::IntoDeserializer; use std::env; @@ -25,7 +26,7 @@ use toml_edit::{Document, Item}; use camino::{Utf8Path, Utf8PathBuf}; use postgres_backend::AuthType; use utils::{ - id::{NodeId, TenantId, TimelineId}, + id::{NodeId, TimelineId}, logging::LogFormat, }; @@ -628,12 +629,13 @@ impl PageServerConf { self.deletion_prefix().join(format!("header-{VERSION:02x}")) } - pub fn tenant_path(&self, tenant_id: &TenantId) -> Utf8PathBuf { - self.tenants_path().join(tenant_id.to_string()) + pub fn tenant_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf { + self.tenants_path().join(tenant_shard_id.to_string()) } - pub fn tenant_ignore_mark_file_path(&self, tenant_id: &TenantId) -> Utf8PathBuf { - self.tenant_path(tenant_id).join(IGNORED_TENANT_FILE_NAME) + pub fn tenant_ignore_mark_file_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf { + self.tenant_path(tenant_shard_id) + .join(IGNORED_TENANT_FILE_NAME) } /// Points to a place in pageserver's local directory, @@ -641,47 +643,53 @@ impl PageServerConf { /// /// Legacy: superseded by tenant_location_config_path. Eventually /// remove this function. 
- pub fn tenant_config_path(&self, tenant_id: &TenantId) -> Utf8PathBuf { - self.tenant_path(tenant_id).join(TENANT_CONFIG_NAME) + pub fn tenant_config_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf { + self.tenant_path(tenant_shard_id).join(TENANT_CONFIG_NAME) } - pub fn tenant_location_config_path(&self, tenant_id: &TenantId) -> Utf8PathBuf { - self.tenant_path(tenant_id) + pub fn tenant_location_config_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf { + self.tenant_path(tenant_shard_id) .join(TENANT_LOCATION_CONFIG_NAME) } - pub fn timelines_path(&self, tenant_id: &TenantId) -> Utf8PathBuf { - self.tenant_path(tenant_id).join(TIMELINES_SEGMENT_NAME) + pub fn timelines_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf { + self.tenant_path(tenant_shard_id) + .join(TIMELINES_SEGMENT_NAME) } - pub fn timeline_path(&self, tenant_id: &TenantId, timeline_id: &TimelineId) -> Utf8PathBuf { - self.timelines_path(tenant_id).join(timeline_id.to_string()) + pub fn timeline_path( + &self, + tenant_shard_id: &TenantShardId, + timeline_id: &TimelineId, + ) -> Utf8PathBuf { + self.timelines_path(tenant_shard_id) + .join(timeline_id.to_string()) } pub fn timeline_uninit_mark_file_path( &self, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, ) -> Utf8PathBuf { path_with_suffix_extension( - self.timeline_path(&tenant_id, &timeline_id), + self.timeline_path(&tenant_shard_id, &timeline_id), TIMELINE_UNINIT_MARK_SUFFIX, ) } pub fn timeline_delete_mark_file_path( &self, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, ) -> Utf8PathBuf { path_with_suffix_extension( - self.timeline_path(&tenant_id, &timeline_id), + self.timeline_path(&tenant_shard_id, &timeline_id), TIMELINE_DELETE_MARK_SUFFIX, ) } - pub fn tenant_deleted_mark_file_path(&self, tenant_id: &TenantId) -> Utf8PathBuf { - self.tenant_path(tenant_id) + pub fn tenant_deleted_mark_file_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf { + self.tenant_path(tenant_shard_id) .join(TENANT_DELETED_MARKER_FILE_NAME) } @@ -691,20 +699,24 @@ impl PageServerConf { pub fn trace_path( &self, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, connection_id: &ConnectionId, ) -> Utf8PathBuf { self.traces_path() - .join(tenant_id.to_string()) + .join(tenant_shard_id.to_string()) .join(timeline_id.to_string()) .join(connection_id.to_string()) } /// Points to a place in pageserver's local directory, /// where certain timeline's metadata file should be located. - pub fn metadata_path(&self, tenant_id: &TenantId, timeline_id: &TimelineId) -> Utf8PathBuf { - self.timeline_path(tenant_id, timeline_id) + pub fn metadata_path( + &self, + tenant_shard_id: &TenantShardId, + timeline_id: &TimelineId, + ) -> Utf8PathBuf { + self.timeline_path(tenant_shard_id, timeline_id) .join(METADATA_FILE_NAME) } @@ -767,7 +779,7 @@ impl PageServerConf { builder.remote_storage_config(RemoteStorageConfig::from_toml(item)?) 
} "tenant_config" => { - t_conf = Self::parse_toml_tenant_conf(item)?; + t_conf = TenantConfOpt::try_from(item.to_owned()).context(format!("failed to parse: '{key}'"))?; } "id" => builder.id(NodeId(parse_toml_u64(key, item)?)), "broker_endpoint" => builder.broker_endpoint(parse_toml_string(key, item)?.parse().context("failed to parse broker endpoint")?), @@ -841,114 +853,10 @@ impl PageServerConf { Ok(conf) } - // subroutine of parse_and_validate to parse `[tenant_conf]` section - - pub fn parse_toml_tenant_conf(item: &toml_edit::Item) -> Result { - let mut t_conf: TenantConfOpt = Default::default(); - if let Some(checkpoint_distance) = item.get("checkpoint_distance") { - t_conf.checkpoint_distance = - Some(parse_toml_u64("checkpoint_distance", checkpoint_distance)?); - } - - if let Some(checkpoint_timeout) = item.get("checkpoint_timeout") { - t_conf.checkpoint_timeout = Some(parse_toml_duration( - "checkpoint_timeout", - checkpoint_timeout, - )?); - } - - if let Some(compaction_target_size) = item.get("compaction_target_size") { - t_conf.compaction_target_size = Some(parse_toml_u64( - "compaction_target_size", - compaction_target_size, - )?); - } - - if let Some(compaction_period) = item.get("compaction_period") { - t_conf.compaction_period = - Some(parse_toml_duration("compaction_period", compaction_period)?); - } - - if let Some(compaction_threshold) = item.get("compaction_threshold") { - t_conf.compaction_threshold = - Some(parse_toml_u64("compaction_threshold", compaction_threshold)?.try_into()?); - } - - if let Some(image_creation_threshold) = item.get("image_creation_threshold") { - t_conf.image_creation_threshold = Some( - parse_toml_u64("image_creation_threshold", image_creation_threshold)?.try_into()?, - ); - } - - if let Some(gc_horizon) = item.get("gc_horizon") { - t_conf.gc_horizon = Some(parse_toml_u64("gc_horizon", gc_horizon)?); - } - - if let Some(gc_period) = item.get("gc_period") { - t_conf.gc_period = Some(parse_toml_duration("gc_period", gc_period)?); - } - - if let Some(pitr_interval) = item.get("pitr_interval") { - t_conf.pitr_interval = Some(parse_toml_duration("pitr_interval", pitr_interval)?); - } - if let Some(walreceiver_connect_timeout) = item.get("walreceiver_connect_timeout") { - t_conf.walreceiver_connect_timeout = Some(parse_toml_duration( - "walreceiver_connect_timeout", - walreceiver_connect_timeout, - )?); - } - if let Some(lagging_wal_timeout) = item.get("lagging_wal_timeout") { - t_conf.lagging_wal_timeout = Some(parse_toml_duration( - "lagging_wal_timeout", - lagging_wal_timeout, - )?); - } - if let Some(max_lsn_wal_lag) = item.get("max_lsn_wal_lag") { - t_conf.max_lsn_wal_lag = - Some(deserialize_from_item("max_lsn_wal_lag", max_lsn_wal_lag)?); - } - if let Some(trace_read_requests) = item.get("trace_read_requests") { - t_conf.trace_read_requests = - Some(trace_read_requests.as_bool().with_context(|| { - "configure option trace_read_requests is not a bool".to_string() - })?); - } - - if let Some(eviction_policy) = item.get("eviction_policy") { - t_conf.eviction_policy = Some( - deserialize_from_item("eviction_policy", eviction_policy) - .context("parse eviction_policy")?, - ); - } - - if let Some(item) = item.get("min_resident_size_override") { - t_conf.min_resident_size_override = Some( - deserialize_from_item("min_resident_size_override", item) - .context("parse min_resident_size_override")?, - ); - } - - if let Some(item) = item.get("evictions_low_residence_duration_metric_threshold") { - t_conf.evictions_low_residence_duration_metric_threshold = 
Some(parse_toml_duration( - "evictions_low_residence_duration_metric_threshold", - item, - )?); - } - - if let Some(gc_feedback) = item.get("gc_feedback") { - t_conf.gc_feedback = Some( - gc_feedback - .as_bool() - .with_context(|| "configure option gc_feedback is not a bool".to_string())?, - ); - } - - Ok(t_conf) - } - #[cfg(test)] pub fn test_repo_dir(test_name: &str) -> Utf8PathBuf { - Utf8PathBuf::from(format!("../tmp_check/test_{test_name}")) + let test_output_dir = std::env::var("TEST_OUTPUT").unwrap_or("../tmp_check".into()); + Utf8PathBuf::from(format!("{test_output_dir}/test_{test_name}")) } pub fn dummy_conf(repo_dir: Utf8PathBuf) -> Self { @@ -1417,6 +1325,37 @@ trace_read_requests = {trace_read_requests}"#, Ok(()) } + #[test] + fn parse_incorrect_tenant_config() -> anyhow::Result<()> { + let config_string = r#" + [tenant_config] + checkpoint_distance = -1 # supposed to be an u64 + "# + .to_string(); + + let toml: Document = config_string.parse()?; + let item = toml.get("tenant_config").unwrap(); + let error = TenantConfOpt::try_from(item.to_owned()).unwrap_err(); + + let expected_error_str = "checkpoint_distance: invalid value: integer `-1`, expected u64"; + assert_eq!(error.to_string(), expected_error_str); + + Ok(()) + } + + #[test] + fn parse_override_tenant_config() -> anyhow::Result<()> { + let config_string = r#"tenant_config={ min_resident_size_override = 400 }"#.to_string(); + + let toml: Document = config_string.parse()?; + let item = toml.get("tenant_config").unwrap(); + let conf = TenantConfOpt::try_from(item.to_owned()).unwrap(); + + assert_eq!(conf.min_resident_size_override, Some(400)); + + Ok(()) + } + #[test] fn eviction_pageserver_config_parse() -> anyhow::Result<()> { let tempdir = tempdir()?; diff --git a/pageserver/src/consumption_metrics/metrics.rs b/pageserver/src/consumption_metrics/metrics.rs index 2989e15e8eaa..c6ff91e560e3 100644 --- a/pageserver/src/consumption_metrics/metrics.rs +++ b/pageserver/src/consumption_metrics/metrics.rs @@ -1,5 +1,4 @@ -use crate::context::RequestContext; -use anyhow::Context; +use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize}; use chrono::{DateTime, Utc}; use consumption_metrics::EventType; use futures::stream::StreamExt; @@ -351,14 +350,12 @@ impl TimelineSnapshot { let last_record_lsn = t.get_last_record_lsn(); let current_exact_logical_size = { - let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_id, timeline_id = %t.timeline_id); - let res = span - .in_scope(|| t.get_current_logical_size(ctx)) - .context("get_current_logical_size"); - match res? { + let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_shard_id.tenant_id, timeline_id = %t.timeline_id); + let size = span.in_scope(|| t.get_current_logical_size(ctx)); + match size { // Only send timeline logical size when it is fully calculated. 
- (size, is_exact) if is_exact => Some(size), - (_, _) => None, + CurrentLogicalSize::Exact(ref size) => Some(size.into()), + CurrentLogicalSize::Approximate(_) => None, } }; diff --git a/pageserver/src/control_plane_client.rs b/pageserver/src/control_plane_client.rs index f50c19a6295a..25ae3d1b0168 100644 --- a/pageserver/src/control_plane_client.rs +++ b/pageserver/src/control_plane_client.rs @@ -1,16 +1,15 @@ use std::collections::HashMap; -use pageserver_api::control_api::{ - ReAttachRequest, ReAttachResponse, ValidateRequest, ValidateRequestTenant, ValidateResponse, +use pageserver_api::{ + control_api::{ + ReAttachRequest, ReAttachResponse, ValidateRequest, ValidateRequestTenant, ValidateResponse, + }, + shard::TenantShardId, }; use serde::{de::DeserializeOwned, Serialize}; use tokio_util::sync::CancellationToken; use url::Url; -use utils::{ - backoff, - generation::Generation, - id::{NodeId, TenantId}, -}; +use utils::{backoff, generation::Generation, id::NodeId}; use crate::config::PageServerConf; @@ -31,11 +30,11 @@ pub enum RetryForeverError { #[async_trait::async_trait] pub trait ControlPlaneGenerationsApi { - async fn re_attach(&self) -> Result, RetryForeverError>; + async fn re_attach(&self) -> Result, RetryForeverError>; async fn validate( &self, - tenants: Vec<(TenantId, Generation)>, - ) -> Result, RetryForeverError>; + tenants: Vec<(TenantShardId, Generation)>, + ) -> Result, RetryForeverError>; } impl ControlPlaneClient { @@ -127,7 +126,7 @@ impl ControlPlaneClient { #[async_trait::async_trait] impl ControlPlaneGenerationsApi for ControlPlaneClient { /// Block until we get a successful response, or error out if we are shut down - async fn re_attach(&self) -> Result, RetryForeverError> { + async fn re_attach(&self) -> Result, RetryForeverError> { let re_attach_path = self .base_url .join("re-attach") @@ -154,8 +153,8 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient { /// Block until we get a successful response, or error out if we are shut down async fn validate( &self, - tenants: Vec<(TenantId, Generation)>, - ) -> Result, RetryForeverError> { + tenants: Vec<(TenantShardId, Generation)>, + ) -> Result, RetryForeverError> { let re_attach_path = self .base_url .join("validate") diff --git a/pageserver/src/deletion_queue.rs b/pageserver/src/deletion_queue.rs index 86be1b7094f0..7b0574548395 100644 --- a/pageserver/src/deletion_queue.rs +++ b/pageserver/src/deletion_queue.rs @@ -10,11 +10,12 @@ use crate::control_plane_client::ControlPlaneGenerationsApi; use crate::metrics; use crate::tenant::remote_timeline_client::remote_layer_path; use crate::tenant::remote_timeline_client::remote_timeline_path; +use crate::tenant::remote_timeline_client::LayerFileMetadata; use crate::virtual_file::MaybeFatalIo; use crate::virtual_file::VirtualFile; use anyhow::Context; use camino::Utf8PathBuf; -use hex::FromHex; +use pageserver_api::shard::TenantShardId; use remote_storage::{GenericRemoteStorage, RemotePath}; use serde::Deserialize; use serde::Serialize; @@ -25,7 +26,7 @@ use tracing::Instrument; use tracing::{self, debug, error}; use utils::crashsafe::path_with_suffix_extension; use utils::generation::Generation; -use utils::id::{TenantId, TimelineId}; +use utils::id::TimelineId; use utils::lsn::AtomicLsn; use utils::lsn::Lsn; @@ -159,11 +160,10 @@ pub struct DeletionQueueClient { lsn_table: Arc>, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] struct TenantDeletionList { /// For each Timeline, a list of key fragments to 
append to the timeline remote path /// when reconstructing a full key - #[serde(serialize_with = "to_hex_map", deserialize_with = "from_hex_map")] timelines: HashMap>, /// The generation in which this deletion was emitted: note that this may not be the @@ -178,43 +178,11 @@ impl TenantDeletionList { } } -/// For HashMaps using a `hex` compatible key, where we would like to encode the key as a string -fn to_hex_map(input: &HashMap, serializer: S) -> Result -where - S: serde::Serializer, - V: Serialize, - I: AsRef<[u8]>, -{ - let transformed = input.iter().map(|(k, v)| (hex::encode(k), v)); - - transformed - .collect::>() - .serialize(serializer) -} - -/// For HashMaps using a FromHex key, where we would like to decode the key -fn from_hex_map<'de, D, V, I>(deserializer: D) -> Result, D::Error> -where - D: serde::de::Deserializer<'de>, - V: Deserialize<'de>, - I: FromHex + std::hash::Hash + Eq, -{ - let hex_map = HashMap::::deserialize(deserializer)?; - hex_map - .into_iter() - .map(|(k, v)| { - I::from_hex(k) - .map(|k| (k, v)) - .map_err(|_| serde::de::Error::custom("Invalid hex ID")) - }) - .collect() -} - /// Files ending with this suffix will be ignored and erased /// during recovery as startup. const TEMP_SUFFIX: &str = "tmp"; -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] struct DeletionList { /// Serialization version, for future use version: u8, @@ -226,8 +194,7 @@ struct DeletionList { /// nested HashMaps by TenantTimelineID. Each Tenant only appears once /// with one unique generation ID: if someone tries to push a second generation /// ID for the same tenant, we will start a new DeletionList. - #[serde(serialize_with = "to_hex_map", deserialize_with = "from_hex_map")] - tenants: HashMap, + tenants: HashMap, /// Avoid having to walk `tenants` to calculate the number of keys in /// the nested deletion lists @@ -299,7 +266,7 @@ impl DeletionList { /// deletion list. fn push( &mut self, - tenant: &TenantId, + tenant: &TenantShardId, timeline: &TimelineId, generation: Generation, objects: &mut Vec, @@ -391,7 +358,7 @@ struct TenantLsnState { #[derive(Default)] struct VisibleLsnUpdates { - tenants: HashMap, + tenants: HashMap, } impl VisibleLsnUpdates { @@ -448,7 +415,7 @@ impl DeletionQueueClient { pub(crate) fn recover( &self, - attached_tenants: HashMap, + attached_tenants: HashMap, ) -> Result<(), DeletionQueueError> { self.do_push( &self.tx, @@ -465,7 +432,7 @@ impl DeletionQueueClient { /// backend will later wake up and notice that the tenant's generation requires validation. pub(crate) async fn update_remote_consistent_lsn( &self, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, current_generation: Generation, lsn: Lsn, @@ -476,10 +443,13 @@ impl DeletionQueueClient { .write() .expect("Lock should never be poisoned"); - let tenant_entry = locked.tenants.entry(tenant_id).or_insert(TenantLsnState { - timelines: HashMap::new(), - generation: current_generation, - }); + let tenant_entry = locked + .tenants + .entry(tenant_shard_id) + .or_insert(TenantLsnState { + timelines: HashMap::new(), + generation: current_generation, + }); if tenant_entry.generation != current_generation { // Generation might have changed if we were detached and then re-attached: in this case, @@ -506,28 +476,29 @@ impl DeletionQueueClient { /// generations in `layers` are the generations in which those layers were written. 
pub(crate) async fn push_layers( &self, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, current_generation: Generation, - layers: Vec<(LayerFileName, Generation)>, + layers: Vec<(LayerFileName, LayerFileMetadata)>, ) -> Result<(), DeletionQueueError> { if current_generation.is_none() { debug!("Enqueuing deletions in legacy mode, skipping queue"); let mut layer_paths = Vec::new(); - for (layer, generation) in layers { + for (layer, meta) in layers { layer_paths.push(remote_layer_path( - &tenant_id, + &tenant_shard_id.tenant_id, &timeline_id, + meta.shard, &layer, - generation, + meta.generation, )); } self.push_immediate(layer_paths).await?; return self.flush_immediate().await; } - self.push_layers_sync(tenant_id, timeline_id, current_generation, layers) + self.push_layers_sync(tenant_shard_id, timeline_id, current_generation, layers) } /// When a Tenant has a generation, push_layers is always synchronous because @@ -537,10 +508,10 @@ impl DeletionQueueClient { /// support (``) pub(crate) fn push_layers_sync( &self, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, current_generation: Generation, - layers: Vec<(LayerFileName, Generation)>, + layers: Vec<(LayerFileName, LayerFileMetadata)>, ) -> Result<(), DeletionQueueError> { metrics::DELETION_QUEUE .keys_submitted @@ -548,7 +519,7 @@ impl DeletionQueueClient { self.do_push( &self.tx, ListWriterQueueMessage::Delete(DeletionOp { - tenant_id, + tenant_shard_id, timeline_id, layers, generation: current_generation, @@ -751,6 +722,7 @@ impl DeletionQueue { mod test { use camino::Utf8Path; use hex_literal::hex; + use pageserver_api::shard::ShardIndex; use std::{io::ErrorKind, time::Duration}; use tracing::info; @@ -815,12 +787,12 @@ mod test { } fn set_latest_generation(&self, gen: Generation) { - let tenant_id = self.harness.tenant_id; + let tenant_shard_id = self.harness.tenant_shard_id; self.mock_control_plane .latest_generation .lock() .unwrap() - .insert(tenant_id, gen); + .insert(tenant_shard_id, gen); } /// Returns remote layer file name, suitable for use in assert_remote_files @@ -829,8 +801,8 @@ mod test { file_name: LayerFileName, gen: Generation, ) -> anyhow::Result { - let tenant_id = self.harness.tenant_id; - let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID); + let tenant_shard_id = self.harness.tenant_shard_id; + let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID); let remote_timeline_path = self.remote_fs_dir.join(relative_remote_path.get_path()); std::fs::create_dir_all(&remote_timeline_path)?; let remote_layer_file_name = format!("{}{}", file_name, gen.get_suffix()); @@ -848,7 +820,7 @@ mod test { #[derive(Debug, Clone)] struct MockControlPlane { - pub latest_generation: std::sync::Arc>>, + pub latest_generation: std::sync::Arc>>, } impl MockControlPlane { @@ -862,20 +834,20 @@ mod test { #[async_trait::async_trait] impl ControlPlaneGenerationsApi for MockControlPlane { #[allow(clippy::diverging_sub_expression)] // False positive via async_trait - async fn re_attach(&self) -> Result, RetryForeverError> { + async fn re_attach(&self) -> Result, RetryForeverError> { unimplemented!() } async fn validate( &self, - tenants: Vec<(TenantId, Generation)>, - ) -> Result, RetryForeverError> { + tenants: Vec<(TenantShardId, Generation)>, + ) -> Result, RetryForeverError> { let mut result = HashMap::new(); let latest_generation = self.latest_generation.lock().unwrap(); - for (tenant_id, generation) in tenants { - if let 
Some(latest) = latest_generation.get(&tenant_id) { - result.insert(tenant_id, *latest == generation); + for (tenant_shard_id, generation) in tenants { + if let Some(latest) = latest_generation.get(&tenant_shard_id) { + result.insert(tenant_shard_id, *latest == generation); } } @@ -979,10 +951,10 @@ mod test { client.recover(HashMap::new())?; let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(); - let tenant_id = ctx.harness.tenant_id; + let tenant_shard_id = ctx.harness.tenant_shard_id; let content: Vec = "victim1 contents".into(); - let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID); + let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID); let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path()); let deletion_prefix = ctx.harness.conf.deletion_prefix(); @@ -990,6 +962,8 @@ mod test { // we delete, and the generation of the running Tenant. let layer_generation = Generation::new(0xdeadbeef); let now_generation = Generation::new(0xfeedbeef); + let layer_metadata = + LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded()); let remote_layer_file_name_1 = format!("{}{}", layer_file_name_1, layer_generation.get_suffix()); @@ -1010,10 +984,10 @@ mod test { info!("Pushing"); client .push_layers( - tenant_id, + tenant_shard_id, TIMELINE_ID, now_generation, - [(layer_file_name_1.clone(), layer_generation)].to_vec(), + [(layer_file_name_1.clone(), layer_metadata)].to_vec(), ) .await?; assert_remote_files(&[&remote_layer_file_name_1], &remote_timeline_path); @@ -1052,11 +1026,13 @@ mod test { let stale_generation = latest_generation.previous(); // Generation that our example layer file was written with let layer_generation = stale_generation.previous(); + let layer_metadata = + LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded()); ctx.set_latest_generation(latest_generation); - let tenant_id = ctx.harness.tenant_id; - let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID); + let tenant_shard_id = ctx.harness.tenant_shard_id; + let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID); let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path()); // Initial state: a remote layer exists @@ -1066,10 +1042,10 @@ mod test { tracing::debug!("Pushing..."); client .push_layers( - tenant_id, + tenant_shard_id, TIMELINE_ID, stale_generation, - [(EXAMPLE_LAYER_NAME.clone(), layer_generation)].to_vec(), + [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(), ) .await?; @@ -1081,10 +1057,10 @@ mod test { tracing::debug!("Pushing..."); client .push_layers( - tenant_id, + tenant_shard_id, TIMELINE_ID, latest_generation, - [(EXAMPLE_LAYER_NAME.clone(), layer_generation)].to_vec(), + [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(), ) .await?; @@ -1103,14 +1079,16 @@ mod test { let client = ctx.deletion_queue.new_client(); client.recover(HashMap::new())?; - let tenant_id = ctx.harness.tenant_id; + let tenant_shard_id = ctx.harness.tenant_shard_id; - let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID); + let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID); let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path()); let deletion_prefix = ctx.harness.conf.deletion_prefix(); let layer_generation = Generation::new(0xdeadbeef); let 
now_generation = Generation::new(0xfeedbeef); + let layer_metadata = + LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded()); // Inject a deletion in the generation before generation_now: after restart, // this deletion should _not_ get executed (only the immediately previous @@ -1119,10 +1097,10 @@ mod test { ctx.write_remote_layer(EXAMPLE_LAYER_NAME, layer_generation)?; client .push_layers( - tenant_id, + tenant_shard_id, TIMELINE_ID, now_generation.previous(), - [(EXAMPLE_LAYER_NAME.clone(), layer_generation)].to_vec(), + [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(), ) .await?; @@ -1133,10 +1111,10 @@ mod test { ctx.write_remote_layer(EXAMPLE_LAYER_NAME_ALT, layer_generation)?; client .push_layers( - tenant_id, + tenant_shard_id, TIMELINE_ID, now_generation, - [(EXAMPLE_LAYER_NAME_ALT.clone(), layer_generation)].to_vec(), + [(EXAMPLE_LAYER_NAME_ALT.clone(), layer_metadata.clone())].to_vec(), ) .await?; @@ -1164,7 +1142,7 @@ mod test { drop(client); ctx.restart().await; let client = ctx.deletion_queue.new_client(); - client.recover(HashMap::from([(tenant_id, now_generation)]))?; + client.recover(HashMap::from([(tenant_shard_id, now_generation)]))?; info!("Flush-executing"); client.flush_execute().await?; @@ -1226,12 +1204,13 @@ pub(crate) mod mock { match msg { ListWriterQueueMessage::Delete(op) => { let mut objects = op.objects; - for (layer, generation) in op.layers { + for (layer, meta) in op.layers { objects.push(remote_layer_path( - &op.tenant_id, + &op.tenant_shard_id.tenant_id, &op.timeline_id, + meta.shard, &layer, - generation, + meta.generation, )); } @@ -1311,4 +1290,34 @@ pub(crate) mod mock { } } } + + /// Test round-trip serialization/deserialization, and test stability of the format + /// vs. a static expected string for the serialized version. 
+ #[test] + fn deletion_list_serialization() -> anyhow::Result<()> { + let tenant_id = "ad6c1a56f5680419d3a16ff55d97ec3c" + .to_string() + .parse::()?; + let timeline_id = "be322c834ed9e709e63b5c9698691910" + .to_string() + .parse::()?; + let generation = Generation::new(123); + + let object = + RemotePath::from_string(&format!("tenants/{tenant_id}/timelines/{timeline_id}/foo"))?; + let mut objects = [object].to_vec(); + + let mut example = DeletionList::new(1); + example.push(&tenant_id, &timeline_id, generation, &mut objects); + + let encoded = serde_json::to_string(&example)?; + + let expected = "{\"version\":1,\"sequence\":1,\"tenants\":{\"ad6c1a56f5680419d3a16ff55d97ec3c\":{\"timelines\":{\"be322c834ed9e709e63b5c9698691910\":[\"foo\"]},\"generation\":123}},\"size\":1}".to_string(); + assert_eq!(encoded, expected); + + let decoded = serde_json::from_str::(&encoded)?; + assert_eq!(example, decoded); + + Ok(()) + } } diff --git a/pageserver/src/deletion_queue/list_writer.rs b/pageserver/src/deletion_queue/list_writer.rs index 28daae2da573..7ff27ceb4413 100644 --- a/pageserver/src/deletion_queue/list_writer.rs +++ b/pageserver/src/deletion_queue/list_writer.rs @@ -19,6 +19,7 @@ use std::collections::HashMap; use std::fs::create_dir_all; use std::time::Duration; +use pageserver_api::shard::TenantShardId; use regex::Regex; use remote_storage::RemotePath; use tokio_util::sync::CancellationToken; @@ -26,13 +27,13 @@ use tracing::debug; use tracing::info; use tracing::warn; use utils::generation::Generation; -use utils::id::TenantId; use utils::id::TimelineId; use crate::config::PageServerConf; use crate::deletion_queue::TEMP_SUFFIX; use crate::metrics; use crate::tenant::remote_timeline_client::remote_layer_path; +use crate::tenant::remote_timeline_client::LayerFileMetadata; use crate::tenant::storage_layer::LayerFileName; use crate::virtual_file::on_fatal_io_error; use crate::virtual_file::MaybeFatalIo; @@ -53,22 +54,22 @@ const FRONTEND_FLUSHING_TIMEOUT: Duration = Duration::from_millis(100); #[derive(Debug)] pub(super) struct DeletionOp { - pub(super) tenant_id: TenantId, + pub(super) tenant_shard_id: TenantShardId, pub(super) timeline_id: TimelineId, // `layers` and `objects` are both just lists of objects. `layers` is used if you do not // have a config object handy to project it to a remote key, and need the consuming worker // to do it for you. - pub(super) layers: Vec<(LayerFileName, Generation)>, + pub(super) layers: Vec<(LayerFileName, LayerFileMetadata)>, pub(super) objects: Vec, - /// The _current_ generation of the Tenant attachment in which we are enqueuing + /// The _current_ generation of the Tenant shard attachment in which we are enqueuing /// this deletion. pub(super) generation: Generation, } #[derive(Debug)] pub(super) struct RecoverOp { - pub(super) attached_tenants: HashMap, + pub(super) attached_tenants: HashMap, } #[derive(Debug)] @@ -205,7 +206,7 @@ impl ListWriter { async fn recover( &mut self, - attached_tenants: HashMap, + attached_tenants: HashMap, ) -> Result<(), anyhow::Error> { debug!( "recovering with {} attached tenants", @@ -308,8 +309,8 @@ impl ListWriter { // generation was issued to another node in the interval while we restarted, // then we may treat deletion lists from the previous generation as if they // belong to our currently attached generation, and proceed to validate & execute. 
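The comment above states the recovery rule: a deletion list written in the generation immediately before the currently attached one is adopted into the attached generation, while anything older is left for validation. A toy sketch of just that rule, with a stand-in `Gen` type rather than the repo's `utils::generation::Generation`:

    /// Toy stand-in for the real Generation type, just enough for the rule below.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct Gen(u32);

    impl Gen {
        fn previous(self) -> Gen {
            Gen(self.0.saturating_sub(1))
        }
    }

    /// If the list was written in the generation immediately before the currently
    /// attached one, treat it as belonging to the attached generation.
    fn adopt_if_previous(attached: Gen, list_generation: &mut Gen) -> bool {
        if attached.previous() == *list_generation {
            *list_generation = attached;
            true
        } else {
            false
        }
    }

    fn main() {
        let attached = Gen(8);

        let mut recent = Gen(7);
        assert!(adopt_if_previous(attached, &mut recent));
        assert_eq!(recent, attached);

        let mut stale = Gen(5);
        assert!(!adopt_if_previous(attached, &mut stale));
    }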
- for (tenant_id, tenant_list) in &mut deletion_list.tenants { - if let Some(attached_gen) = attached_tenants.get(tenant_id) { + for (tenant_shard_id, tenant_list) in &mut deletion_list.tenants { + if let Some(attached_gen) = attached_tenants.get(tenant_shard_id) { if attached_gen.previous() == tenant_list.generation { tenant_list.generation = *attached_gen; } @@ -387,25 +388,26 @@ impl ListWriter { ); let mut layer_paths = Vec::new(); - for (layer, generation) in op.layers { + for (layer, meta) in op.layers { layer_paths.push(remote_layer_path( - &op.tenant_id, + &op.tenant_shard_id.tenant_id, &op.timeline_id, + meta.shard, &layer, - generation, + meta.generation, )); } layer_paths.extend(op.objects); if !self.pending.push( - &op.tenant_id, + &op.tenant_shard_id, &op.timeline_id, op.generation, &mut layer_paths, ) { self.flush().await; let retry_succeeded = self.pending.push( - &op.tenant_id, + &op.tenant_shard_id, &op.timeline_id, op.generation, &mut layer_paths, diff --git a/pageserver/src/deletion_queue/validator.rs b/pageserver/src/deletion_queue/validator.rs index 72bdbdefd6a3..bf06c78e673f 100644 --- a/pageserver/src/deletion_queue/validator.rs +++ b/pageserver/src/deletion_queue/validator.rs @@ -178,7 +178,14 @@ where .unwrap_or(false); if valid && *validated_generation == tenant_lsn_state.generation { - for (_timeline_id, pending_lsn) in tenant_lsn_state.timelines { + for (timeline_id, pending_lsn) in tenant_lsn_state.timelines { + tracing::debug!( + %tenant_id, + %timeline_id, + current = %pending_lsn.result_slot.load(), + projected = %pending_lsn.projected, + "advancing validated remote_consistent_lsn", + ); pending_lsn.result_slot.store(pending_lsn.projected); } } else { diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs index 642cafad285b..f01cd1cf8c9a 100644 --- a/pageserver/src/disk_usage_eviction_task.rs +++ b/pageserver/src/disk_usage_eviction_task.rs @@ -310,7 +310,7 @@ pub async fn disk_usage_eviction_task_iteration_impl( .unwrap() .as_micros(), partition, - desc.tenant_id, + desc.tenant_shard_id, desc.timeline_id, candidate.layer, ); @@ -380,7 +380,7 @@ pub async fn disk_usage_eviction_task_iteration_impl( let limit = Arc::new(tokio::sync::Semaphore::new(1000.max(max_batch_size))); for (timeline, batch) in batched { - let tenant_id = timeline.tenant_id; + let tenant_shard_id = timeline.tenant_shard_id; let timeline_id = timeline.timeline_id; let batch_size = u32::try_from(batch.len()).expect("batch size limited to u32::MAX during partitioning"); @@ -431,7 +431,7 @@ pub async fn disk_usage_eviction_task_iteration_impl( (evicted_bytes, evictions_failed) } } - .instrument(tracing::info_span!("evict_batch", %tenant_id, %timeline_id, batch_size)); + .instrument(tracing::info_span!("evict_batch", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id, batch_size)); js.spawn(evict); @@ -572,7 +572,7 @@ async fn collect_eviction_candidates( continue; } let info = tl.get_local_layers_for_disk_usage_eviction().await; - debug!(tenant_id=%tl.tenant_id, timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len()); + debug!(tenant_id=%tl.tenant_shard_id.tenant_id, shard_id=%tl.tenant_shard_id.shard_slug(), timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len()); tenant_candidates.extend( info.resident_layers .into_iter() diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 
4d455243f0eb..237109abfec8 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -624,6 +624,99 @@ paths: $ref: "#/components/schemas/ServiceUnavailableError" + /v1/tenant/{tenant_id}/location_config: + parameters: + - name: tenant_id + in: path + required: true + schema: + type: string + format: hex + - name: flush_ms + in: query + required: false + schema: + type: integer + put: + description: | + Configures a _tenant location_, that is how a particular pageserver handles + a particular tenant. This includes _attached_ tenants, i.e. those ingesting WAL + and page service requests, and _secondary_ tenants, i.e. those which are just keeping + a warm cache in anticipation of transitioning to attached state in the future. + + This is a declarative, idempotent API: there are not separate endpoints + for different tenant location configurations. Rather, this single endpoint accepts + a description of the desired location configuration, and makes whatever changes + are required to reach that state. + + In imperative terms, this API is used to attach and detach tenants, and + to transition tenants to and from secondary mode. + + This is a synchronous API: there is no 202 response. State transitions should always + be fast (milliseconds), with the exception of requests setting `flush_ms`, in which case + the caller controls the runtime of the request. + + In some state transitions, it makes sense to flush dirty data to remote storage: this includes transitions + to AttachedStale and Detached. Flushing is never necessary for correctness, but is an + important optimization when doing migrations. The `flush_ms` parameter controls whether + flushing should be attempted, and how much time is allowed for flushing. If the time limit expires, + the requested transition will continue without waiting for any outstanding data to flush. Callers + should use a duration which is substantially less than their HTTP client's request + timeout. It is safe to supply flush_ms irrespective of the request body: in state transitions + where flushing doesn't make sense, the server will ignore it. + + It is safe to retry requests, but if one receives a 409 or 503 response, it is not + useful to retry aggressively: there is probably an existing request still ongoing. + requestBody: + required: false + content: + application/json: + schema: + $ref: "#/components/schemas/TenantLocationConfigRequest" + responses: + "200": + description: Tenant is now in requested state + "503": + description: Tenant's state cannot be changed right now. Wait a few seconds and retry. + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "401": + description: Unauthorized Error + content: + application/json: + schema: + $ref: "#/components/schemas/UnauthorizedError" + "403": + description: Forbidden Error + content: + application/json: + schema: + $ref: "#/components/schemas/ForbiddenError" + "409": + description: | + The tenant is already known to Pageserver in some way, + and hence this `/attach` call has been rejected. + + Some examples of how this can happen: + - tenant was created on this pageserver + - tenant attachment was started by an earlier call to `/attach`. + + Callers should poll the tenant status's `attachment_status` field, + like for status 202. See the longer description for `POST /attach` + for details. 
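As an illustration only, a client-side sketch of driving this endpoint: it PUTs a body shaped like the `TenantLocationConfigRequest` schema defined further down, with `flush_ms` bounding the flush time. Host, port, tenant id and generation are placeholders, and the example assumes the reqwest crate (with its `json` feature), serde_json and tokio rather than any client that ships with the repo:

    use serde_json::json;

    #[tokio::main]
    async fn main() -> Result<(), reqwest::Error> {
        // Placeholder endpoint and tenant id; flush_ms bounds how long the server
        // may spend flushing before completing the state transition.
        let url = "http://127.0.0.1:9898/v1/tenant/ad6c1a56f5680419d3a16ff55d97ec3c/location_config?flush_ms=5000";

        // Body shaped after TenantLocationConfigRequest: mode is one of the enum
        // values from the schema; generation is required for attached modes.
        let body = json!({
            "tenant_id": "ad6c1a56f5680419d3a16ff55d97ec3c",
            "mode": "AttachedSingle",
            "generation": 42,
            "tenant_conf": {}
        });

        let resp = reqwest::Client::new().put(url).json(&body).send().await?;
        // 200 means the tenant is in the requested state; on 409/503 another
        // transition is likely in flight, so aggressive retries are not useful.
        println!("status: {}", resp.status());
        Ok(())
    }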
+ content: + application/json: + schema: + $ref: "#/components/schemas/ConflictError" + "500": + description: Generic operation error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /v1/tenant/{tenant_id}/detach: parameters: - name: tenant_id @@ -935,6 +1028,9 @@ paths: format: hex pg_version: type: integer + existing_initdb_timeline_id: + type: string + format: hex responses: "201": description: TimelineInfo @@ -1274,6 +1370,31 @@ components: tenant_id: type: string format: hex + TenantLocationConfigRequest: + type: object + required: + - tenant_id + properties: + tenant_id: + type: string + format: hex + mode: + type: string + enum: ["AttachedSingle", "AttachedMulti", "AttachedStale", "Secondary", "Detached"] + description: Mode of functionality that this pageserver will run in for this tenant. + generation: + type: integer + description: Attachment generation number, mandatory when `mode` is an attached state + secondary_conf: + $ref: '#/components/schemas/SecondaryConfig' + tenant_conf: + $ref: '#/components/schemas/TenantConfig' + SecondaryConfig: + type: object + properties: + warm: + type: boolean + description: Whether to poll remote storage for layers to download. If false, secondary locations don't download anything. TenantConfig: type: object properties: diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 5ce09500ee81..29a1ff52e818 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -4,6 +4,7 @@ use std::collections::HashMap; use std::str::FromStr; use std::sync::Arc; +use std::time::Duration; use anyhow::{anyhow, Context, Result}; use enumset::EnumSet; @@ -337,13 +338,7 @@ async fn build_timeline_info_common( Lsn(0) => None, lsn @ Lsn(_) => Some(lsn), }; - let current_logical_size = match timeline.get_current_logical_size(ctx) { - Ok((size, _)) => Some(size), - Err(err) => { - error!("Timeline info creation failed to get current logical size: {err:?}"); - None - } - }; + let current_logical_size = timeline.get_current_logical_size(ctx); let current_physical_size = Some(timeline.layer_size_sum().await); let state = timeline.current_state(); let remote_consistent_lsn_projected = timeline @@ -356,7 +351,8 @@ async fn build_timeline_info_common( let walreceiver_status = timeline.walreceiver_status(); let info = TimelineInfo { - tenant_id: timeline.tenant_id, + // TODO(sharding): add a shard_id field, or make tenant_id into a tenant_shard_id + tenant_id: timeline.tenant_shard_id.tenant_id, timeline_id: timeline.timeline_id, ancestor_timeline_id, ancestor_lsn, @@ -366,7 +362,11 @@ async fn build_timeline_info_common( last_record_lsn, prev_record_lsn: Some(timeline.get_prev_record_lsn()), latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(), - current_logical_size, + current_logical_size: current_logical_size.size_dont_care_about_accuracy(), + current_logical_size_is_accurate: match current_logical_size.accuracy() { + tenant::timeline::logical_size::Accuracy::Approximate => false, + tenant::timeline::logical_size::Accuracy::Exact => true, + }, current_physical_size, current_logical_size_non_incremental: None, timeline_dir_layer_file_size_sum: None, @@ -439,6 +439,7 @@ async fn timeline_create_handler( request_data.ancestor_timeline_id.map(TimelineId::from), request_data.ancestor_start_lsn, request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION), + request_data.existing_initdb_timeline_id, state.broker_client.clone(), &ctx, ) @@ -1157,6 +1158,7 @@ async fn 
put_tenant_location_config_handler( let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; let request_data: TenantLocationConfigRequest = json_request(&mut request).await?; + let flush = parse_query_param(&request, "flush_ms")?.map(Duration::from_millis); check_permission(&request, Some(tenant_shard_id.tenant_id))?; let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn); @@ -1189,7 +1191,7 @@ async fn put_tenant_location_config_handler( state .tenant_manager - .upsert_location(tenant_shard_id, location_conf, &ctx) + .upsert_location(tenant_shard_id, location_conf, flush, &ctx) .await // TODO: badrequest assumes the caller was asking for something unreasonable, but in // principle we might have hit something like concurrent API calls to the same tenant, diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index 770458e02e8d..452cd73f76b9 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -7,12 +7,13 @@ use std::pin::Pin; use std::task::{self, Poll}; use anyhow::{bail, ensure, Context, Result}; +use async_compression::tokio::bufread::ZstdDecoder; use async_compression::{tokio::write::ZstdEncoder, zstd::CParameter, Level}; use bytes::Bytes; use camino::Utf8Path; use futures::StreamExt; use nix::NixPath; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use tokio::io::{AsyncBufRead, AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use tokio_tar::Archive; use tokio_tar::Builder; use tokio_tar::HeaderMode; @@ -732,3 +733,13 @@ pub async fn create_tar_zst(pgdata_path: &Utf8Path) -> Result> { } Ok(compressed.buf) } + +pub async fn extract_tar_zst( + pgdata_path: &Utf8Path, + tar_zst: impl AsyncBufRead + Unpin, +) -> Result<()> { + let tar = Box::pin(ZstdDecoder::new(tar_zst)); + let mut archive = Archive::new(tar); + archive.unpack(pgdata_path).await?; + Ok(()) +} diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index d5915f4c9839..d2684691e01a 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -7,6 +7,7 @@ use metrics::{ HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; +use pageserver_api::shard::TenantShardId; use strum::{EnumCount, IntoEnumIterator, VariantNames}; use strum_macros::{EnumVariantNames, IntoStaticStr}; use utils::id::{TenantId, TimelineId}; @@ -402,6 +403,126 @@ static CURRENT_LOGICAL_SIZE: Lazy = Lazy::new(|| { .expect("failed to define current logical size metric") }); +pub(crate) mod initial_logical_size { + use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec}; + use once_cell::sync::Lazy; + + use crate::task_mgr::TaskKind; + + pub(crate) struct StartCalculation(IntCounterVec); + pub(crate) static START_CALCULATION: Lazy = Lazy::new(|| { + StartCalculation( + register_int_counter_vec!( + "pageserver_initial_logical_size_start_calculation", + "Incremented each time we start an initial logical size calculation attempt. 
\ + The `task_kind` label is for the task kind that caused this attempt.", + &["attempt", "task_kind"] + ) + .unwrap(), + ) + }); + + struct DropCalculation { + first: IntCounter, + retry: IntCounter, + } + + static DROP_CALCULATION: Lazy = Lazy::new(|| { + let vec = register_int_counter_vec!( + "pageserver_initial_logical_size_drop_calculation", + "Incremented each time we abort a started size calculation attmpt.", + &["attempt"] + ) + .unwrap(); + DropCalculation { + first: vec.with_label_values(&["first"]), + retry: vec.with_label_values(&["retry"]), + } + }); + + pub(crate) struct Calculated { + pub(crate) births: IntCounter, + pub(crate) deaths: IntCounter, + } + + pub(crate) static CALCULATED: Lazy = Lazy::new(|| Calculated { + births: register_int_counter!( + "pageserver_initial_logical_size_finish_calculation", + "Incremented every time we finish calculation of initial logical size.\ + If everything is working well, this should happen at most once per Timeline object." + ) + .unwrap(), + deaths: register_int_counter!( + "pageserver_initial_logical_size_drop_finished_calculation", + "Incremented when we drop a finished initial logical size calculation result.\ + Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge." + ) + .unwrap(), + }); + + pub(crate) struct OngoingCalculationGuard { + inc_drop_calculation: Option, + } + + impl StartCalculation { + pub(crate) fn first(&self, causing_task_kind: Option) -> OngoingCalculationGuard { + let task_kind_label: &'static str = + causing_task_kind.map(|k| k.into()).unwrap_or_default(); + self.0.with_label_values(&["first", task_kind_label]); + OngoingCalculationGuard { + inc_drop_calculation: Some(DROP_CALCULATION.first.clone()), + } + } + pub(crate) fn retry(&self, causing_task_kind: Option) -> OngoingCalculationGuard { + let task_kind_label: &'static str = + causing_task_kind.map(|k| k.into()).unwrap_or_default(); + self.0.with_label_values(&["retry", task_kind_label]); + OngoingCalculationGuard { + inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()), + } + } + } + + impl Drop for OngoingCalculationGuard { + fn drop(&mut self) { + if let Some(counter) = self.inc_drop_calculation.take() { + counter.inc(); + } + } + } + + impl OngoingCalculationGuard { + pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard { + drop(self.inc_drop_calculation.take()); + CALCULATED.births.inc(); + FinishedCalculationGuard { + inc_on_drop: CALCULATED.deaths.clone(), + } + } + } + + pub(crate) struct FinishedCalculationGuard { + inc_on_drop: IntCounter, + } + + impl Drop for FinishedCalculationGuard { + fn drop(&mut self) { + self.inc_on_drop.inc(); + } + } + + // context: https://github.com/neondatabase/neon/issues/5963 + pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy = + Lazy::new(|| { + register_int_counter!( + "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size", + "Counter for the following event: walreceiver calls\ + Timeline::get_current_logical_size() and it returns `Approximate` for the first time." 
+ ) + .unwrap() + }); +} + pub(crate) static TENANT_STATE_METRIC: Lazy = Lazy::new(|| { register_uint_gauge_vec!( "pageserver_tenant_states_count", @@ -1252,6 +1373,15 @@ pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy = Lazy::new(|| { .unwrap() }); +pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy = Lazy::new(|| { + register_histogram!( + "pageserver_wal_redo_process_launch_duration", + "Histogram of the duration of successful WalRedoProcess::launch calls", + redo_histogram_time_buckets!(), + ) + .expect("failed to define a metric") +}); + pub(crate) struct WalRedoProcessCounters { pub(crate) started: IntCounter, pub(crate) killed_by_cause: enum_map::EnumMap, @@ -1571,9 +1701,9 @@ pub struct RemoteTimelineClientMetrics { } impl RemoteTimelineClientMetrics { - pub fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self { + pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self { RemoteTimelineClientMetrics { - tenant_id: tenant_id.to_string(), + tenant_id: tenant_shard_id.tenant_id.to_string(), timeline_id: timeline_id.to_string(), calls_unfinished_gauge: Mutex::new(HashMap::default()), bytes_started_counter: Mutex::new(HashMap::default()), @@ -1961,6 +2091,7 @@ pub fn preinitialize_metrics() { &WAL_REDO_TIME, &WAL_REDO_RECORDS_HISTOGRAM, &WAL_REDO_BYTES_HISTOGRAM, + &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, ] .into_iter() .for_each(|h| { diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index ee5f1732e4ac..82c16eb9bd09 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -399,6 +399,9 @@ impl PageServerHandler { { debug_assert_current_span_has_tenant_and_timeline_id(); + // TODO(sharding): enumerate local tenant shards for this tenant, and select the one + // that should serve this request. + // Make request tracer if needed let tenant = mgr::get_active_tenant_with_timeout( tenant_id, @@ -408,9 +411,10 @@ impl PageServerHandler { .await?; let mut tracer = if tenant.get_trace_read_requests() { let connection_id = ConnectionId::generate(); - let path = tenant - .conf - .trace_path(&tenant_id, &timeline_id, &connection_id); + let path = + tenant + .conf + .trace_path(&tenant.tenant_shard_id(), &timeline_id, &connection_id); Some(Tracer::new(path)) } else { None diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index 24f47df92ef6..97d731bf4909 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -138,6 +138,14 @@ pub struct GcResult { #[serde(serialize_with = "serialize_duration_as_millis")] pub elapsed: Duration, + + /// The layers which were garbage collected. + /// + /// Used in `/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc` to wait for the layers to be + /// dropped in tests. 
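The `OngoingCalculationGuard`/`FinishedCalculationGuard` pair above is a drop-guard pattern: the start is counted up front, and the `Drop` impl records an abort unless the guard was consumed by a successful finish. A self-contained sketch of the same pattern with plain atomics standing in for the Prometheus counters:

    use std::sync::atomic::{AtomicU64, Ordering};

    static DROPPED_UNFINISHED: AtomicU64 = AtomicU64::new(0);
    static FINISHED: AtomicU64 = AtomicU64::new(0);

    /// Held while a calculation is in flight; counts an abort if dropped
    /// before `finished()` is called.
    struct OngoingGuard {
        armed: bool,
    }

    impl OngoingGuard {
        fn start() -> Self {
            OngoingGuard { armed: true }
        }

        /// Consume the guard on success so Drop does not count an abort.
        fn finished(mut self) {
            self.armed = false;
            FINISHED.fetch_add(1, Ordering::Relaxed);
        }
    }

    impl Drop for OngoingGuard {
        fn drop(&mut self) {
            if self.armed {
                DROPPED_UNFINISHED.fetch_add(1, Ordering::Relaxed);
            }
        }
    }

    fn main() {
        {
            let g = OngoingGuard::start();
            g.finished(); // success path: no abort counted
        }
        {
            let _g = OngoingGuard::start();
            // dropped without finishing: counted as an abort
        }
        assert_eq!(FINISHED.load(Ordering::Relaxed), 1);
        assert_eq!(DROPPED_UNFINISHED.load(Ordering::Relaxed), 1);
    }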
+ #[cfg(feature = "testing")] + #[serde(skip)] + pub(crate) doomed_layers: Vec, } // helper function for `GcResult`, serializing a `Duration` as an integer number of milliseconds @@ -158,5 +166,11 @@ impl AddAssign for GcResult { self.layers_removed += other.layers_removed; self.elapsed += other.elapsed; + + #[cfg(feature = "testing")] + { + let mut other = other; + self.doomed_layers.append(&mut other.doomed_layers); + } } } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 7384459ab523..422cb671fe82 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -15,11 +15,16 @@ use anyhow::{bail, Context}; use bytes::Bytes; use camino::{Utf8Path, Utf8PathBuf}; use enumset::EnumSet; +use futures::stream::FuturesUnordered; use futures::FutureExt; +use futures::StreamExt; use pageserver_api::models::TimelineState; +use pageserver_api::shard::TenantShardId; use remote_storage::DownloadError; use remote_storage::GenericRemoteStorage; +use std::fmt; use storage_broker::BrokerClientChannel; +use tokio::io::BufReader; use tokio::runtime::Handle; use tokio::sync::watch; use tokio::task::JoinSet; @@ -30,26 +35,7 @@ use utils::completion; use utils::crashsafe::path_with_suffix_extension; use utils::fs_ext; use utils::sync::gate::Gate; - -use std::cmp::min; -use std::collections::hash_map::Entry; -use std::collections::BTreeSet; -use std::collections::HashMap; -use std::collections::HashSet; -use std::fmt::Debug; -use std::fmt::Display; -use std::fs; -use std::fs::File; -use std::io; -use std::ops::Bound::Included; -use std::process::Command; -use std::process::Stdio; -use std::sync::atomic::AtomicU64; -use std::sync::atomic::Ordering; -use std::sync::Arc; -use std::sync::MutexGuard; -use std::sync::{Mutex, RwLock}; -use std::time::{Duration, Instant}; +use utils::sync::gate::GateGuard; use self::config::AttachedLocationConfig; use self::config::AttachmentMode; @@ -69,6 +55,7 @@ use self::timeline::TimelineResources; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; use crate::deletion_queue::DeletionQueueClient; +use crate::deletion_queue::DeletionQueueError; use crate::import_datadir; use crate::is_uninit_mark; use crate::metrics::TENANT_ACTIVATION; @@ -84,14 +71,35 @@ use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart; use crate::tenant::storage_layer::DeltaLayer; use crate::tenant::storage_layer::ImageLayer; use crate::InitializationOrder; +use std::cmp::min; +use std::collections::hash_map::Entry; +use std::collections::BTreeSet; +use std::collections::HashMap; +use std::collections::HashSet; +use std::fmt::Debug; +use std::fmt::Display; +use std::fs; +use std::fs::File; +use std::io; +use std::ops::Bound::Included; +use std::process::Stdio; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; +use std::sync::Arc; +use std::sync::MutexGuard; +use std::sync::{Mutex, RwLock}; +use std::time::{Duration, Instant}; use crate::tenant::timeline::delete::DeleteTimelineFlow; use crate::tenant::timeline::uninit::cleanup_timeline_directory; use crate::virtual_file::VirtualFile; use crate::walredo::PostgresRedoManager; use crate::TEMP_FILE_SUFFIX; +use once_cell::sync::Lazy; pub use pageserver_api::models::TenantState; +use tokio::sync::Semaphore; +static INIT_DB_SEMAPHORE: Lazy = Lazy::new(|| Semaphore::new(8)); use toml_edit; use utils::{ crashsafe, @@ -226,7 +234,7 @@ pub struct Tenant { // This is necessary to allow global config updates. 
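The new `INIT_DB_SEMAPHORE` static caps concurrency at 8, but this hunk does not show where the permit is taken, so the following is only an assumed usage sketch of throttling initdb-style work with `tokio::sync::Semaphore`; `run_initdb_stub` is hypothetical, and the tokio and once_cell crates are assumed:

    use once_cell::sync::Lazy;
    use std::time::Duration;
    use tokio::sync::Semaphore;

    // Same shape as the static added in tenant.rs: 8 concurrent slots.
    static INIT_DB_SEMAPHORE: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(8));

    // Hypothetical stand-in for the expensive per-timeline initdb work.
    async fn run_initdb_stub(id: u32) {
        tokio::time::sleep(Duration::from_millis(10)).await;
        println!("initdb {id} done");
    }

    #[tokio::main]
    async fn main() {
        let mut tasks = Vec::new();
        for id in 0..32 {
            tasks.push(tokio::spawn(async move {
                // At most 8 permits exist, so at most 8 runs execute at once.
                let _permit = INIT_DB_SEMAPHORE
                    .acquire()
                    .await
                    .expect("semaphore is never closed");
                run_initdb_stub(id).await;
            }));
        }
        for t in tasks {
            t.await.unwrap();
        }
    }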
tenant_conf: Arc>, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, /// The remote storage generation, used to protect S3 objects from split-brain. /// Does not change over the lifetime of the [`Tenant`] object. @@ -270,7 +278,7 @@ pub struct Tenant { impl std::fmt::Debug for Tenant { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{} ({})", self.tenant_id, self.current_state()) + write!(f, "{} ({})", self.tenant_shard_id, self.current_state()) } } @@ -403,6 +411,36 @@ pub enum CreateTimelineError { Other(#[from] anyhow::Error), } +#[derive(thiserror::Error, Debug)] +enum InitdbError { + Other(anyhow::Error), + Cancelled, + Spawn(std::io::Result<()>), + Failed(std::process::ExitStatus, Vec), +} + +impl fmt::Display for InitdbError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + InitdbError::Cancelled => write!(f, "Operation was cancelled"), + InitdbError::Spawn(e) => write!(f, "Spawn error: {:?}", e), + InitdbError::Failed(status, stderr) => write!( + f, + "Command failed with status {:?}: {}", + status, + String::from_utf8_lossy(stderr) + ), + InitdbError::Other(e) => write!(f, "Error: {:?}", e), + } + } +} + +impl From for InitdbError { + fn from(error: std::io::Error) -> Self { + InitdbError::Spawn(Err(error)) + } +} + struct TenantDirectoryScan { sorted_timelines_to_load: Vec<(TimelineId, TimelineMetadata)>, timelines_to_resume_deletion: Vec<(TimelineId, Option)>, @@ -434,7 +472,7 @@ impl Tenant { init_order: Option<&InitializationOrder>, _ctx: &RequestContext, ) -> anyhow::Result<()> { - let tenant_id = self.tenant_id; + let tenant_id = self.tenant_shard_id; let timeline = self.create_timeline_struct( timeline_id, @@ -526,7 +564,7 @@ impl Tenant { #[allow(clippy::too_many_arguments)] pub(crate) fn spawn( conf: &'static PageServerConf, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, resources: TenantSharedResources, attached_conf: AttachedTenantConf, init_order: Option, @@ -534,8 +572,10 @@ impl Tenant { mode: SpawnMode, ctx: &RequestContext, ) -> anyhow::Result> { + // TODO(sharding): make WalRedoManager shard-aware let wal_redo_manager = Arc::new(WalRedoManager::from(PostgresRedoManager::new( - conf, tenant_id, + conf, + tenant_shard_id.tenant_id, ))); let TenantSharedResources { @@ -549,7 +589,7 @@ impl Tenant { conf, attached_conf, wal_redo_manager, - tenant_id, + tenant_shard_id, remote_storage.clone(), deletion_queue_client, )); @@ -561,7 +601,7 @@ impl Tenant { task_mgr::spawn( &tokio::runtime::Handle::current(), TaskKind::Attach, - Some(tenant_id), + Some(tenant_shard_id.tenant_id), None, "attach tenant", false, @@ -600,7 +640,7 @@ impl Tenant { match tenant_clone .preload(remote_storage, task_mgr::shutdown_token()) .instrument( - tracing::info_span!(parent: None, "attach_preload", tenant_id=%tenant_id), + tracing::info_span!(parent: None, "attach_preload", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()), ) .await { Ok(p) => p, @@ -682,7 +722,7 @@ impl Tenant { Ok(()) } .instrument({ - let span = tracing::info_span!(parent: None, "attach", tenant_id=%tenant_id); + let span = tracing::info_span!(parent: None, "attach", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()); span.follows_from(Span::current()); span }), @@ -700,7 +740,7 @@ impl Tenant { info!("listing remote timelines"); let (remote_timeline_ids, other_keys) = remote_timeline_client::list_remote_timelines( remote_storage, - self.tenant_id, + self.tenant_shard_id, cancel.clone(), ) 
.await?; @@ -733,7 +773,7 @@ impl Tenant { /// async fn attach( self: &Arc, - mut init_order: Option, + init_order: Option, preload: Option, ctx: &RequestContext, ) -> anyhow::Result<()> { @@ -750,31 +790,37 @@ impl Tenant { } }; - // Signal that we have completed remote phase - init_order - .as_mut() - .and_then(|x| x.initial_tenant_load_remote.take()); - let mut timelines_to_resume_deletions = vec![]; let mut remote_index_and_client = HashMap::new(); let mut timeline_ancestors = HashMap::new(); let mut existent_timelines = HashSet::new(); for (timeline_id, preload) in preload.timelines { - // In this context a timeline "exists" if it has any content in remote storage: this will - // be our cue to not delete any corresponding local directory - existent_timelines.insert(timeline_id); - let index_part = match preload.index_part { Ok(i) => { debug!("remote index part exists for timeline {timeline_id}"); + // We found index_part on the remote, this is the standard case. + existent_timelines.insert(timeline_id); i } + Err(DownloadError::NotFound) => { + // There is no index_part on the remote. We only get here + // if there is some prefix for the timeline in the remote storage. + // This can e.g. be the initdb.tar.zst archive, maybe a + // remnant from a prior incomplete creation or deletion attempt. + // Delete the local directory as the deciding criterion for a + // timeline's existence is presence of index_part. + info!(%timeline_id, "index_part not found on remote"); + continue; + } Err(e) => { - // Timeline creation is not atomic: we might upload a layer but no index_part. We expect - // that the creation will be retried by the control plane and eventually result in - // a valid loadable state. + // Some (possibly ephemeral) error happened during index_part download. + // Pretend the timeline exists to not delete the timeline directory, + // as it might be a temporary issue and we don't want to re-download + // everything after it resolves. warn!(%timeline_id, "Failed to load index_part from remote storage, failed creation? ({e})"); + + existent_timelines.insert(timeline_id); continue; } }; @@ -817,7 +863,7 @@ impl Tenant { .with_context(|| { format!( "failed to load remote timeline {} for tenant {}", - timeline_id, self.tenant_id + timeline_id, self.tenant_shard_id ) })?; } @@ -857,7 +903,7 @@ impl Tenant { /// timeline that still exists: this can happen if we crashed during a deletion/creation, or /// if a timeline was deleted while the tenant was attached to a different pageserver. 
fn clean_up_timelines(&self, existent_timelines: &HashSet) -> anyhow::Result<()> { - let timelines_dir = self.conf.timelines_path(&self.tenant_id); + let timelines_dir = self.conf.timelines_path(&self.tenant_shard_id); let entries = match timelines_dir.read_dir_utf8() { Ok(d) => d, @@ -943,7 +989,7 @@ impl Tenant { span::debug_assert_current_span_has_tenant_id(); info!("downloading index file for timeline {}", timeline_id); - tokio::fs::create_dir_all(self.conf.timeline_path(&self.tenant_id, &timeline_id)) + tokio::fs::create_dir_all(self.conf.timeline_path(&self.tenant_shard_id, &timeline_id)) .await .context("Failed to create new timeline directory")?; @@ -965,10 +1011,15 @@ impl Tenant { let init_order = None; // timeline loading after attach expects to find metadata file for each metadata - save_metadata(self.conf, &self.tenant_id, &timeline_id, &remote_metadata) - .await - .context("save_metadata") - .map_err(LoadLocalTimelineError::Load)?; + save_metadata( + self.conf, + &self.tenant_shard_id, + &timeline_id, + &remote_metadata, + ) + .await + .context("save_metadata") + .map_err(LoadLocalTimelineError::Load)?; self.timeline_init_and_sync( timeline_id, @@ -985,11 +1036,13 @@ impl Tenant { /// Create a placeholder Tenant object for a broken tenant pub fn create_broken_tenant( conf: &'static PageServerConf, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, reason: String, ) -> Arc { + // TODO(sharding): make WalRedoManager shard-aware let wal_redo_manager = Arc::new(WalRedoManager::from(PostgresRedoManager::new( - conf, tenant_id, + conf, + tenant_shard_id.tenant_id, ))); Arc::new(Tenant::new( TenantState::Broken { @@ -999,7 +1052,7 @@ impl Tenant { conf, AttachedTenantConf::try_from(LocationConf::default()).unwrap(), wal_redo_manager, - tenant_id, + tenant_shard_id, None, DeletionQueueClient::broken(), )) @@ -1012,7 +1065,7 @@ impl Tenant { // completed in non topological order (for example because parent has smaller number of layer files in it) let mut timelines_to_resume_deletion: Vec<(TimelineId, Option)> = vec![]; - let timelines_dir = self.conf.timelines_path(&self.tenant_id); + let timelines_dir = self.conf.timelines_path(&self.tenant_shard_id); for entry in timelines_dir .read_dir_utf8() @@ -1043,7 +1096,7 @@ impl Tenant { "Could not parse timeline id out of the timeline uninit mark name {timeline_uninit_mark_file}", ) })?; - let timeline_dir = self.conf.timeline_path(&self.tenant_id, &timeline_id); + let timeline_dir = self.conf.timeline_path(&self.tenant_shard_id, &timeline_id); if let Err(e) = remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file) { @@ -1060,7 +1113,7 @@ impl Tenant { info!("Found deletion mark for timeline {}", timeline_id); - match load_metadata(self.conf, &self.tenant_id, &timeline_id) { + match load_metadata(self.conf, &self.tenant_shard_id, &timeline_id) { Ok(metadata) => { timelines_to_resume_deletion.push((timeline_id, Some(metadata))) } @@ -1104,7 +1157,7 @@ impl Tenant { })?; let timeline_uninit_mark_file = self .conf - .timeline_uninit_mark_file_path(self.tenant_id, timeline_id); + .timeline_uninit_mark_file_path(self.tenant_shard_id, timeline_id); if timeline_uninit_mark_file.exists() { info!( %timeline_id, @@ -1120,7 +1173,7 @@ impl Tenant { let timeline_delete_mark_file = self .conf - .timeline_delete_mark_file_path(self.tenant_id, timeline_id); + .timeline_delete_mark_file_path(self.tenant_shard_id, timeline_id); if timeline_delete_mark_file.exists() { // Cleanup should be done in `is_delete_mark` branch above 
continue; @@ -1128,7 +1181,7 @@ impl Tenant { let file_name = entry.file_name(); if let Ok(timeline_id) = file_name.parse::() { - let metadata = load_metadata(self.conf, &self.tenant_id, &timeline_id) + let metadata = load_metadata(self.conf, &self.tenant_shard_id, &timeline_id) .context("failed to load metadata")?; timelines_to_load.insert(timeline_id, metadata); } else { @@ -1160,7 +1213,7 @@ impl Tenant { remote_storage.clone(), self.deletion_queue_client.clone(), self.conf, - self.tenant_id, + self.tenant_shard_id, timeline_id, self.generation, ); @@ -1366,8 +1419,12 @@ impl Tenant { .map_err(LoadLocalTimelineError::Load) } - pub fn tenant_id(&self) -> TenantId { - self.tenant_id + pub(crate) fn tenant_id(&self) -> TenantId { + self.tenant_shard_id.tenant_id + } + + pub(crate) fn tenant_shard_id(&self) -> TenantShardId { + self.tenant_shard_id } /// Get Timeline handle for given Neon timeline ID. @@ -1381,13 +1438,13 @@ impl Tenant { let timeline = timelines_accessor .get(&timeline_id) .ok_or(GetTimelineError::NotFound { - tenant_id: self.tenant_id, + tenant_id: self.tenant_shard_id.tenant_id, timeline_id, })?; if active_only && !timeline.is_active() { Err(GetTimelineError::NotActive { - tenant_id: self.tenant_id, + tenant_id: self.tenant_shard_id.tenant_id, timeline_id, state: timeline.current_state(), }) @@ -1513,12 +1570,14 @@ impl Tenant { /// /// If the caller specified the timeline ID to use (`new_timeline_id`), and timeline with /// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists. + #[allow(clippy::too_many_arguments)] pub async fn create_timeline( &self, new_timeline_id: TimelineId, ancestor_timeline_id: Option, mut ancestor_start_lsn: Option, pg_version: u32, + load_existing_initdb: Option, broker_client: storage_broker::BrokerClientChannel, ctx: &RequestContext, ) -> Result, CreateTimelineError> { @@ -1593,7 +1652,7 @@ impl Tenant { .await? } None => { - self.bootstrap_timeline(new_timeline_id, pg_version, ctx) + self.bootstrap_timeline(new_timeline_id, pg_version, load_existing_initdb, ctx) .await? } }; @@ -1745,7 +1804,7 @@ impl Tenant { *current_state = TenantState::Activating(ActivatingFrom::Attaching); } } - debug!(tenant_id = %self.tenant_id, "Activating tenant"); + debug!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), "Activating tenant"); activating = true; // Continue outside the closure. We need to grab timelines.lock() // and we plan to turn it into a tokio::sync::Mutex in a future patch. @@ -1782,7 +1841,8 @@ impl Tenant { // times to activate. see https://github.com/neondatabase/neon/issues/4025 info!( since_creation_millis = elapsed.as_millis(), - tenant_id = %self.tenant_id, + tenant_id = %self.tenant_shard_id.tenant_id, + shard_id = %self.tenant_shard_id.shard_slug(), activated_timelines, total_timelines, post_state = <&'static str>::from(&*current_state), @@ -1879,7 +1939,7 @@ impl Tenant { // // this will additionally shutdown and await all timeline tasks. 
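Many call sites in this change replace a single `tenant_id` span field with the `tenant_id` plus `shard_id` pair derived from `TenantShardId::shard_slug()`. A small sketch of attaching such fields to a tracing span, with placeholder string values standing in for the real id types (assumes the tracing and tracing-subscriber crates):

    use tracing::{info, info_span};

    fn main() {
        tracing_subscriber::fmt().init();

        // Placeholder values; in the pageserver these come from TenantShardId.
        let tenant_id = "ad6c1a56f5680419d3a16ff55d97ec3c";
        let shard_id = "0001";
        let timeline_id = "be322c834ed9e709e63b5c9698691910";

        let span = info_span!(
            "attach",
            tenant_id = %tenant_id,
            shard_id = %shard_id,
            %timeline_id,
        );
        let _guard = span.enter();

        // Every event inside the span carries the tenant/shard/timeline fields.
        info!("attach starting");
    }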
tracing::debug!("Waiting for tasks..."); - task_mgr::shutdown_tasks(None, Some(self.tenant_id), None).await; + task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id.tenant_id), None).await; // Wait for any in-flight operations to complete self.gate.close().await; @@ -2054,7 +2114,7 @@ impl Tenant { receiver.changed().await.map_err( |_e: tokio::sync::watch::error::RecvError| // Tenant existed but was dropped: report it as non-existent - GetActiveTenantError::NotFound(GetTenantError::NotFound(self.tenant_id)) + GetActiveTenantError::NotFound(GetTenantError::NotFound(self.tenant_shard_id.tenant_id)) )?; } TenantState::Active { .. } => { @@ -2128,9 +2188,6 @@ where } impl Tenant { - pub fn get_tenant_id(&self) -> TenantId { - self.tenant_id - } pub fn tenant_specific_overrides(&self) -> TenantConfOpt { self.tenant_conf.read().unwrap().tenant_conf } @@ -2280,7 +2337,7 @@ impl Tenant { new_metadata, ancestor, new_timeline_id, - self.tenant_id, + self.tenant_shard_id, self.generation, Arc::clone(&self.walredo_mgr), resources, @@ -2302,14 +2359,14 @@ impl Tenant { conf: &'static PageServerConf, attached_conf: AttachedTenantConf, walredo_mgr: Arc, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, remote_storage: Option, deletion_queue_client: DeletionQueueClient, ) -> Tenant { let (state, mut rx) = watch::channel(state); tokio::spawn(async move { - let tid = tenant_id.to_string(); + let tid = tenant_shard_id.to_string(); fn inspect_state(state: &TenantState) -> ([&'static str; 1], bool) { ([state.into()], matches!(state, TenantState::Broken { .. })) @@ -2361,7 +2418,7 @@ impl Tenant { }); Tenant { - tenant_id, + tenant_shard_id, generation: attached_conf.location.generation, conf, // using now here is good enough approximation to catch tenants with really long @@ -2379,17 +2436,17 @@ impl Tenant { eviction_task_tenant_state: tokio::sync::Mutex::new(EvictionTaskTenantState::default()), delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTenantFlow::default())), cancel: CancellationToken::default(), - gate: Gate::new(format!("Tenant<{tenant_id}>")), + gate: Gate::new(format!("Tenant<{tenant_shard_id}>")), } } /// Locate and load config pub(super) fn load_tenant_config( conf: &'static PageServerConf, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, ) -> anyhow::Result { - let legacy_config_path = conf.tenant_config_path(tenant_id); - let config_path = conf.tenant_location_config_path(tenant_id); + let legacy_config_path = conf.tenant_config_path(tenant_shard_id); + let config_path = conf.tenant_location_config_path(tenant_shard_id); if config_path.exists() { // New-style config takes precedence @@ -2403,9 +2460,7 @@ impl Tenant { for (key, item) in deserialized.iter() { match key { "tenant_config" => { - tenant_conf = PageServerConf::parse_toml_tenant_conf(item).with_context(|| { - format!("Failed to parse config from file '{legacy_config_path}' as pageserver config") - })?; + tenant_conf = TenantConfOpt::try_from(item.to_owned()).context(format!("Failed to parse config from file '{legacy_config_path}' as pageserver config"))?; } _ => bail!( "config file {legacy_config_path} has unrecognized pageserver option '{key}'" @@ -2443,29 +2498,34 @@ impl Tenant { .with_context(|| format!("Failed to parse config from file '{path}' as toml file")) } - #[tracing::instrument(skip_all, fields(%tenant_id))] + #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))] pub(super) async fn persist_tenant_config( conf: &'static 
PageServerConf, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, location_conf: &LocationConf, ) -> anyhow::Result<()> { - let legacy_config_path = conf.tenant_config_path(tenant_id); - let config_path = conf.tenant_location_config_path(tenant_id); - - Self::persist_tenant_config_at(tenant_id, &config_path, &legacy_config_path, location_conf) - .await + let legacy_config_path = conf.tenant_config_path(tenant_shard_id); + let config_path = conf.tenant_location_config_path(tenant_shard_id); + + Self::persist_tenant_config_at( + tenant_shard_id, + &config_path, + &legacy_config_path, + location_conf, + ) + .await } - #[tracing::instrument(skip_all, fields(%tenant_id))] + #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))] pub(super) async fn persist_tenant_config_at( - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, config_path: &Utf8Path, legacy_config_path: &Utf8Path, location_conf: &LocationConf, ) -> anyhow::Result<()> { // Forward compat: write out an old-style configuration that old versions can read, in case we roll back Self::persist_tenant_config_legacy( - tenant_id, + tenant_shard_id, legacy_config_path, &location_conf.tenant_conf, ) @@ -2492,14 +2552,16 @@ impl Tenant { let temp_path = path_with_suffix_extension(config_path, TEMP_FILE_SUFFIX); - let tenant_id = *tenant_id; + let tenant_shard_id = *tenant_shard_id; let config_path = config_path.to_owned(); tokio::task::spawn_blocking(move || { Handle::current().block_on(async move { let conf_content = conf_content.as_bytes(); VirtualFile::crashsafe_overwrite(&config_path, &temp_path, conf_content) .await - .with_context(|| format!("write tenant {tenant_id} config to {config_path}")) + .with_context(|| { + format!("write tenant {tenant_shard_id} config to {config_path}") + }) }) }) .await??; @@ -2507,9 +2569,9 @@ impl Tenant { Ok(()) } - #[tracing::instrument(skip_all, fields(%tenant_id))] + #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))] async fn persist_tenant_config_legacy( - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, target_config_path: &Utf8Path, tenant_conf: &TenantConfOpt, ) -> anyhow::Result<()> { @@ -2527,7 +2589,7 @@ impl Tenant { let temp_path = path_with_suffix_extension(target_config_path, TEMP_FILE_SUFFIX); - let tenant_id = *tenant_id; + let tenant_shard_id = *tenant_shard_id; let target_config_path = target_config_path.to_owned(); tokio::task::spawn_blocking(move || { Handle::current().block_on(async move { @@ -2535,7 +2597,7 @@ impl Tenant { VirtualFile::crashsafe_overwrite(&target_config_path, &temp_path, conf_content) .await .with_context(|| { - format!("write tenant {tenant_id} config to {target_config_path}") + format!("write tenant {tenant_shard_id} config to {target_config_path}") }) }) }) @@ -2611,14 +2673,12 @@ impl Tenant { // Perform GC for each timeline. // - // Note that we don't hold the GC lock here because we don't want - // to delay the branch creation task, which requires the GC lock. - // A timeline GC iteration can be slow because it may need to wait for - // compaction (both require `layer_removal_cs` lock), - // but the GC iteration can run concurrently with branch creation. + // Note that we don't hold the `Tenant::gc_cs` lock here because we don't want to delay the + // branch creation task, which requires the GC lock. A GC iteration can run concurrently + // with branch creation. 
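`persist_tenant_config_at` above writes the new config through a temporary path (`TEMP_FILE_SUFFIX`) and `VirtualFile::crashsafe_overwrite`. A generic sketch of that write-temp-then-rename idea with plain tokio file I/O, not the pageserver's `VirtualFile`; a fully durable version would also fsync the parent directory, which is omitted here:

    use std::path::Path;
    use tokio::fs;
    use tokio::io::AsyncWriteExt;

    /// Write `content` to `path` so that a crash leaves either the old or the
    /// new file, never a torn one: write a temp file, flush it, then rename.
    async fn crashsafe_overwrite_sketch(path: &Path, content: &[u8]) -> std::io::Result<()> {
        let tmp = path.with_extension("___temp");

        let mut f = fs::File::create(&tmp).await?;
        f.write_all(content).await?;
        f.sync_all().await?; // make sure the bytes hit disk before the rename
        drop(f);

        // Atomic on POSIX filesystems: readers see either the old or the new file.
        fs::rename(&tmp, path).await?;
        Ok(())
    }

    #[tokio::main]
    async fn main() -> std::io::Result<()> {
        crashsafe_overwrite_sketch(
            Path::new("/tmp/tenant-config-example"),
            b"[tenant_config]\n",
        )
        .await
    }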
// - // See comments in [`Tenant::branch_timeline`] for more information - // about why branch creation task can run concurrently with timeline's GC iteration. + // See comments in [`Tenant::branch_timeline`] for more information about why branch + // creation task can run concurrently with timeline's GC iteration. for timeline in gc_timelines { if task_mgr::is_shutdown_requested() || cancel.is_cancelled() { // We were requested to shut down. Stop and return with the progress we @@ -2901,10 +2961,11 @@ impl Tenant { /// - after initialization completes, tar up the temp dir and upload it to S3. /// /// The caller is responsible for activating the returned timeline. - async fn bootstrap_timeline( + pub(crate) async fn bootstrap_timeline( &self, timeline_id: TimelineId, pg_version: u32, + load_existing_initdb: Option, ctx: &RequestContext, ) -> anyhow::Result> { let timeline_uninit_mark = { @@ -2913,55 +2974,79 @@ impl Tenant { }; // create a `tenant/{tenant_id}/timelines/basebackup-{timeline_id}.{TEMP_FILE_SUFFIX}/` // temporary directory for basebackup files for the given timeline. - let initdb_path = path_with_suffix_extension( + let pgdata_path = path_with_suffix_extension( self.conf - .timelines_path(&self.tenant_id) + .timelines_path(&self.tenant_shard_id) .join(format!("basebackup-{timeline_id}")), TEMP_FILE_SUFFIX, ); // an uninit mark was placed before, nothing else can access this timeline files // current initdb was not run yet, so remove whatever was left from the previous runs - if initdb_path.exists() { - fs::remove_dir_all(&initdb_path).with_context(|| { - format!("Failed to remove already existing initdb directory: {initdb_path}") + if pgdata_path.exists() { + fs::remove_dir_all(&pgdata_path).with_context(|| { + format!("Failed to remove already existing initdb directory: {pgdata_path}") })?; } - // Init temporarily repo to get bootstrap data, this creates a directory in the `initdb_path` path - run_initdb(self.conf, &initdb_path, pg_version)?; // this new directory is very temporary, set to remove it immediately after bootstrap, we don't need it scopeguard::defer! 
{ - if let Err(e) = fs::remove_dir_all(&initdb_path) { + if let Err(e) = fs::remove_dir_all(&pgdata_path) { // this is unlikely, but we will remove the directory on pageserver restart or another bootstrap call - error!("Failed to remove temporary initdb directory '{initdb_path}': {e}"); + error!("Failed to remove temporary initdb directory '{pgdata_path}': {e}"); } } - let pgdata_path = &initdb_path; - let pgdata_lsn = import_datadir::get_lsn_from_controlfile(pgdata_path)?.align(); - - // Upload the created data dir to S3 - if let Some(storage) = &self.remote_storage { - let pgdata_zstd = import_datadir::create_tar_zst(pgdata_path).await?; - let pgdata_zstd = Bytes::from(pgdata_zstd); - backoff::retry( - || async { - self::remote_timeline_client::upload_initdb_dir( - storage, - &self.tenant_id, - &timeline_id, - pgdata_zstd.clone(), - ) + if let Some(existing_initdb_timeline_id) = load_existing_initdb { + let Some(storage) = &self.remote_storage else { + bail!("no storage configured but load_existing_initdb set to {existing_initdb_timeline_id}"); + }; + let (initdb_tar_zst_path, initdb_tar_zst) = + self::remote_timeline_client::download_initdb_tar_zst( + self.conf, + storage, + &self.tenant_shard_id, + &existing_initdb_timeline_id, + ) + .await + .context("download initdb tar")?; + let buf_read = Box::pin(BufReader::new(initdb_tar_zst)); + import_datadir::extract_tar_zst(&pgdata_path, buf_read) + .await + .context("extract initdb tar")?; + + if initdb_tar_zst_path.exists() { + tokio::fs::remove_file(&initdb_tar_zst_path) .await - }, - |_| false, - 3, - u32::MAX, - "persist_initdb_tar_zst", - // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066) - backoff::Cancel::new(CancellationToken::new(), || unreachable!()), - ) - .await?; + .context("tempfile removal")?; + } + } else { + // Init temporarily repo to get bootstrap data, this creates a directory in the `initdb_path` path + run_initdb(self.conf, &pgdata_path, pg_version, &self.cancel).await?; + + // Upload the created data dir to S3 + if let Some(storage) = &self.remote_storage { + let pgdata_zstd = import_datadir::create_tar_zst(&pgdata_path).await?; + let pgdata_zstd = Bytes::from(pgdata_zstd); + backoff::retry( + || async { + self::remote_timeline_client::upload_initdb_dir( + storage, + &self.tenant_shard_id.tenant_id, + &timeline_id, + pgdata_zstd.clone(), + ) + .await + }, + |_| false, + 3, + u32::MAX, + "persist_initdb_tar_zst", + // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066) + backoff::Cancel::new(CancellationToken::new(), || unreachable!()), + ) + .await?; + } } + let pgdata_lsn = import_datadir::get_lsn_from_controlfile(&pgdata_path)?.align(); // Import the contents of the data directory at the initial checkpoint // LSN, and any WAL after that. 
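In the bootstrap path above, when `load_existing_initdb` is unset the freshly built data directory is compressed and uploaded through the crate-internal `backoff::retry` helper (cancellation is still a TODO there). Below is a minimal, self-contained sketch of the same bounded-retry-with-backoff shape; `retry_with_backoff`, the attempt cap, and the fake upload closure are illustrative stand-ins, not Neon's actual API.

```rust
use std::sync::Arc;
use std::time::Duration;

/// Retry an async, fallible operation with capped exponential backoff.
/// This mimics the shape of the crate-internal `utils::backoff::retry`
/// helper used above, minus the cancellation hook.
async fn retry_with_backoff<T, E, F, Fut>(mut op: F, max_attempts: u32) -> Result<T, E>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
    E: std::fmt::Display,
{
    let mut delay = Duration::from_millis(100);
    let mut attempt = 0u32;
    loop {
        match op().await {
            Ok(v) => return Ok(v),
            Err(e) if attempt + 1 < max_attempts => {
                attempt += 1;
                eprintln!("attempt {attempt} failed: {e}, retrying in {delay:?}");
                tokio::time::sleep(delay).await;
                delay = (delay * 2).min(Duration::from_secs(10));
            }
            Err(e) => return Err(e),
        }
    }
}

#[tokio::main]
async fn main() {
    // Hypothetical stand-in for the compressed initdb archive; the real code
    // holds it as `Bytes` and clones the cheap handle on every attempt.
    let payload: Arc<Vec<u8>> = Arc::new(b"pgdata.tar.zst bytes".to_vec());

    let result = retry_with_backoff(
        || {
            let body = payload.clone(); // cheap per-attempt handle, moved into the future
            async move {
                // A real implementation would call the remote storage client here.
                let _ = body.len();
                Ok::<_, std::io::Error>(())
            }
        },
        3,
    )
    .await;
    assert!(result.is_ok());
}
```

Cloning a cheap handle (`Arc` here, `Bytes` in the diff) inside the closure and moving it into an `async move` block keeps each attempt's future owned, which is what an `FnMut() -> impl Future` style retry helper needs.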
@@ -2986,18 +3071,18 @@ impl Tenant { ) .await?; - let tenant_id = raw_timeline.owning_tenant.tenant_id; + let tenant_shard_id = raw_timeline.owning_tenant.tenant_shard_id; let unfinished_timeline = raw_timeline.raw_timeline()?; import_datadir::import_timeline_from_postgres_datadir( unfinished_timeline, - pgdata_path, + &pgdata_path, pgdata_lsn, ctx, ) .await .with_context(|| { - format!("Failed to import pgdatadir for timeline {tenant_id}/{timeline_id}") + format!("Failed to import pgdatadir for timeline {tenant_shard_id}/{timeline_id}") })?; // Flush the new layer files to disk, before we make the timeline as available to @@ -3015,7 +3100,7 @@ impl Tenant { .await .with_context(|| { format!( - "Failed to flush after pgdatadir import for timeline {tenant_id}/{timeline_id}" + "Failed to flush after pgdatadir import for timeline {tenant_shard_id}/{timeline_id}" ) })?; @@ -3038,7 +3123,7 @@ impl Tenant { remote_storage.clone(), self.deletion_queue_client.clone(), self.conf, - self.tenant_id, + self.tenant_shard_id, timeline_id, self.generation, ); @@ -3067,7 +3152,7 @@ impl Tenant { start_lsn: Lsn, ancestor: Option>, ) -> anyhow::Result { - let tenant_id = self.tenant_id; + let tenant_shard_id = self.tenant_shard_id; let resources = self.build_timeline_resources(new_timeline_id); if let Some(remote_client) = &resources.remote_client { @@ -3091,12 +3176,14 @@ impl Tenant { .create_timeline_files(&uninit_mark.timeline_path, &new_timeline_id, new_metadata) .await { - error!("Failed to create initial files for timeline {tenant_id}/{new_timeline_id}, cleaning up: {e:?}"); + error!("Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}"); cleanup_timeline_directory(uninit_mark); return Err(e); } - debug!("Successfully created initial files for timeline {tenant_id}/{new_timeline_id}"); + debug!( + "Successfully created initial files for timeline {tenant_shard_id}/{new_timeline_id}" + ); Ok(UninitializedTimeline::new( self, @@ -3117,9 +3204,14 @@ impl Tenant { anyhow::bail!("failpoint after-timeline-uninit-mark-creation"); }); - save_metadata(self.conf, &self.tenant_id, new_timeline_id, new_metadata) - .await - .context("Failed to create timeline metadata")?; + save_metadata( + self.conf, + &self.tenant_shard_id, + new_timeline_id, + new_metadata, + ) + .await + .context("Failed to create timeline metadata")?; Ok(()) } @@ -3132,13 +3224,13 @@ impl Tenant { timeline_id: TimelineId, timelines: &MutexGuard>>, ) -> anyhow::Result { - let tenant_id = self.tenant_id; + let tenant_shard_id = self.tenant_shard_id; anyhow::ensure!( timelines.get(&timeline_id).is_none(), - "Timeline {tenant_id}/{timeline_id} already exists in pageserver's memory" + "Timeline {tenant_shard_id}/{timeline_id} already exists in pageserver's memory" ); - let timeline_path = self.conf.timeline_path(&tenant_id, &timeline_id); + let timeline_path = self.conf.timeline_path(&tenant_shard_id, &timeline_id); anyhow::ensure!( !timeline_path.exists(), "Timeline {timeline_path} already exists, cannot create its uninit mark file", @@ -3146,7 +3238,7 @@ impl Tenant { let uninit_mark_path = self .conf - .timeline_uninit_mark_file_path(tenant_id, timeline_id); + .timeline_uninit_mark_file_path(tenant_shard_id, timeline_id); fs::OpenOptions::new() .write(true) .create_new(true) @@ -3157,7 +3249,7 @@ impl Tenant { .context("Failed to fsync uninit mark file") }) .with_context(|| { - format!("Failed to crate uninit mark for timeline {tenant_id}/{timeline_id}") + format!("Failed to crate uninit mark for 
timeline {tenant_shard_id}/{timeline_id}") })?; let uninit_mark = TimelineUninitMark::new(uninit_mark_path, timeline_path); @@ -3168,7 +3260,7 @@ impl Tenant { /// Gathers inputs from all of the timelines to produce a sizing model input. /// /// Future is cancellation safe. Only one calculation can be running at once per tenant. - #[instrument(skip_all, fields(tenant_id=%self.tenant_id))] + #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))] pub async fn gather_size_inputs( &self, // `max_retention_period` overrides the cutoff that is used to calculate the size @@ -3207,7 +3299,7 @@ impl Tenant { /// Calculate synthetic tenant size and cache the result. /// This is periodically called by background worker. /// result is cached in tenant struct - #[instrument(skip_all, fields(tenant_id=%self.tenant_id))] + #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))] pub async fn calculate_synthetic_size( &self, cause: LogicalSizeCalculationCause, @@ -3229,7 +3321,7 @@ impl Tenant { .store(size, Ordering::Relaxed); TENANT_SYNTHETIC_SIZE_METRIC - .get_metric_with_label_values(&[&self.tenant_id.to_string()]) + .get_metric_with_label_values(&[&self.tenant_shard_id.tenant_id.to_string()]) .unwrap() .set(size); } @@ -3237,6 +3329,66 @@ impl Tenant { pub fn cached_synthetic_size(&self) -> u64 { self.cached_synthetic_tenant_size.load(Ordering::Relaxed) } + + /// Flush any in-progress layers, schedule uploads, and wait for uploads to complete. + /// + /// This function can take a long time: callers should wrap it in a timeout if calling + /// from an external API handler. + /// + /// Cancel-safety: cancelling this function may leave I/O running, but such I/O is + /// still bounded by tenant/timeline shutdown. + #[tracing::instrument(skip_all)] + pub(crate) async fn flush_remote(&self) -> anyhow::Result<()> { + let timelines = self.timelines.lock().unwrap().clone(); + + async fn flush_timeline(_gate: GateGuard, timeline: Arc) -> anyhow::Result<()> { + tracing::info!(timeline_id=%timeline.timeline_id, "Flushing..."); + timeline.freeze_and_flush().await?; + tracing::info!(timeline_id=%timeline.timeline_id, "Waiting for uploads..."); + if let Some(client) = &timeline.remote_client { + client.wait_completion().await?; + } + + Ok(()) + } + + // We do not use a JoinSet for these tasks, because we don't want them to be + // aborted when this function's future is cancelled: they should stay alive + // holding their GateGuard until they complete, to ensure their I/Os complete + // before Timeline shutdown completes. + let mut results = FuturesUnordered::new(); + + for (_timeline_id, timeline) in timelines { + // Run each timeline's flush in a task holding the timeline's gate: this + // means that if this function's future is cancelled, the Timeline shutdown + // will still wait for any I/O in here to complete. + let gate = match timeline.gate.enter() { + Ok(g) => g, + Err(_) => continue, + }; + let jh = tokio::task::spawn(async move { flush_timeline(gate, timeline).await }); + results.push(jh); + } + + while let Some(r) = results.next().await { + if let Err(e) = r { + if !e.is_cancelled() && !e.is_panic() { + tracing::error!("unexpected join error: {e:?}"); + } + } + } + + // The flushes we did above were just writes, but the Tenant might have had + // pending deletions as well from recent compaction/gc: we want to flush those + // as well. 
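The new `Tenant::flush_remote` above spawns one task per timeline and drains the `JoinHandle`s through a `FuturesUnordered` rather than a `JoinSet`, precisely so that cancelling the caller does not abort the in-flight flushes; each task holds a gate guard so timeline shutdown still waits for its I/O. A much-simplified sketch of that shape, with a `Semaphore` permit standing in for the timeline `Gate` guard and a sleep standing in for freeze/flush/upload (all names here are illustrative):

```rust
use std::sync::Arc;
use std::time::Duration;

use futures::stream::{FuturesUnordered, StreamExt};
use tokio::sync::{OwnedSemaphorePermit, Semaphore};

// Stand-in for one timeline's freeze/flush plus wait-for-uploads; the permit
// plays the role of the Timeline gate guard and is held until we are done.
async fn flush_one(_gate_guard: OwnedSemaphorePermit, id: u32) -> anyhow::Result<()> {
    tokio::time::sleep(Duration::from_millis(50)).await;
    println!("timeline {id} flushed and uploaded");
    Ok(())
}

async fn flush_all(gate: Arc<Semaphore>, ids: Vec<u32>) -> anyhow::Result<()> {
    // Spawned tasks keep running even if this function's future is dropped:
    // we only await the JoinHandles, we never abort them.
    let mut results = FuturesUnordered::new();
    for id in ids {
        let Ok(guard) = gate.clone().acquire_owned().await else {
            continue; // gate closed: the timeline is shutting down, skip it
        };
        results.push(tokio::spawn(flush_one(guard, id)));
    }

    while let Some(joined) = results.next().await {
        match joined {
            Ok(Ok(())) => {}
            Ok(Err(e)) => eprintln!("flush failed: {e:#}"),
            Err(e) if !e.is_cancelled() && !e.is_panic() => {
                eprintln!("unexpected join error: {e:?}")
            }
            Err(_) => {}
        }
    }
    Ok(())
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let gate = Arc::new(Semaphore::new(16));
    // Callers bound the wait themselves, like the flush timeout in `upsert_location`.
    tokio::time::timeout(Duration::from_secs(10), flush_all(gate, vec![1, 2, 3])).await??;
    Ok(())
}
```

As the doc comment on `flush_remote` says, callers are expected to bound the wait; `upsert_location` later in this diff does so with `tokio::time::timeout`, mirrored in `main` above.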
This requires flushing the global delete queue. This is cheap + // because it's typically a no-op. + match self.deletion_queue_client.flush_execute().await { + Ok(_) => {} + Err(DeletionQueueError::ShuttingDown) => {} + } + + Ok(()) + } } fn remove_timeline_and_uninit_mark( @@ -3265,9 +3417,9 @@ fn remove_timeline_and_uninit_mark( pub(crate) async fn create_tenant_files( conf: &'static PageServerConf, location_conf: &LocationConf, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, ) -> anyhow::Result { - let target_tenant_directory = conf.tenant_path(tenant_id); + let target_tenant_directory = conf.tenant_path(tenant_shard_id); anyhow::ensure!( !target_tenant_directory .try_exists() @@ -3287,14 +3439,16 @@ pub(crate) async fn create_tenant_files( let creation_result = try_create_target_tenant_dir( conf, location_conf, - tenant_id, + tenant_shard_id, &temporary_tenant_dir, &target_tenant_directory, ) .await; if creation_result.is_err() { - error!("Failed to create directory structure for tenant {tenant_id}, cleaning tmp data"); + error!( + "Failed to create directory structure for tenant {tenant_shard_id}, cleaning tmp data" + ); if let Err(e) = fs::remove_dir_all(&temporary_tenant_dir) { error!("Failed to remove temporary tenant directory {temporary_tenant_dir:?}: {e}") } else if let Err(e) = crashsafe::fsync(&temporary_tenant_dir) { @@ -3312,31 +3466,31 @@ pub(crate) async fn create_tenant_files( async fn try_create_target_tenant_dir( conf: &'static PageServerConf, location_conf: &LocationConf, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, temporary_tenant_dir: &Utf8Path, target_tenant_directory: &Utf8Path, ) -> Result<(), anyhow::Error> { let temporary_tenant_timelines_dir = rebase_directory( - &conf.timelines_path(tenant_id), + &conf.timelines_path(tenant_shard_id), target_tenant_directory, temporary_tenant_dir, ) - .with_context(|| format!("resolve tenant {tenant_id} temporary timelines dir"))?; + .with_context(|| format!("resolve tenant {tenant_shard_id} temporary timelines dir"))?; let temporary_legacy_tenant_config_path = rebase_directory( - &conf.tenant_config_path(tenant_id), + &conf.tenant_config_path(tenant_shard_id), target_tenant_directory, temporary_tenant_dir, ) - .with_context(|| format!("resolve tenant {tenant_id} temporary config path"))?; + .with_context(|| format!("resolve tenant {tenant_shard_id} temporary config path"))?; let temporary_tenant_config_path = rebase_directory( - &conf.tenant_location_config_path(tenant_id), + &conf.tenant_location_config_path(tenant_shard_id), target_tenant_directory, temporary_tenant_dir, ) - .with_context(|| format!("resolve tenant {tenant_id} temporary config path"))?; + .with_context(|| format!("resolve tenant {tenant_shard_id} temporary config path"))?; Tenant::persist_tenant_config_at( - tenant_id, + tenant_shard_id, &temporary_tenant_config_path, &temporary_legacy_tenant_config_path, location_conf, @@ -3346,7 +3500,7 @@ async fn try_create_target_tenant_dir( crashsafe::create_dir(&temporary_tenant_timelines_dir).with_context(|| { format!( "create tenant {} temporary timelines directory {}", - tenant_id, temporary_tenant_timelines_dir, + tenant_shard_id, temporary_tenant_timelines_dir, ) })?; fail::fail_point!("tenant-creation-before-tmp-rename", |_| { @@ -3361,19 +3515,19 @@ async fn try_create_target_tenant_dir( fs::rename(temporary_tenant_dir, target_tenant_directory).with_context(|| { format!( "move tenant {} temporary directory {} into the permanent one {}", - tenant_id, temporary_tenant_dir, 
target_tenant_directory + tenant_shard_id, temporary_tenant_dir, target_tenant_directory ) })?; let target_dir_parent = target_tenant_directory.parent().with_context(|| { format!( "get tenant {} dir parent for {}", - tenant_id, target_tenant_directory, + tenant_shard_id, target_tenant_directory, ) })?; crashsafe::fsync(target_dir_parent).with_context(|| { format!( "fsync renamed directory's parent {} for tenant {}", - target_dir_parent, tenant_id, + target_dir_parent, tenant_shard_id, ) })?; @@ -3396,42 +3550,54 @@ fn rebase_directory( /// Create the cluster temporarily in 'initdbpath' directory inside the repository /// to get bootstrap data for timeline initialization. -fn run_initdb( +async fn run_initdb( conf: &'static PageServerConf, initdb_target_dir: &Utf8Path, pg_version: u32, -) -> anyhow::Result<()> { - let initdb_bin_path = conf.pg_bin_dir(pg_version)?.join("initdb"); - let initdb_lib_dir = conf.pg_lib_dir(pg_version)?; + cancel: &CancellationToken, +) -> Result<(), InitdbError> { + let initdb_bin_path = conf + .pg_bin_dir(pg_version) + .map_err(InitdbError::Other)? + .join("initdb"); + let initdb_lib_dir = conf.pg_lib_dir(pg_version).map_err(InitdbError::Other)?; info!( "running {} in {}, libdir: {}", initdb_bin_path, initdb_target_dir, initdb_lib_dir, ); - let initdb_output = Command::new(&initdb_bin_path) + let _permit = INIT_DB_SEMAPHORE.acquire().await; + + let initdb_command = tokio::process::Command::new(&initdb_bin_path) .args(["-D", initdb_target_dir.as_ref()]) .args(["-U", &conf.superuser]) .args(["-E", "utf8"]) .arg("--no-instructions") - // This is only used for a temporary installation that is deleted shortly after, - // so no need to fsync it .arg("--no-sync") .env_clear() .env("LD_LIBRARY_PATH", &initdb_lib_dir) .env("DYLD_LIBRARY_PATH", &initdb_lib_dir) - .stdout(Stdio::null()) - .output() - .with_context(|| { - format!( - "failed to execute {} at target dir {}", - initdb_bin_path, initdb_target_dir, - ) - })?; - if !initdb_output.status.success() { - bail!( - "initdb failed: '{}'", - String::from_utf8_lossy(&initdb_output.stderr) - ); + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + // If the `select!` below doesn't finish the `wait_with_output`, + // let the task get `wait()`ed for asynchronously by tokio. + // This means there is a slim chance we can go over the INIT_DB_SEMAPHORE. + // TODO: fix for this is non-trivial, see + // https://github.com/neondatabase/neon/pull/5921#pullrequestreview-1750858021 + // + .kill_on_drop(true) + .spawn()?; + + tokio::select! { + initdb_output = initdb_command.wait_with_output() => { + let initdb_output = initdb_output?; + if !initdb_output.status.success() { + return Err(InitdbError::Failed(initdb_output.status, initdb_output.stderr)); + } + } + _ = cancel.cancelled() => { + return Err(InitdbError::Cancelled); + } } Ok(()) @@ -3439,7 +3605,7 @@ fn run_initdb( impl Drop for Tenant { fn drop(&mut self) { - remove_tenant_metrics(&self.tenant_id); + remove_tenant_metrics(&self.tenant_shard_id.tenant_id); } } /// Dump contents of a layer file to stdout. 
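`run_initdb` is now fully async: the child is spawned via `tokio::process::Command` with `kill_on_drop(true)`, completion is raced against a `CancellationToken` in `tokio::select!`, and a semaphore bounds concurrent runs. A condensed, self-contained sketch of that pattern follows; the command, error enum, and limiter size are placeholders rather than the real `initdb` invocation.

```rust
use std::process::Stdio;

use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;

#[derive(Debug)]
enum RunError {
    Spawn(std::io::Error),
    Failed(std::process::ExitStatus, Vec<u8>),
    Cancelled,
}

async fn run_cancellable(limiter: &Semaphore, cancel: &CancellationToken) -> Result<(), RunError> {
    // Bound how many of these child processes may run concurrently.
    let _permit = limiter.acquire().await.expect("semaphore is never closed");

    // Placeholder command: the real code execs `initdb -D ... --no-sync` with a
    // cleared environment and explicit library paths.
    let child = tokio::process::Command::new("true")
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        // If the select! below is cancelled before the child finishes, dropping
        // the wait future kills the child instead of leaking it.
        .kill_on_drop(true)
        .spawn()
        .map_err(RunError::Spawn)?;

    tokio::select! {
        output = child.wait_with_output() => {
            let output = output.map_err(RunError::Spawn)?;
            if !output.status.success() {
                return Err(RunError::Failed(output.status, output.stderr));
            }
            Ok(())
        }
        _ = cancel.cancelled() => Err(RunError::Cancelled),
    }
}

#[tokio::main]
async fn main() {
    let limiter = Semaphore::new(8);
    let cancel = CancellationToken::new();
    println!("placeholder initdb run: {:?}", run_cancellable(&limiter, &cancel).await);
}
```

Because `wait_with_output` consumes the child, losing the race to the cancellation branch drops that future and `kill_on_drop` reaps the process; as the TODO in the diff notes, the asynchronous reaping leaves a slim chance of briefly exceeding the semaphore.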
@@ -3477,6 +3643,7 @@ pub async fn dump_layerfile_from_path( pub(crate) mod harness { use bytes::{Bytes, BytesMut}; use once_cell::sync::OnceCell; + use pageserver_api::shard::ShardIndex; use std::fs; use std::sync::Arc; use utils::logging; @@ -3541,8 +3708,11 @@ pub(crate) mod harness { pub struct TenantHarness { pub conf: &'static PageServerConf, pub tenant_conf: TenantConf, - pub tenant_id: TenantId, + // TODO(sharding): remove duplicative `tenant_id` in favor of access to tenant_shard_id + pub(crate) tenant_id: TenantId, + pub tenant_shard_id: TenantShardId, pub generation: Generation, + pub shard: ShardIndex, pub remote_storage: GenericRemoteStorage, pub remote_fs_dir: Utf8PathBuf, pub deletion_queue: MockDeletionQueue, @@ -3585,8 +3755,9 @@ pub(crate) mod harness { }; let tenant_id = TenantId::generate(); - fs::create_dir_all(conf.tenant_path(&tenant_id))?; - fs::create_dir_all(conf.timelines_path(&tenant_id))?; + let tenant_shard_id = TenantShardId::unsharded(tenant_id); + fs::create_dir_all(conf.tenant_path(&tenant_shard_id))?; + fs::create_dir_all(conf.timelines_path(&tenant_shard_id))?; use remote_storage::{RemoteStorageConfig, RemoteStorageKind}; let remote_fs_dir = conf.workdir.join("localfs"); @@ -3601,7 +3772,9 @@ pub(crate) mod harness { conf, tenant_conf, tenant_id, + tenant_shard_id, generation: Generation::new(0xdeadbeef), + shard: ShardIndex::unsharded(), remote_storage, remote_fs_dir, deletion_queue, @@ -3619,7 +3792,7 @@ pub(crate) mod harness { } fn remote_empty(&self) -> bool { - let tenant_path = self.conf.tenant_path(&self.tenant_id); + let tenant_path = self.conf.tenant_path(&self.tenant_shard_id); let remote_tenant_dir = self .remote_fs_dir .join(tenant_path.strip_prefix(&self.conf.workdir).unwrap()); @@ -3659,7 +3832,7 @@ pub(crate) mod harness { )) .unwrap(), walredo_mgr, - self.tenant_id, + self.tenant_shard_id, Some(self.remote_storage.clone()), self.deletion_queue.new_client(), )); @@ -3668,17 +3841,17 @@ pub(crate) mod harness { LoadMode::Local => { tenant .load_local(None, ctx) - .instrument(info_span!("try_load", tenant_id=%self.tenant_id)) + .instrument(info_span!("try_load", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())) .await?; } LoadMode::Remote => { let preload = tenant .preload(&self.remote_storage, CancellationToken::new()) - .instrument(info_span!("try_load_preload", tenant_id=%self.tenant_id)) + .instrument(info_span!("try_load_preload", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())) .await?; tenant .attach(None, Some(preload), ctx) - .instrument(info_span!("try_load", tenant_id=%self.tenant_id)) + .instrument(info_span!("try_load", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())) .await?; } } @@ -3712,7 +3885,7 @@ pub(crate) mod harness { } pub fn timeline_path(&self, timeline_id: &TimelineId) -> Utf8PathBuf { - self.conf.timeline_path(&self.tenant_id, timeline_id) + self.conf.timeline_path(&self.tenant_shard_id, timeline_id) } } @@ -3828,7 +4001,7 @@ mod tests { e.to_string(), format!( "Timeline {}/{} already exists in pageserver's memory", - tenant.tenant_id, TIMELINE_ID + tenant.tenant_shard_id, TIMELINE_ID ) ), } @@ -4212,7 +4385,7 @@ mod tests { // so that all uploads finish & we can call harness.load() below again tenant .shutdown(Default::default(), true) - .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id)) + .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_shard_id)) .await .ok() 
.unwrap(); @@ -4253,7 +4426,7 @@ mod tests { // so that all uploads finish & we can call harness.load() below again tenant .shutdown(Default::default(), true) - .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id)) + .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_shard_id)) .await .ok() .unwrap(); @@ -4315,7 +4488,7 @@ mod tests { // so that all uploads finish & we can call harness.try_load() below again tenant .shutdown(Default::default(), true) - .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id)) + .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_shard_id)) .await .ok() .unwrap(); @@ -4848,7 +5021,7 @@ mod tests { let raw_tline = tline.raw_timeline().unwrap(); raw_tline .shutdown() - .instrument(info_span!("test_shutdown", tenant_id=%raw_tline.tenant_id)) + .instrument(info_span!("test_shutdown", tenant_id=%raw_tline.tenant_shard_id)) .await; std::mem::forget(tline); } @@ -4860,7 +5033,7 @@ mod tests { assert_eq!( e, GetTimelineError::NotFound { - tenant_id: tenant.tenant_id, + tenant_id: tenant.tenant_shard_id.tenant_id, timeline_id: TIMELINE_ID, } ) @@ -4869,12 +5042,12 @@ mod tests { assert!(!harness .conf - .timeline_path(&tenant.tenant_id, &TIMELINE_ID) + .timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID) .exists()); assert!(!harness .conf - .timeline_uninit_mark_file_path(tenant.tenant_id, TIMELINE_ID) + .timeline_uninit_mark_file_path(tenant.tenant_shard_id, TIMELINE_ID) .exists()); Ok(()) diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 5f8c7f6c591c..7a454b53d247 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -8,9 +8,12 @@ //! We cannot use global or default config instead, because wrong settings //! may lead to a data loss. //! -use anyhow::Context; +use anyhow::bail; use pageserver_api::models; +use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize}; +use serde::de::IntoDeserializer; use serde::{Deserialize, Serialize}; +use serde_json::Value; use std::num::NonZeroU64; use std::time::Duration; use utils::generation::Generation; @@ -88,6 +91,14 @@ pub(crate) struct LocationConf { /// The location-specific part of the configuration, describes the operating /// mode of this pageserver for this tenant. pub(crate) mode: LocationMode, + + /// The detailed shard identity. This structure is already scoped within + /// a TenantShardId, but we need the full ShardIdentity to enable calculating + /// key->shard mappings. + #[serde(default = "ShardIdentity::unsharded")] + #[serde(skip_serializing_if = "ShardIdentity::is_unsharded")] + pub(crate) shard: ShardIdentity, + /// The pan-cluster tenant configuration, the same on all locations pub(crate) tenant_conf: TenantConfOpt, } @@ -160,6 +171,8 @@ impl LocationConf { generation, attach_mode: AttachmentMode::Single, }), + // Legacy configuration loads are always from tenants created before sharding existed. + shard: ShardIdentity::unsharded(), tenant_conf, } } @@ -187,6 +200,7 @@ impl LocationConf { fn get_generation(conf: &'_ models::LocationConfig) -> Result { conf.generation + .map(Generation::new) .ok_or_else(|| anyhow::anyhow!("Generation must be set when attaching")) } @@ -226,7 +240,21 @@ impl LocationConf { } }; - Ok(Self { mode, tenant_conf }) + let shard = if conf.shard_count == 0 { + ShardIdentity::unsharded() + } else { + ShardIdentity::new( + ShardNumber(conf.shard_number), + ShardCount(conf.shard_count), + ShardStripeSize(conf.shard_stripe_size), + )? 
+ }; + + Ok(Self { + shard, + mode, + tenant_conf, + }) } } @@ -241,6 +269,7 @@ impl Default for LocationConf { attach_mode: AttachmentMode::Single, }), tenant_conf: TenantConfOpt::default(), + shard: ShardIdentity::unsharded(), } } } @@ -494,105 +523,49 @@ impl Default for TenantConf { } } -// Helper function to standardize the error messages we produce on bad durations -// -// Intended to be used with anyhow's `with_context`, e.g.: -// -// let value = result.with_context(bad_duration("name", &value))?; -// -fn bad_duration<'a>(field_name: &'static str, value: &'a str) -> impl 'a + Fn() -> String { - move || format!("Cannot parse `{field_name}` duration {value:?}") -} - impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt { type Error = anyhow::Error; fn try_from(request_data: &'_ models::TenantConfig) -> Result { - let mut tenant_conf = TenantConfOpt::default(); - - if let Some(gc_period) = &request_data.gc_period { - tenant_conf.gc_period = Some( - humantime::parse_duration(gc_period) - .with_context(bad_duration("gc_period", gc_period))?, - ); - } - tenant_conf.gc_horizon = request_data.gc_horizon; - tenant_conf.image_creation_threshold = request_data.image_creation_threshold; - - if let Some(pitr_interval) = &request_data.pitr_interval { - tenant_conf.pitr_interval = Some( - humantime::parse_duration(pitr_interval) - .with_context(bad_duration("pitr_interval", pitr_interval))?, - ); - } + // Convert the request_data to a JSON Value + let json_value: Value = serde_json::to_value(request_data)?; - if let Some(walreceiver_connect_timeout) = &request_data.walreceiver_connect_timeout { - tenant_conf.walreceiver_connect_timeout = Some( - humantime::parse_duration(walreceiver_connect_timeout).with_context( - bad_duration("walreceiver_connect_timeout", walreceiver_connect_timeout), - )?, - ); - } - if let Some(lagging_wal_timeout) = &request_data.lagging_wal_timeout { - tenant_conf.lagging_wal_timeout = Some( - humantime::parse_duration(lagging_wal_timeout) - .with_context(bad_duration("lagging_wal_timeout", lagging_wal_timeout))?, - ); - } - if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag { - tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag); - } - if let Some(trace_read_requests) = request_data.trace_read_requests { - tenant_conf.trace_read_requests = Some(trace_read_requests); - } - - tenant_conf.checkpoint_distance = request_data.checkpoint_distance; - if let Some(checkpoint_timeout) = &request_data.checkpoint_timeout { - tenant_conf.checkpoint_timeout = Some( - humantime::parse_duration(checkpoint_timeout) - .with_context(bad_duration("checkpoint_timeout", checkpoint_timeout))?, - ); - } + // Create a Deserializer from the JSON Value + let deserializer = json_value.into_deserializer(); - tenant_conf.compaction_target_size = request_data.compaction_target_size; - tenant_conf.compaction_threshold = request_data.compaction_threshold; + // Use serde_path_to_error to deserialize the JSON Value into TenantConfOpt + let tenant_conf: TenantConfOpt = serde_path_to_error::deserialize(deserializer)?; - if let Some(compaction_period) = &request_data.compaction_period { - tenant_conf.compaction_period = Some( - humantime::parse_duration(compaction_period) - .with_context(bad_duration("compaction_period", compaction_period))?, - ); - } + Ok(tenant_conf) + } +} - if let Some(eviction_policy) = &request_data.eviction_policy { - tenant_conf.eviction_policy = Some( - serde::Deserialize::deserialize(eviction_policy) - .context("parse field `eviction_policy`")?, - ); - } +impl TryFrom for 
TenantConfOpt { + type Error = anyhow::Error; - tenant_conf.min_resident_size_override = request_data.min_resident_size_override; - - if let Some(evictions_low_residence_duration_metric_threshold) = - &request_data.evictions_low_residence_duration_metric_threshold - { - tenant_conf.evictions_low_residence_duration_metric_threshold = Some( - humantime::parse_duration(evictions_low_residence_duration_metric_threshold) - .with_context(bad_duration( - "evictions_low_residence_duration_metric_threshold", - evictions_low_residence_duration_metric_threshold, - ))?, - ); + fn try_from(item: toml_edit::Item) -> Result { + match item { + toml_edit::Item::Value(value) => { + let d = value.into_deserializer(); + return serde_path_to_error::deserialize(d) + .map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message())); + } + toml_edit::Item::Table(table) => { + let deserializer = toml_edit::de::Deserializer::new(table.into()); + return serde_path_to_error::deserialize(deserializer) + .map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message())); + } + _ => { + bail!("expected non-inline table but found {item}") + } } - tenant_conf.gc_feedback = request_data.gc_feedback; - - Ok(tenant_conf) } } #[cfg(test)] mod tests { use super::*; + use models::TenantConfig; #[test] fn de_serializing_pageserver_config_omits_empty_values() { @@ -609,4 +582,38 @@ mod tests { assert_eq!(json_form, "{\"gc_horizon\":42}"); assert_eq!(small_conf, serde_json::from_str(&json_form).unwrap()); } + + #[test] + fn test_try_from_models_tenant_config_err() { + let tenant_config = models::TenantConfig { + lagging_wal_timeout: Some("5a".to_string()), + ..TenantConfig::default() + }; + + let tenant_conf_opt = TenantConfOpt::try_from(&tenant_config); + + assert!( + tenant_conf_opt.is_err(), + "Suceeded to convert TenantConfig to TenantConfOpt" + ); + + let expected_error_str = + "lagging_wal_timeout: invalid value: string \"5a\", expected a duration"; + assert_eq!(tenant_conf_opt.unwrap_err().to_string(), expected_error_str); + } + + #[test] + fn test_try_from_models_tenant_config_success() { + let tenant_config = models::TenantConfig { + lagging_wal_timeout: Some("5s".to_string()), + ..TenantConfig::default() + }; + + let tenant_conf_opt = TenantConfOpt::try_from(&tenant_config).unwrap(); + + assert_eq!( + tenant_conf_opt.lagging_wal_timeout, + Some(Duration::from_secs(5)) + ); + } } diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index 066f239ff0b2..b7b2ef9c79cb 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -2,21 +2,19 @@ use std::sync::Arc; use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; -use pageserver_api::models::TenantState; +use pageserver_api::{models::TenantState, shard::TenantShardId}; use remote_storage::{GenericRemoteStorage, RemotePath}; use tokio::sync::OwnedMutexGuard; use tokio_util::sync::CancellationToken; -use tracing::{error, instrument, warn, Instrument, Span}; +use tracing::{error, instrument, Instrument, Span}; -use utils::{ - backoff, completion, crashsafe, fs_ext, - id::{TenantId, TimelineId}, -}; +use utils::{backoff, completion, crashsafe, fs_ext, id::TimelineId}; use crate::{ config::PageServerConf, context::RequestContext, task_mgr::{self, TaskKind}, + tenant::mgr::{TenantSlot, TenantsMapRemoveResult}, InitializationOrder, }; @@ -59,10 +57,10 @@ type DeletionGuard = tokio::sync::OwnedMutexGuard; fn remote_tenant_delete_mark_path( conf: &PageServerConf, - tenant_id: &TenantId, + tenant_shard_id: 
&TenantShardId, ) -> anyhow::Result { let tenant_remote_path = conf - .tenant_path(tenant_id) + .tenant_path(tenant_shard_id) .strip_prefix(&conf.workdir) .context("Failed to strip workdir prefix") .and_then(RemotePath::new) @@ -73,9 +71,9 @@ fn remote_tenant_delete_mark_path( async fn create_remote_delete_mark( conf: &PageServerConf, remote_storage: &GenericRemoteStorage, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, ) -> Result<(), DeleteTenantError> { - let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_id)?; + let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?; let data: &[u8] = &[]; backoff::retry( @@ -99,9 +97,9 @@ async fn create_remote_delete_mark( async fn create_local_delete_mark( conf: &PageServerConf, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, ) -> Result<(), DeleteTenantError> { - let marker_path = conf.tenant_deleted_mark_file_path(tenant_id); + let marker_path = conf.tenant_deleted_mark_file_path(tenant_shard_id); // Note: we're ok to replace existing file. let _ = std::fs::OpenOptions::new() @@ -170,10 +168,10 @@ async fn ensure_timelines_dir_empty(timelines_path: &Utf8Path) -> Result<(), Del async fn remove_tenant_remote_delete_mark( conf: &PageServerConf, remote_storage: Option<&GenericRemoteStorage>, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, ) -> Result<(), DeleteTenantError> { if let Some(remote_storage) = remote_storage { - let path = remote_tenant_delete_mark_path(conf, tenant_id)?; + let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?; backoff::retry( || async { remote_storage.delete(&path).await }, |_e| false, @@ -192,7 +190,7 @@ async fn remove_tenant_remote_delete_mark( // Cleanup fs traces: tenant config, timelines dir local delete mark, tenant dir async fn cleanup_remaining_fs_traces( conf: &PageServerConf, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, ) -> Result<(), DeleteTenantError> { let rm = |p: Utf8PathBuf, is_dir: bool| async move { if is_dir { @@ -204,8 +202,8 @@ async fn cleanup_remaining_fs_traces( .with_context(|| format!("failed to delete {p}")) }; - rm(conf.tenant_config_path(tenant_id), false).await?; - rm(conf.tenant_location_config_path(tenant_id), false).await?; + rm(conf.tenant_config_path(tenant_shard_id), false).await?; + rm(conf.tenant_location_config_path(tenant_shard_id), false).await?; fail::fail_point!("tenant-delete-before-remove-timelines-dir", |_| { Err(anyhow::anyhow!( @@ -213,7 +211,7 @@ async fn cleanup_remaining_fs_traces( ))? }); - rm(conf.timelines_path(tenant_id), true).await?; + rm(conf.timelines_path(tenant_shard_id), true).await?; fail::fail_point!("tenant-delete-before-remove-deleted-mark", |_| { Err(anyhow::anyhow!( @@ -227,14 +225,14 @@ async fn cleanup_remaining_fs_traces( // to be reordered later and thus missed if a crash occurs. // Note that we dont need to sync after mark file is removed // because we can tolerate the case when mark file reappears on startup. 
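The ordering in `cleanup_remaining_fs_traces` is deliberate: tenant contents are removed first, the tenant directory is fsynced so those removals cannot be reordered past what follows, and only then is the deleted-mark removed (a mark that reappears after a crash is tolerated, so no fsync is needed afterwards). A minimal sketch of that fsync-before-dropping-the-marker idea using plain `std::fs`; the file names are illustrative and the directory fsync is Unix-specific.

```rust
use std::fs;
use std::path::Path;

/// On Unix a directory can be fsynced by opening it read-only; this makes the
/// unlinks already performed inside it durable before we continue.
fn fsync_dir(dir: &Path) -> std::io::Result<()> {
    fs::File::open(dir)?.sync_all()
}

fn cleanup(tenant_dir: &Path) -> std::io::Result<()> {
    // 1. Remove local traces except the marker (illustrative subset of the real list).
    let _ = fs::remove_file(tenant_dir.join("config-v1"));
    let _ = fs::remove_dir_all(tenant_dir.join("timelines"));

    // 2. Make those removals durable *before* touching the marker, so a crash cannot
    //    leave the marker gone while stale contents survive.
    fsync_dir(tenant_dir)?;

    // 3. Drop the marker last; if it reappears after a crash, the cleanup is simply redone.
    let _ = fs::remove_file(tenant_dir.join("deleted"));

    // 4. Finally remove the now-empty tenant directory itself.
    fs::remove_dir_all(tenant_dir)
}

fn main() -> std::io::Result<()> {
    let dir = Path::new("/tmp/example-tenant");
    fs::create_dir_all(dir.join("timelines"))?;
    fs::write(dir.join("config-v1"), b"")?;
    fs::write(dir.join("deleted"), b"")?;
    cleanup(dir)
}
```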
- let tenant_path = &conf.tenant_path(tenant_id); + let tenant_path = &conf.tenant_path(tenant_shard_id); if tenant_path.exists() { - crashsafe::fsync_async(&conf.tenant_path(tenant_id)) + crashsafe::fsync_async(&conf.tenant_path(tenant_shard_id)) .await .context("fsync_pre_mark_remove")?; } - rm(conf.tenant_deleted_mark_file_path(tenant_id), false).await?; + rm(conf.tenant_deleted_mark_file_path(tenant_shard_id), false).await?; fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| { Err(anyhow::anyhow!( @@ -242,7 +240,7 @@ async fn cleanup_remaining_fs_traces( ))? }); - rm(conf.tenant_path(tenant_id), true).await?; + rm(conf.tenant_path(tenant_shard_id), true).await?; Ok(()) } @@ -287,6 +285,8 @@ impl DeleteTenantFlow { ) -> Result<(), DeleteTenantError> { span::debug_assert_current_span_has_tenant_id(); + pausable_failpoint!("tenant-delete-before-run"); + let mut guard = Self::prepare(&tenant).await?; if let Err(e) = Self::run_inner(&mut guard, conf, remote_storage.as_ref(), &tenant).await { @@ -321,7 +321,7 @@ impl DeleteTenantFlow { // Though sounds scary, different mark name? // Detach currently uses remove_dir_all so in case of a crash we can end up in a weird state. if let Some(remote_storage) = &remote_storage { - create_remote_delete_mark(conf, remote_storage, &tenant.tenant_id) + create_remote_delete_mark(conf, remote_storage, &tenant.tenant_shard_id) .await .context("remote_mark")? } @@ -332,7 +332,7 @@ impl DeleteTenantFlow { ))? }); - create_local_delete_mark(conf, &tenant.tenant_id) + create_local_delete_mark(conf, &tenant.tenant_shard_id) .await .context("local delete mark")?; @@ -374,9 +374,11 @@ impl DeleteTenantFlow { return Ok(acquire(tenant)); } - let tenant_id = tenant.tenant_id; // Check local mark first, if its there there is no need to go to s3 to check whether remote one exists. - if conf.tenant_deleted_mark_file_path(&tenant_id).exists() { + if conf + .tenant_deleted_mark_file_path(&tenant.tenant_shard_id) + .exists() + { Ok(acquire(tenant)) } else { Ok(None) @@ -459,12 +461,12 @@ impl DeleteTenantFlow { tenants: &'static std::sync::RwLock, tenant: Arc, ) { - let tenant_id = tenant.tenant_id; + let tenant_shard_id = tenant.tenant_shard_id; task_mgr::spawn( task_mgr::BACKGROUND_RUNTIME.handle(), TaskKind::TimelineDeletionWorker, - Some(tenant_id), + Some(tenant_shard_id.tenant_id), None, "tenant_delete", false, @@ -478,7 +480,7 @@ impl DeleteTenantFlow { Ok(()) } .instrument({ - let span = tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_id); + let span = tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()); span.follows_from(Span::current()); span }), @@ -516,7 +518,7 @@ impl DeleteTenantFlow { } } - let timelines_path = conf.timelines_path(&tenant.tenant_id); + let timelines_path = conf.timelines_path(&tenant.tenant_shard_id); // May not exist if we fail in cleanup_remaining_fs_traces after removing it if timelines_path.exists() { // sanity check to guard against layout changes @@ -525,7 +527,8 @@ impl DeleteTenantFlow { .context("timelines dir not empty")?; } - remove_tenant_remote_delete_mark(conf, remote_storage.as_ref(), &tenant.tenant_id).await?; + remove_tenant_remote_delete_mark(conf, remote_storage.as_ref(), &tenant.tenant_shard_id) + .await?; fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| { Err(anyhow::anyhow!( @@ -533,21 +536,73 @@ impl DeleteTenantFlow { ))? 
}); - cleanup_remaining_fs_traces(conf, &tenant.tenant_id) + cleanup_remaining_fs_traces(conf, &tenant.tenant_shard_id) .await .context("cleanup_remaining_fs_traces")?; { - let mut locked = tenants.write().unwrap(); - if locked.remove(&tenant.tenant_id).is_none() { - warn!("Tenant got removed from tenants map during deletion"); - }; - - // FIXME: we should not be modifying this from outside of mgr.rs. - // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080) - crate::metrics::TENANT_MANAGER - .tenant_slots - .set(locked.len() as u64); + pausable_failpoint!("tenant-delete-before-map-remove"); + + // This block is simply removing the TenantSlot for this tenant. It requires a loop because + // we might conflict with a TenantSlot::InProgress marker and need to wait for it. + // + // This complexity will go away when we simplify how deletion works: + // https://github.com/neondatabase/neon/issues/5080 + loop { + // Under the TenantMap lock, try to remove the tenant. We usually succeed, but if + // we encounter an InProgress marker, yield the barrier it contains and wait on it. + let barrier = { + let mut locked = tenants.write().unwrap(); + let removed = locked.remove(&tenant.tenant_shard_id.tenant_id); + + // FIXME: we should not be modifying this from outside of mgr.rs. + // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080) + crate::metrics::TENANT_MANAGER + .tenant_slots + .set(locked.len() as u64); + + match removed { + TenantsMapRemoveResult::Occupied(TenantSlot::Attached(tenant)) => { + match tenant.current_state() { + TenantState::Stopping { .. } | TenantState::Broken { .. } => { + // Expected: we put the tenant into stopping state before we start deleting it + } + state => { + // Unexpected state + tracing::warn!( + "Tenant in unexpected state {state} after deletion" + ); + } + } + break; + } + TenantsMapRemoveResult::Occupied(TenantSlot::Secondary) => { + // This is unexpected: this secondary tenants should not have been created, and we + // are not in a position to shut it down from here. 
+ tracing::warn!("Tenant transitioned to secondary mode while deleting!"); + break; + } + TenantsMapRemoveResult::Occupied(TenantSlot::InProgress(_)) => { + unreachable!("TenantsMap::remove handles InProgress separately, should never return it here"); + } + TenantsMapRemoveResult::Vacant => { + tracing::warn!( + "Tenant removed from TenantsMap before deletion completed" + ); + break; + } + TenantsMapRemoveResult::InProgress(barrier) => { + // An InProgress entry was found, we must wait on its barrier + barrier + } + } + }; + + tracing::info!( + "Waiting for competing operation to complete before deleting state for tenant" + ); + barrier.wait().await; + } } *guard = Self::Finished; diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs index 9a06d9df611d..591eacd1046e 100644 --- a/pageserver/src/tenant/ephemeral_file.rs +++ b/pageserver/src/tenant/ephemeral_file.rs @@ -7,18 +7,19 @@ use crate::page_cache::{self, PAGE_SZ}; use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReader}; use crate::virtual_file::VirtualFile; use camino::Utf8PathBuf; +use pageserver_api::shard::TenantShardId; use std::cmp::min; use std::fs::OpenOptions; use std::io::{self, ErrorKind}; use std::ops::DerefMut; use std::sync::atomic::AtomicU64; use tracing::*; -use utils::id::{TenantId, TimelineId}; +use utils::id::TimelineId; pub struct EphemeralFile { page_cache_file_id: page_cache::FileId, - _tenant_id: TenantId, + _tenant_shard_id: TenantShardId, _timeline_id: TimelineId, file: VirtualFile, len: u64, @@ -31,7 +32,7 @@ pub struct EphemeralFile { impl EphemeralFile { pub async fn create( conf: &PageServerConf, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, ) -> Result { static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1); @@ -39,7 +40,7 @@ impl EphemeralFile { NEXT_FILENAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed); let filename = conf - .timeline_path(&tenant_id, &timeline_id) + .timeline_path(&tenant_shard_id, &timeline_id) .join(Utf8PathBuf::from(format!( "ephemeral-{filename_disambiguator}" ))); @@ -52,7 +53,7 @@ impl EphemeralFile { Ok(EphemeralFile { page_cache_file_id: page_cache::next_file_id(), - _tenant_id: tenant_id, + _tenant_shard_id: tenant_shard_id, _timeline_id: timeline_id, file, len: 0, @@ -282,7 +283,7 @@ mod tests { ) -> Result< ( &'static PageServerConf, - TenantId, + TenantShardId, TimelineId, RequestContext, ), @@ -295,13 +296,13 @@ mod tests { // OK in a test. 
let conf: &'static PageServerConf = Box::leak(Box::new(conf)); - let tenant_id = TenantId::from_str("11000000000000000000000000000000").unwrap(); + let tenant_shard_id = TenantShardId::from_str("11000000000000000000000000000000").unwrap(); let timeline_id = TimelineId::from_str("22000000000000000000000000000000").unwrap(); - fs::create_dir_all(conf.timeline_path(&tenant_id, &timeline_id))?; + fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id))?; let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); - Ok((conf, tenant_id, timeline_id, ctx)) + Ok((conf, tenant_shard_id, timeline_id, ctx)) } #[tokio::test] diff --git a/pageserver/src/tenant/metadata.rs b/pageserver/src/tenant/metadata.rs index 38fd42674605..6fb86c65e27f 100644 --- a/pageserver/src/tenant/metadata.rs +++ b/pageserver/src/tenant/metadata.rs @@ -11,15 +11,12 @@ use std::io::{self}; use anyhow::{ensure, Context}; +use pageserver_api::shard::TenantShardId; use serde::{de::Error, Deserialize, Serialize, Serializer}; use thiserror::Error; use utils::bin_ser::SerializeError; use utils::crashsafe::path_with_suffix_extension; -use utils::{ - bin_ser::BeSer, - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn}; use crate::config::PageServerConf; use crate::virtual_file::VirtualFile; @@ -272,14 +269,14 @@ impl Serialize for TimelineMetadata { } /// Save timeline metadata to file -#[tracing::instrument(skip_all, fields(%tenant_id, %timeline_id))] +#[tracing::instrument(skip_all, fields(%tenant_id=tenant_shard_id.tenant_id, %shard_id=tenant_shard_id.shard_slug(), %timeline_id))] pub async fn save_metadata( conf: &'static PageServerConf, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, data: &TimelineMetadata, ) -> anyhow::Result<()> { - let path = conf.metadata_path(tenant_id, timeline_id); + let path = conf.metadata_path(tenant_shard_id, timeline_id); let temp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX); let metadata_bytes = data.to_bytes().context("serialize metadata")?; VirtualFile::crashsafe_overwrite(&path, &temp_path, &metadata_bytes) @@ -299,10 +296,10 @@ pub enum LoadMetadataError { pub fn load_metadata( conf: &'static PageServerConf, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, ) -> Result { - let metadata_path = conf.metadata_path(tenant_id, timeline_id); + let metadata_path = conf.metadata_path(tenant_shard_id, timeline_id); let metadata_bytes = std::fs::read(metadata_path)?; Ok(TimelineMetadata::from_bytes(&metadata_bytes)?) 
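`save_metadata` and the tenant-config writers all funnel through `VirtualFile::crashsafe_overwrite`, i.e. the usual write-to-a-temporary-file-then-rename dance, so a crash leaves either the old file or the new one, never a torn mix. A self-contained sketch of that pattern with `std::fs`; the helper name and temp suffix are illustrative, not Neon's API (the real call also goes through `spawn_blocking` and the `VirtualFile` layer).

```rust
use std::fs;
use std::io::Write;
use std::path::Path;

/// Atomically replace `path` with `contents`: after a crash the file is either
/// the complete old version or the complete new one, never a partial write.
fn crashsafe_overwrite(path: &Path, contents: &[u8]) -> std::io::Result<()> {
    // Illustrative temp-file naming; the real code appends a dedicated TEMP_FILE_SUFFIX.
    let temp_path = path.with_extension("temp");

    // 1. Write everything to the temporary file and fsync it.
    let mut tmp = fs::File::create(&temp_path)?;
    tmp.write_all(contents)?;
    tmp.sync_all()?;
    drop(tmp);

    // 2. Atomically rename the temporary file over the destination.
    fs::rename(&temp_path, path)?;

    // 3. fsync the parent directory so the rename itself survives a crash (Unix-specific).
    if let Some(parent) = path.parent() {
        fs::File::open(parent)?.sync_all()?;
    }
    Ok(())
}

fn main() -> std::io::Result<()> {
    let path = Path::new("/tmp/metadata-example");
    crashsafe_overwrite(path, b"serialized timeline metadata")?;
    assert_eq!(fs::read(path)?, b"serialized timeline metadata");
    Ok(())
}
```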
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 3ff7425bc24c..f34d62ba5348 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -29,7 +29,9 @@ use crate::control_plane_client::{ use crate::deletion_queue::DeletionQueueClient; use crate::metrics::TENANT_MANAGER as METRICS; use crate::task_mgr::{self, TaskKind}; -use crate::tenant::config::{AttachmentMode, LocationConf, LocationMode, TenantConfOpt}; +use crate::tenant::config::{ + AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, TenantConfOpt, +}; use crate::tenant::delete::DeleteTenantFlow; use crate::tenant::span::debug_assert_current_span_has_tenant_id; use crate::tenant::{create_tenant_files, AttachedTenantConf, SpawnMode, Tenant, TenantState}; @@ -122,6 +124,12 @@ fn exactly_one_or_none<'a>( } } +pub(crate) enum TenantsMapRemoveResult { + Occupied(TenantSlot), + Vacant, + InProgress(utils::completion::Barrier), +} + impl TenantsMap { /// Convenience function for typical usage, where we want to get a `Tenant` object, for /// working with attached tenants. If the TenantId is in the map but in Secondary state, @@ -136,12 +144,28 @@ impl TenantsMap { } } - pub(crate) fn remove(&mut self, tenant_id: &TenantId) -> Option { + /// Only for use from DeleteTenantFlow. This method directly removes a TenantSlot from the map. + /// + /// The normal way to remove a tenant is using a SlotGuard, which will gracefully remove the guarded + /// slot if the enclosed tenant is shutdown. + pub(crate) fn remove(&mut self, tenant_id: &TenantId) -> TenantsMapRemoveResult { + use std::collections::btree_map::Entry; match self { - TenantsMap::Initializing => None, + TenantsMap::Initializing => TenantsMapRemoveResult::Vacant, TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => { let key = exactly_one_or_none(m, tenant_id).map(|(k, _)| *k); - key.and_then(|key| m.remove(&key)) + match key { + Some(key) => match m.entry(key) { + Entry::Occupied(entry) => match entry.get() { + TenantSlot::InProgress(barrier) => { + TenantsMapRemoveResult::InProgress(barrier.clone()) + } + _ => TenantsMapRemoveResult::Occupied(entry.remove()), + }, + Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant, + }, + None => TenantsMapRemoveResult::Vacant, + } } } } @@ -250,8 +274,8 @@ pub struct TenantManager { } fn emergency_generations( - tenant_confs: &HashMap>, -) -> HashMap { + tenant_confs: &HashMap>, +) -> HashMap { tenant_confs .iter() .filter_map(|(tid, lc)| { @@ -271,10 +295,10 @@ fn emergency_generations( async fn init_load_generations( conf: &'static PageServerConf, - tenant_confs: &HashMap>, + tenant_confs: &HashMap>, resources: &TenantSharedResources, cancel: &CancellationToken, -) -> anyhow::Result>> { +) -> anyhow::Result>> { let generations = if conf.control_plane_emergency_mode { error!( "Emergency mode! 
Tenants will be attached unsafely using their last known generation" @@ -317,7 +341,7 @@ async fn init_load_generations( fn load_tenant_config( conf: &'static PageServerConf, dentry: Utf8DirEntry, -) -> anyhow::Result)>> { +) -> anyhow::Result)>> { let tenant_dir_path = dentry.path().to_path_buf(); if crate::is_temporary(&tenant_dir_path) { info!("Found temporary tenant directory, removing: {tenant_dir_path}"); @@ -353,10 +377,10 @@ fn load_tenant_config( return Ok(None); } - let tenant_id = match tenant_dir_path + let tenant_shard_id = match tenant_dir_path .file_name() .unwrap_or_default() - .parse::() + .parse::() { Ok(id) => id, Err(_) => { @@ -366,8 +390,8 @@ fn load_tenant_config( }; Ok(Some(( - tenant_id, - Tenant::load_tenant_config(conf, &tenant_id), + tenant_shard_id, + Tenant::load_tenant_config(conf, &tenant_shard_id), ))) } @@ -378,7 +402,7 @@ fn load_tenant_config( /// seconds even on reasonably fast drives. async fn init_load_tenant_configs( conf: &'static PageServerConf, -) -> anyhow::Result>> { +) -> anyhow::Result>> { let tenants_dir = conf.tenants_path(); let dentries = tokio::task::spawn_blocking(move || -> anyhow::Result> { @@ -428,19 +452,19 @@ pub async fn init_tenant_mgr( init_load_generations(conf, &tenant_configs, &resources, &cancel).await?; // Construct `Tenant` objects and start them running - for (tenant_id, location_conf) in tenant_configs { - let tenant_dir_path = conf.tenant_path(&tenant_id); + for (tenant_shard_id, location_conf) in tenant_configs { + let tenant_dir_path = conf.tenant_path(&tenant_shard_id); let mut location_conf = match location_conf { Ok(l) => l, Err(e) => { - warn!(%tenant_id, "Marking tenant broken, failed to {e:#}"); + warn!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Marking tenant broken, failed to {e:#}"); tenants.insert( - TenantShardId::unsharded(tenant_id), + tenant_shard_id, TenantSlot::Attached(Tenant::create_broken_tenant( conf, - tenant_id, + tenant_shard_id, format!("{}", e), )), ); @@ -451,7 +475,7 @@ pub async fn init_tenant_mgr( let generation = if let Some(generations) = &tenant_generations { // We have a generation map: treat it as the authority for whether // this tenant is really attached. - if let Some(gen) = generations.get(&tenant_id) { + if let Some(gen) = generations.get(&tenant_shard_id) { *gen } else { match &location_conf.mode { @@ -459,8 +483,8 @@ pub async fn init_tenant_mgr( // We do not require the control plane's permission for secondary mode // tenants, because they do no remote writes and hence require no // generation number - info!(%tenant_id, "Loaded tenant in secondary mode"); - tenants.insert(TenantShardId::unsharded(tenant_id), TenantSlot::Secondary); + info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Loaded tenant in secondary mode"); + tenants.insert(tenant_shard_id, TenantSlot::Secondary); } LocationMode::Attached(_) => { // TODO: augment re-attach API to enable the control plane to @@ -468,9 +492,9 @@ pub async fn init_tenant_mgr( // away local state, we can gracefully fall back to secondary here, if the control // plane tells us so. 
// (https://github.com/neondatabase/neon/issues/5377) - info!(%tenant_id, "Detaching tenant, control plane omitted it in re-attach response"); + info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Detaching tenant, control plane omitted it in re-attach response"); if let Err(e) = safe_remove_tenant_dir_all(&tenant_dir_path).await { - error!(%tenant_id, + error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Failed to remove detached tenant directory '{tenant_dir_path}': {e:?}", ); } @@ -482,18 +506,18 @@ pub async fn init_tenant_mgr( } else { // Legacy mode: no generation information, any tenant present // on local disk may activate - info!(%tenant_id, "Starting tenant in legacy mode, no generation",); + info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Starting tenant in legacy mode, no generation",); Generation::none() }; // Presence of a generation number implies attachment: attach the tenant // if it wasn't already, and apply the generation number. location_conf.attach_in_generation(generation); - Tenant::persist_tenant_config(conf, &tenant_id, &location_conf).await?; + Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await?; match tenant_spawn( conf, - tenant_id, + tenant_shard_id, &tenant_dir_path, resources.clone(), AttachedTenantConf::try_from(location_conf)?, @@ -509,7 +533,7 @@ pub async fn init_tenant_mgr( ); } Err(e) => { - error!(%tenant_id, "Failed to start tenant: {e:#}"); + error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Failed to start tenant: {e:#}"); } } } @@ -533,7 +557,7 @@ pub async fn init_tenant_mgr( #[allow(clippy::too_many_arguments)] pub(crate) fn tenant_spawn( conf: &'static PageServerConf, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, tenant_path: &Utf8Path, resources: TenantSharedResources, location_conf: AttachedTenantConf, @@ -557,16 +581,16 @@ pub(crate) fn tenant_spawn( "Cannot load tenant from empty directory {tenant_path:?}" ); - let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_id); + let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id); anyhow::ensure!( - !conf.tenant_ignore_mark_file_path(&tenant_id).exists(), + !conf.tenant_ignore_mark_file_path(&tenant_shard_id).exists(), "Cannot load tenant, ignore mark found at {tenant_ignore_mark:?}" ); - info!("Attaching tenant {tenant_id}"); + info!("Attaching tenant {tenant_shard_id}"); let tenant = match Tenant::spawn( conf, - tenant_id, + tenant_shard_id, resources, location_conf, init_order, @@ -576,8 +600,8 @@ pub(crate) fn tenant_spawn( ) { Ok(tenant) => tenant, Err(e) => { - error!("Failed to spawn tenant {tenant_id}, reason: {e:#}"); - Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}")) + error!("Failed to spawn tenant {tenant_shard_id}, reason: {e:#}"); + Tenant::create_broken_tenant(conf, tenant_shard_id, format!("{e:#}")) } }; @@ -732,16 +756,15 @@ pub(crate) async fn create_tenant( ctx: &RequestContext, ) -> Result, TenantMapInsertError> { let location_conf = LocationConf::attached_single(tenant_conf, generation); + info!("Creating tenant at location {location_conf:?}"); let slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustNotExist)?; - // TODO(sharding): make local paths shard-aware - let tenant_path = - super::create_tenant_files(conf, &location_conf, &tenant_shard_id.tenant_id).await?; + let tenant_path = super::create_tenant_files(conf, 
&location_conf, &tenant_shard_id).await?; let created_tenant = tenant_spawn( conf, - tenant_shard_id.tenant_id, + tenant_shard_id, &tenant_path, resources, AttachedTenantConf::try_from(location_conf)?, @@ -781,8 +804,9 @@ pub(crate) async fn set_new_tenant_config( // API to use is the location_config/ endpoint, which lets the caller provide // the full LocationConf. let location_conf = LocationConf::attached_single(new_tenant_conf, tenant.generation); + let tenant_shard_id = TenantShardId::unsharded(tenant_id); - Tenant::persist_tenant_config(conf, &tenant_id, &location_conf) + Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf) .await .map_err(SetNewTenantConfigError::Persist)?; tenant.set_new_tenant_config(new_tenant_conf); @@ -792,8 +816,6 @@ pub(crate) async fn set_new_tenant_config( impl TenantManager { /// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or is not fitting to the query. /// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants. - /// - /// This method is cancel-safe. pub(crate) fn get_attached_tenant_shard( &self, tenant_shard_id: TenantShardId, @@ -842,6 +864,7 @@ impl TenantManager { &self, tenant_shard_id: TenantShardId, new_location_config: LocationConf, + flush: Option, ctx: &RequestContext, ) -> Result<(), anyhow::Error> { debug_assert_current_span_has_tenant_id(); @@ -850,7 +873,7 @@ impl TenantManager { // Special case fast-path for updates to Tenant: if our upsert is only updating configuration, // then we do not need to set the slot to InProgress, we can just call into the // existng tenant. - { + let modify_tenant = { let locked = self.tenants.read().unwrap(); let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Write)?; @@ -861,22 +884,50 @@ impl TenantManager { // take our fast path and just provide the updated configuration // to the tenant. tenant.set_new_location_config(AttachedTenantConf::try_from( - new_location_config, + new_location_config.clone(), )?); - // Persist the new config in the background, to avoid holding up any - // locks while we do so. - // TODO - - return Ok(()); + Some(tenant.clone()) } else { // Different generations, fall through to general case + None } } _ => { // Not an Attached->Attached transition, fall through to general case + None } } + }; + + // Fast-path continued: having dropped out of the self.tenants lock, do the async + // phase of waiting for flush, before returning. + if let Some(tenant) = modify_tenant { + // Transition to AttachedStale means we may well hold a valid generation + // still, and have been requested to go stale as part of a migration. If + // the caller set `flush`, then flush to remote storage. + if let LocationMode::Attached(AttachedLocationConfig { + generation: _, + attach_mode: AttachmentMode::Stale, + }) = &new_location_config.mode + { + if let Some(flush_timeout) = flush { + match tokio::time::timeout(flush_timeout, tenant.flush_remote()).await { + Ok(Err(e)) => { + return Err(e); + } + Ok(Ok(_)) => return Ok(()), + Err(_) => { + tracing::warn!( + timeout_ms = flush_timeout.as_millis(), + "Timed out waiting for flush to remote storage, proceeding anyway." 
+ ) + } + } + } + } + + return Ok(()); } // General case for upserts to TenantsMap, excluding the case above: we will substitute an @@ -915,8 +966,7 @@ impl TenantManager { slot_guard.drop_old_value().expect("We just shut it down"); } - // TODO(sharding): make local paths sharding-aware - let tenant_path = self.conf.tenant_path(&tenant_shard_id.tenant_id); + let tenant_path = self.conf.tenant_path(&tenant_shard_id); let new_slot = match &new_location_config.mode { LocationMode::Secondary(_) => { @@ -926,20 +976,14 @@ impl TenantManager { .await .with_context(|| format!("Creating {tenant_path}"))?; - // TODO(sharding): make local paths sharding-aware - Tenant::persist_tenant_config( - self.conf, - &tenant_shard_id.tenant_id, - &new_location_config, - ) - .await - .map_err(SetNewTenantConfigError::Persist)?; + Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config) + .await + .map_err(SetNewTenantConfigError::Persist)?; TenantSlot::Secondary } LocationMode::Attached(_attach_config) => { - // TODO(sharding): make local paths sharding-aware - let timelines_path = self.conf.timelines_path(&tenant_shard_id.tenant_id); + let timelines_path = self.conf.timelines_path(&tenant_shard_id); // Directory doesn't need to be fsync'd because we do not depend on // it to exist after crashes: it may be recreated when tenant is @@ -948,19 +992,13 @@ impl TenantManager { .await .with_context(|| format!("Creating {timelines_path}"))?; - // TODO(sharding): make local paths sharding-aware - Tenant::persist_tenant_config( - self.conf, - &tenant_shard_id.tenant_id, - &new_location_config, - ) - .await - .map_err(SetNewTenantConfigError::Persist)?; + Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config) + .await + .map_err(SetNewTenantConfigError::Persist)?; - // TODO(sharding): make spawn sharding-aware let tenant = tenant_spawn( self.conf, - tenant_shard_id.tenant_id, + tenant_shard_id, &tenant_path, self.resources.clone(), AttachedTenantConf::try_from(new_location_config)?, @@ -1262,8 +1300,7 @@ async fn detach_tenant0( deletion_queue_client: &DeletionQueueClient, ) -> Result { let tenant_dir_rename_operation = |tenant_id_to_clean: TenantShardId| async move { - // TODO(sharding): make local path helpers shard-aware - let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean.tenant_id); + let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean); safe_rename_tenant_dir(&local_tenant_directory) .await .with_context(|| format!("local tenant directory {local_tenant_directory:?} rename")) @@ -1288,8 +1325,7 @@ async fn detach_tenant0( Err(TenantStateError::SlotError(TenantSlotError::NotFound(_))) ) { - // TODO(sharding): make local paths sharding-aware - let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id.tenant_id); + let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id); if tenant_ignore_mark.exists() { info!("Detaching an ignored tenant"); let tmp_path = tenant_dir_rename_operation(tenant_shard_id) @@ -1318,9 +1354,9 @@ pub(crate) async fn load_tenant( let slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustNotExist)?; - let tenant_path = conf.tenant_path(&tenant_id); + let tenant_path = conf.tenant_path(&tenant_shard_id); - let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_id); + let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id); if tenant_ignore_mark.exists() { std::fs::remove_file(&tenant_ignore_mark).with_context(|| { 
format!( @@ -1336,14 +1372,14 @@ pub(crate) async fn load_tenant( }; let mut location_conf = - Tenant::load_tenant_config(conf, &tenant_id).map_err(TenantMapInsertError::Other)?; + Tenant::load_tenant_config(conf, &tenant_shard_id).map_err(TenantMapInsertError::Other)?; location_conf.attach_in_generation(generation); - Tenant::persist_tenant_config(conf, &tenant_id, &location_conf).await?; + Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await?; let new_tenant = tenant_spawn( conf, - tenant_id, + tenant_shard_id, &tenant_path, resources, AttachedTenantConf::try_from(location_conf)?, @@ -1374,7 +1410,7 @@ async fn ignore_tenant0( let tenant_shard_id = TenantShardId::unsharded(tenant_id); remove_tenant_from_memory(tenants, tenant_shard_id, async { - let ignore_mark_file = conf.tenant_ignore_mark_file_path(&tenant_id); + let ignore_mark_file = conf.tenant_ignore_mark_file_path(&tenant_shard_id); fs::File::create(&ignore_mark_file) .await .context("Failed to create ignore mark file") @@ -1432,13 +1468,13 @@ pub(crate) async fn attach_tenant( let slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustNotExist)?; let location_conf = LocationConf::attached_single(tenant_conf, generation); - let tenant_dir = create_tenant_files(conf, &location_conf, &tenant_id).await?; + let tenant_dir = create_tenant_files(conf, &location_conf, &tenant_shard_id).await?; // TODO: tenant directory remains on disk if we bail out from here on. // See https://github.com/neondatabase/neon/issues/4233 let attached_tenant = tenant_spawn( conf, - tenant_id, + tenant_shard_id, &tenant_dir, resources, AttachedTenantConf::try_from(location_conf)?, @@ -1954,6 +1990,9 @@ pub(crate) async fn immediate_gc( .with_context(|| format!("tenant {tenant_id}")) .map_err(|e| ApiError::NotFound(e.into()))?; + // TODO(sharding): make callers of this function shard-aware + let tenant_shard_id = TenantShardId::unsharded(tenant_id); + let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon()); // Use tenant's pitr setting let pitr = tenant.get_pitr_interval(); @@ -1961,6 +2000,7 @@ pub(crate) async fn immediate_gc( // Run in task_mgr to avoid race with tenant_detach operation let ctx = ctx.detached_child(TaskKind::GarbageCollector, DownloadBehavior::Download); let (task_done, wait_task_done) = tokio::sync::oneshot::channel(); + // TODO: spawning is redundant now, need to hold the gate task_mgr::spawn( &tokio::runtime::Handle::current(), TaskKind::GarbageCollector, @@ -1970,12 +2010,40 @@ pub(crate) async fn immediate_gc( false, async move { fail::fail_point!("immediate_gc_task_pre"); - let result = tenant + + #[allow(unused_mut)] + let mut result = tenant .gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx) - .instrument(info_span!("manual_gc", %tenant_id, %timeline_id)) + .instrument(info_span!("manual_gc", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id)) .await; // FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it // better once the types support it. + + #[cfg(feature = "testing")] + { + if let Ok(result) = result.as_mut() { + // why not futures unordered? it seems it needs very much the same task structure + // but would only run on single task. 
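// Aside, not part of the patch: a minimal sketch of the FuturesUnordered alternative that the
// comment above weighs against tokio::task::JoinSet. It polls every wait_drop-style future on
// the current task instead of spawning; the `futures` crate and the stand-in `wait_all` helper
// are assumptions of this sketch, not code from the change.
use futures::stream::{FuturesUnordered, StreamExt};

async fn wait_all<F>(futs: Vec<F>)
where
    F: std::future::Future<Output = ()>,
{
    let mut pending: FuturesUnordered<F> = futs.into_iter().collect();
    while let Some(()) = pending.next().await {
        // each completion corresponds to one dropped layer
    }
}

#[tokio::main]
async fn main() {
    wait_all(vec![std::future::ready(()), std::future::ready(())]).await;
}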
+ let mut js = tokio::task::JoinSet::new(); + for layer in std::mem::take(&mut result.doomed_layers) { + js.spawn(layer.wait_drop()); + } + tracing::info!(total = js.len(), "starting to wait for the gc'd layers to be dropped"); + while let Some(res) = js.join_next().await { + res.expect("wait_drop should not panic"); + } + } + + let timeline = tenant.get_timeline(timeline_id, false).ok(); + let rtc = timeline.as_ref().and_then(|x| x.remote_client.as_ref()); + + if let Some(rtc) = rtc { + // layer drops schedule actions on remote timeline client to actually do the + // deletions; don't care just exit fast about the shutdown error + drop(rtc.wait_completion().await); + } + } + match task_done.send(result) { Ok(_) => (), Err(result) => error!("failed to send gc result: {result:?}"), diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 99d9783f7356..5b649a420cd2 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -188,6 +188,8 @@ use anyhow::Context; use camino::Utf8Path; use chrono::{NaiveDateTime, Utc}; +pub(crate) use download::download_initdb_tar_zst; +use pageserver_api::shard::{ShardIndex, TenantShardId}; use scopeguard::ScopeGuard; use tokio_util::sync::CancellationToken; pub(crate) use upload::upload_initdb_dir; @@ -300,7 +302,7 @@ pub struct RemoteTimelineClient { runtime: tokio::runtime::Handle, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, generation: Generation, @@ -324,7 +326,7 @@ impl RemoteTimelineClient { remote_storage: GenericRemoteStorage, deletion_queue_client: DeletionQueueClient, conf: &'static PageServerConf, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, generation: Generation, ) -> RemoteTimelineClient { @@ -336,13 +338,16 @@ impl RemoteTimelineClient { } else { BACKGROUND_RUNTIME.handle().clone() }, - tenant_id, + tenant_shard_id, timeline_id, generation, storage_impl: remote_storage, deletion_queue_client, upload_queue: Mutex::new(UploadQueue::Uninitialized), - metrics: Arc::new(RemoteTimelineClientMetrics::new(&tenant_id, &timeline_id)), + metrics: Arc::new(RemoteTimelineClientMetrics::new( + &tenant_shard_id, + &timeline_id, + )), } } @@ -463,13 +468,13 @@ impl RemoteTimelineClient { let index_part = download::download_index_part( &self.storage_impl, - &self.tenant_id, + &self.tenant_shard_id, &self.timeline_id, self.generation, cancel, ) .measure_remote_op( - self.tenant_id, + self.tenant_shard_id.tenant_id, self.timeline_id, RemoteOpFileKind::Index, RemoteOpKind::Download, @@ -505,13 +510,13 @@ impl RemoteTimelineClient { download::download_layer_file( self.conf, &self.storage_impl, - self.tenant_id, + self.tenant_shard_id, self.timeline_id, layer_file_name, layer_metadata, ) .measure_remote_op( - self.tenant_id, + self.tenant_shard_id.tenant_id, self.timeline_id, RemoteOpFileKind::Layer, RemoteOpKind::Download, @@ -657,10 +662,10 @@ impl RemoteTimelineClient { let mut guard = self.upload_queue.lock().unwrap(); let upload_queue = guard.initialized_mut()?; - let with_generations = + let with_metadata = self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned()); - self.schedule_deletion_of_unlinked0(upload_queue, with_generations); + self.schedule_deletion_of_unlinked0(upload_queue, with_metadata); // Launch the tasks immediately, if possible self.launch_queued_tasks(upload_queue); @@ -695,7 +700,7 @@ impl RemoteTimelineClient { self: 
&Arc, upload_queue: &mut UploadQueueInitialized, names: I, - ) -> Vec<(LayerFileName, Generation)> + ) -> Vec<(LayerFileName, LayerFileMetadata)> where I: IntoIterator, { @@ -703,16 +708,17 @@ impl RemoteTimelineClient { // so we don't need update it. Just serialize it. let metadata = upload_queue.latest_metadata.clone(); - // Decorate our list of names with each name's generation, dropping - // names that are unexpectedly missing from our metadata. - let with_generations: Vec<_> = names + // Decorate our list of names with each name's metadata, dropping + // names that are unexpectedly missing from our metadata. This metadata + // is later used when physically deleting layers, to construct key paths. + let with_metadata: Vec<_> = names .into_iter() .filter_map(|name| { let meta = upload_queue.latest_files.remove(&name); if let Some(meta) = meta { upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1; - Some((name, meta.generation)) + Some((name, meta)) } else { // This can only happen if we forgot to to schedule the file upload // before scheduling the delete. Log it because it is a rare/strange @@ -725,9 +731,10 @@ impl RemoteTimelineClient { .collect(); #[cfg(feature = "testing")] - for (name, gen) in &with_generations { - if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), *gen) { - if &unexpected == gen { + for (name, metadata) in &with_metadata { + let gen = metadata.generation; + if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen) { + if unexpected == gen { tracing::error!("{name} was unlinked twice with same generation"); } else { tracing::error!("{name} was unlinked twice with different generations {gen:?} and {unexpected:?}"); @@ -742,14 +749,14 @@ impl RemoteTimelineClient { self.schedule_index_upload(upload_queue, metadata); } - with_generations + with_metadata } /// Schedules deletion for layer files which have previously been unlinked from the /// `index_part.json` with [`Self::schedule_gc_update`] or [`Self::schedule_compaction_update`]. 
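// Not part of the patch: a rough sketch of what "construct key paths" above amounts to once
// both generation and shard ride along in LayerFileMetadata. Each is rendered as an optional
// suffix, so a legacy unsharded, generation-less layer keeps its old key. The segment name and
// suffix formats below are illustrative assumptions, not the exact production encoding.
fn remote_layer_key(
    tenant_id: &str,
    timeline_id: &str,
    layer_name: &str,
    shard_suffix: &str,      // "" when unsharded
    generation_suffix: &str, // "" for Generation::none()
) -> String {
    format!("tenants/{tenant_id}{shard_suffix}/timelines/{timeline_id}/{layer_name}{generation_suffix}")
}

fn main() {
    // Legacy layer: no shard, no generation.
    assert_eq!(
        remote_layer_key("t1", "tl1", "layer_A", "", ""),
        "tenants/t1/timelines/tl1/layer_A"
    );
    // Sharded layer written in generation 5 (hex-encoded suffixes assumed).
    assert_eq!(
        remote_layer_key("t1", "tl1", "layer_A", "-0102", "-00000005"),
        "tenants/t1-0102/timelines/tl1/layer_A-00000005"
    );
}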
pub(crate) fn schedule_deletion_of_unlinked( self: &Arc, - layers: Vec<(LayerFileName, Generation)>, + layers: Vec<(LayerFileName, LayerFileMetadata)>, ) -> anyhow::Result<()> { let mut guard = self.upload_queue.lock().unwrap(); let upload_queue = guard.initialized_mut()?; @@ -762,16 +769,22 @@ impl RemoteTimelineClient { fn schedule_deletion_of_unlinked0( self: &Arc, upload_queue: &mut UploadQueueInitialized, - with_generations: Vec<(LayerFileName, Generation)>, + with_metadata: Vec<(LayerFileName, LayerFileMetadata)>, ) { - for (name, gen) in &with_generations { - info!("scheduling deletion of layer {}{}", name, gen.get_suffix()); + for (name, meta) in &with_metadata { + info!( + "scheduling deletion of layer {}{} (shard {})", + name, + meta.generation.get_suffix(), + meta.shard + ); } #[cfg(feature = "testing")] - for (name, gen) in &with_generations { + for (name, meta) in &with_metadata { + let gen = meta.generation; match upload_queue.dangling_files.remove(name) { - Some(same) if &same == gen => { /* expected */ } + Some(same) if same == gen => { /* expected */ } Some(other) => { tracing::error!("{name} was unlinked with {other:?} but deleted with {gen:?}"); } @@ -783,7 +796,7 @@ impl RemoteTimelineClient { // schedule the actual deletions let op = UploadOp::Delete(Delete { - layers: with_generations, + layers: with_metadata, }); self.calls_unfinished_metric_begin(&op); upload_queue.queued_operations.push_back(op); @@ -812,10 +825,8 @@ impl RemoteTimelineClient { Ok(()) } - /// /// Wait for all previously scheduled uploads/deletions to complete - /// - pub async fn wait_completion(self: &Arc) -> anyhow::Result<()> { + pub(crate) async fn wait_completion(self: &Arc) -> anyhow::Result<()> { let mut receiver = { let mut guard = self.upload_queue.lock().unwrap(); let upload_queue = guard.initialized_mut()?; @@ -825,6 +836,7 @@ impl RemoteTimelineClient { if receiver.changed().await.is_err() { anyhow::bail!("wait_completion aborted because upload queue was stopped"); } + Ok(()) } @@ -851,6 +863,56 @@ impl RemoteTimelineClient { receiver } + /// Wait for all previously scheduled operations to complete, and then stop. + /// + /// Not cancellation safe + pub(crate) async fn shutdown(self: &Arc) -> Result<(), StopError> { + // On cancellation the queue is left in ackward state of refusing new operations but + // proper stop is yet to be called. On cancel the original or some later task must call + // `stop` or `shutdown`. + let sg = scopeguard::guard((), |_| { + tracing::error!("RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error") + }); + + let fut = { + let mut guard = self.upload_queue.lock().unwrap(); + let upload_queue = match &mut *guard { + UploadQueue::Stopped(_) => return Ok(()), + UploadQueue::Uninitialized => return Err(StopError::QueueUninitialized), + UploadQueue::Initialized(ref mut init) => init, + }; + + // if the queue is already stuck due to a shutdown operation which was cancelled, then + // just don't add more of these as they would never complete. + // + // TODO: if launch_queued_tasks were to be refactored to accept a &mut UploadQueue + // in every place we would not have to jump through this hoop, and this method could be + // made cancellable. 
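// Not part of the patch: a minimal, self-contained sketch of the scopeguard pattern used by
// shutdown() above to detect cancellation. The guard is armed before any await point and only
// defused after the work completed, so dropping the future early runs the guard's closure.
// Function and message names here are placeholders.
async fn work_that_logs_if_cancelled() {
    let armed = scopeguard::guard((), |_| {
        eprintln!("future was dropped before it completed");
    });

    // ... await points that might be cancelled would go here ...
    tokio::task::yield_now().await;

    // Reached only on normal completion: defuse the guard so its closure never runs.
    scopeguard::ScopeGuard::into_inner(armed);
}

#[tokio::main]
async fn main() {
    work_that_logs_if_cancelled().await;
}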
+ if !upload_queue.shutting_down { + upload_queue.shutting_down = true; + upload_queue.queued_operations.push_back(UploadOp::Shutdown); + // this operation is not counted similar to Barrier + + self.launch_queued_tasks(upload_queue); + } + + upload_queue.shutdown_ready.clone().acquire_owned() + }; + + let res = fut.await; + + scopeguard::ScopeGuard::into_inner(sg); + + match res { + Ok(_permit) => unreachable!("shutdown_ready should not have been added permits"), + Err(_closed) => { + // expected + } + } + + self.stop() + } + /// Set the deleted_at field in the remote index file. /// /// This fails if the upload queue has not been `stop()`ed. @@ -902,7 +964,7 @@ impl RemoteTimelineClient { || { upload::upload_index_part( &self.storage_impl, - &self.tenant_id, + &self.tenant_shard_id, &self.timeline_id, self.generation, &index_part_with_deleted_at, @@ -960,8 +1022,9 @@ impl RemoteTimelineClient { .drain() .map(|(file_name, meta)| { remote_layer_path( - &self.tenant_id, + &self.tenant_shard_id.tenant_id, &self.timeline_id, + meta.shard, &file_name, meta.generation, ) @@ -974,7 +1037,7 @@ impl RemoteTimelineClient { // Do not delete index part yet, it is needed for possible retry. If we remove it first // and retry will arrive to different pageserver there wont be any traces of it on remote storage - let timeline_storage_path = remote_timeline_path(&self.tenant_id, &self.timeline_id); + let timeline_storage_path = remote_timeline_path(&self.tenant_shard_id, &self.timeline_id); // Execute all pending deletions, so that when we proceed to do a list_prefixes below, we aren't // taking the burden of listing all the layers that we already know we should delete. @@ -1010,12 +1073,22 @@ impl RemoteTimelineClient { .unwrap_or( // No generation-suffixed indices, assume we are dealing with // a legacy index. - remote_index_path(&self.tenant_id, &self.timeline_id, Generation::none()), + remote_index_path(&self.tenant_shard_id, &self.timeline_id, Generation::none()), ); let remaining_layers: Vec = remaining .into_iter() - .filter(|p| p!= &latest_index) + .filter(|p| { + if p == &latest_index { + return false; + } + if let Some(name) = p.object_name() { + if name == INITDB_PATH { + return false; + } + } + true + }) .inspect(|path| { if let Some(name) = path.object_name() { info!(%name, "deleting a file not referenced from index_part.json"); @@ -1081,7 +1154,9 @@ impl RemoteTimelineClient { upload_queue.num_inprogress_deletions == upload_queue.inprogress_tasks.len() } - UploadOp::Barrier(_) => upload_queue.inprogress_tasks.is_empty(), + UploadOp::Barrier(_) | UploadOp::Shutdown => { + upload_queue.inprogress_tasks.is_empty() + } }; // If we cannot launch this task, don't look any further. @@ -1094,6 +1169,13 @@ impl RemoteTimelineClient { break; } + if let UploadOp::Shutdown = next_op { + // leave the op in the queue but do not start more tasks; it will be dropped when + // the stop is called. + upload_queue.shutdown_ready.close(); + break; + } + // We can launch this task. Remove it from the queue first. 
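// Not part of the patch: the shutdown_ready signalling just above boils down to this pattern —
// a zero-permit semaphore whose close() is the "queue drained" signal, so waiters treat
// Err(AcquireError) as success and an Ok(permit) as impossible. Names are placeholders.
use std::sync::Arc;
use tokio::sync::Semaphore;

async fn wait_until_drained(drained: Arc<Semaphore>) {
    match drained.acquire_owned().await {
        Ok(_permit) => unreachable!("no permits are ever added"),
        Err(_closed) => { /* close() was called: the queue has drained */ }
    }
}

#[tokio::main]
async fn main() {
    let drained = Arc::new(Semaphore::new(0));
    let waiter = tokio::spawn(wait_until_drained(drained.clone()));
    drained.close(); // signal readiness by closing, never by adding permits
    waiter.await.unwrap();
}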
let next_op = upload_queue.queued_operations.pop_front().unwrap(); @@ -1114,6 +1196,7 @@ impl RemoteTimelineClient { sender.send_replace(()); continue; } + UploadOp::Shutdown => unreachable!("shutdown is intentionally never popped off"), }; // Assign unique ID to this task @@ -1132,12 +1215,12 @@ impl RemoteTimelineClient { // Spawn task to perform the task let self_rc = Arc::clone(self); - let tenant_id = self.tenant_id; + let tenant_shard_id = self.tenant_shard_id; let timeline_id = self.timeline_id; task_mgr::spawn( &self.runtime, TaskKind::RemoteUploadTask, - Some(self.tenant_id), + Some(self.tenant_shard_id.tenant_id), Some(self.timeline_id), "remote upload", false, @@ -1145,7 +1228,7 @@ impl RemoteTimelineClient { self_rc.perform_upload_task(task).await; Ok(()) } - .instrument(info_span!(parent: None, "remote_upload", %tenant_id, %timeline_id, %upload_task_id)), + .instrument(info_span!(parent: None, "remote_upload", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id, %upload_task_id)), ); // Loop back to process next task @@ -1197,7 +1280,7 @@ impl RemoteTimelineClient { self.generation, ) .measure_remote_op( - self.tenant_id, + self.tenant_shard_id.tenant_id, self.timeline_id, RemoteOpFileKind::Layer, RemoteOpKind::Upload, @@ -1217,13 +1300,13 @@ impl RemoteTimelineClient { let res = upload::upload_index_part( &self.storage_impl, - &self.tenant_id, + &self.tenant_shard_id, &self.timeline_id, self.generation, index_part, ) .measure_remote_op( - self.tenant_id, + self.tenant_shard_id.tenant_id, self.timeline_id, RemoteOpFileKind::Index, RemoteOpKind::Upload, @@ -1243,7 +1326,7 @@ impl RemoteTimelineClient { pausable_failpoint!("before-delete-layer-pausable"); self.deletion_queue_client .push_layers( - self.tenant_id, + self.tenant_shard_id, self.timeline_id, self.generation, delete.layers.clone(), @@ -1251,10 +1334,10 @@ impl RemoteTimelineClient { .await .map_err(|e| anyhow::anyhow!(e)) } - UploadOp::Barrier(_) => { + unexpected @ UploadOp::Barrier(_) | unexpected @ UploadOp::Shutdown => { // unreachable. Barrier operations are handled synchronously in // launch_queued_tasks - warn!("unexpected Barrier operation in perform_upload_task"); + warn!("unexpected {unexpected:?} operation in perform_upload_task"); break; } }; @@ -1348,7 +1431,7 @@ impl RemoteTimelineClient { upload_queue.num_inprogress_deletions -= 1; None } - UploadOp::Barrier(_) => unreachable!(), + UploadOp::Barrier(..) | UploadOp::Shutdown => unreachable!(), }; // Launch any queued tasks that were unblocked by this one. @@ -1362,7 +1445,7 @@ impl RemoteTimelineClient { // data safety guarantees (see docs/rfcs/025-generation-numbers.md) self.deletion_queue_client .update_remote_consistent_lsn( - self.tenant_id, + self.tenant_shard_id, self.timeline_id, self.generation, lsn, @@ -1403,7 +1486,7 @@ impl RemoteTimelineClient { reason: "should we track deletes? positive or negative sign?", }, ), - UploadOp::Barrier(_) => { + UploadOp::Barrier(..) | UploadOp::Shutdown => { // we do not account these return None; } @@ -1429,10 +1512,13 @@ impl RemoteTimelineClient { } /// Close the upload queue for new operations and cancel queued operations. + /// + /// Use [`RemoteTimelineClient::shutdown`] for graceful stop. + /// /// In-progress operations will still be running after this function returns. /// Use `task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(timeline_id))` /// to wait for them to complete, after calling this function. 
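// Not part of the patch: a compact sketch of the span shape now used when spawning these
// tasks — a fresh root span (parent: None) carrying tenant_id, shard_id and timeline_id, so
// every event inside the instrumented future inherits those fields. The tracing-subscriber
// setup and the field values are assumptions of this sketch.
use tracing::{info, info_span, Instrument};

async fn upload_task(task_id: u64) {
    info!(%task_id, "starting upload");
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt::init();

    let (tenant_id, shard_id, timeline_id) = ("tenant-a", "0102", "timeline-b");
    upload_task(42)
        .instrument(info_span!(parent: None, "remote_upload", %tenant_id, %shard_id, %timeline_id))
        .await;
}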
- pub fn stop(&self) -> Result<(), StopError> { + pub(crate) fn stop(&self) -> Result<(), StopError> { // Whichever *task* for this RemoteTimelineClient grabs the mutex first will transition the queue // into stopped state, thereby dropping all off the queued *ops* which haven't become *tasks* yet. // The other *tasks* will come here and observe an already shut down queue and hence simply wrap up their business. @@ -1470,6 +1556,8 @@ impl RemoteTimelineClient { queued_operations: VecDeque::default(), #[cfg(feature = "testing")] dangling_files: HashMap::default(), + shutting_down: false, + shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)), }; let upload_queue = std::mem::replace( @@ -1515,24 +1603,32 @@ impl RemoteTimelineClient { } } -pub fn remote_timelines_path(tenant_id: &TenantId) -> RemotePath { - let path = format!("tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}"); +pub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath { + let path = format!("tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}"); RemotePath::from_string(&path).expect("Failed to construct path") } -pub fn remote_timeline_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath { - remote_timelines_path(tenant_id).join(Utf8Path::new(&timeline_id.to_string())) +pub fn remote_timeline_path( + tenant_shard_id: &TenantShardId, + timeline_id: &TimelineId, +) -> RemotePath { + remote_timelines_path(tenant_shard_id).join(Utf8Path::new(&timeline_id.to_string())) } +/// Note that the shard component of a remote layer path is _not_ always the same +/// as in the TenantShardId of the caller: tenants may reference layers from a different +/// ShardIndex. Use the ShardIndex from the layer's metadata. pub fn remote_layer_path( tenant_id: &TenantId, timeline_id: &TimelineId, + shard: ShardIndex, layer_file_name: &LayerFileName, generation: Generation, ) -> RemotePath { // Generation-aware key format let path = format!( - "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}", + "tenants/{tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{1}{2}", + shard.get_suffix(), layer_file_name.file_name(), generation.get_suffix() ); @@ -1548,12 +1644,12 @@ pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId } pub fn remote_index_path( - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, generation: Generation, ) -> RemotePath { RemotePath::from_string(&format!( - "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}", + "tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}", IndexPart::FILE_NAME, generation.get_suffix() )) @@ -1695,14 +1791,14 @@ mod tests { Arc::new(RemoteTimelineClient { conf: self.harness.conf, runtime: tokio::runtime::Handle::current(), - tenant_id: self.harness.tenant_id, + tenant_shard_id: self.harness.tenant_shard_id, timeline_id: TIMELINE_ID, generation, storage_impl: self.harness.remote_storage.clone(), deletion_queue_client: self.harness.deletion_queue.new_client(), upload_queue: Mutex::new(UploadQueue::Uninitialized), metrics: Arc::new(RemoteTimelineClientMetrics::new( - &self.harness.tenant_id, + &self.harness.tenant_shard_id, &TIMELINE_ID, )), }) @@ -1778,6 +1874,7 @@ mod tests { println!("remote_timeline_dir: {remote_timeline_dir}"); let generation = harness.generation; + let shard = harness.shard; // Create a couple of dummy files, schedule upload for them @@ -1794,7 +1891,7 @@ mod tests { harness.conf, &timeline, name, - LayerFileMetadata::new(contents.len() as u64, 
generation), + LayerFileMetadata::new(contents.len() as u64, generation, shard), ) }).collect::>(); @@ -1943,7 +2040,7 @@ mod tests { harness.conf, &timeline, layer_file_name_1.clone(), - LayerFileMetadata::new(content_1.len() as u64, harness.generation), + LayerFileMetadata::new(content_1.len() as u64, harness.generation, harness.shard), ); #[derive(Debug, PartialEq, Clone, Copy)] @@ -2029,7 +2126,12 @@ mod tests { std::fs::create_dir_all(remote_timeline_dir).expect("creating test dir should work"); let index_path = test_state.harness.remote_fs_dir.join( - remote_index_path(&test_state.harness.tenant_id, &TIMELINE_ID, generation).get_path(), + remote_index_path( + &test_state.harness.tenant_shard_id, + &TIMELINE_ID, + generation, + ) + .get_path(), ); eprintln!("Writing {index_path}"); std::fs::write(&index_path, index_part_bytes).unwrap(); diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 6039b01ab82a..3b5fe4b207b6 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -8,10 +8,12 @@ use std::future::Future; use std::time::Duration; use anyhow::{anyhow, Context}; -use camino::Utf8Path; -use tokio::fs; -use tokio::io::AsyncWriteExt; +use camino::{Utf8Path, Utf8PathBuf}; +use pageserver_api::shard::TenantShardId; +use tokio::fs::{self, File, OpenOptions}; +use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio_util::sync::CancellationToken; +use tracing::warn; use utils::{backoff, crashsafe}; use crate::config::PageServerConf; @@ -19,14 +21,15 @@ use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_ use crate::tenant::storage_layer::LayerFileName; use crate::tenant::timeline::span::debug_assert_current_span_has_tenant_and_timeline_id; use crate::tenant::Generation; +use crate::TEMP_FILE_SUFFIX; use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode}; use utils::crashsafe::path_with_suffix_extension; -use utils::id::{TenantId, TimelineId}; +use utils::id::TimelineId; use super::index::{IndexPart, LayerFileMetadata}; use super::{ - parse_remote_index_path, remote_index_path, FAILED_DOWNLOAD_WARN_THRESHOLD, - FAILED_REMOTE_OP_RETRIES, + parse_remote_index_path, remote_index_path, remote_initdb_archive_path, + FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH, }; static MAX_DOWNLOAD_DURATION: Duration = Duration::from_secs(120); @@ -39,7 +42,7 @@ static MAX_DOWNLOAD_DURATION: Duration = Duration::from_secs(120); pub async fn download_layer_file<'a>( conf: &'static PageServerConf, storage: &'a GenericRemoteStorage, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, layer_file_name: &'a LayerFileName, layer_metadata: &'a LayerFileMetadata, @@ -47,12 +50,13 @@ pub async fn download_layer_file<'a>( debug_assert_current_span_has_tenant_and_timeline_id(); let local_path = conf - .timeline_path(&tenant_id, &timeline_id) + .timeline_path(&tenant_shard_id, &timeline_id) .join(layer_file_name.file_name()); let remote_path = remote_layer_path( - &tenant_id, + &tenant_shard_id.tenant_id, &timeline_id, + layer_metadata.shard, layer_file_name, layer_metadata.generation, ); @@ -169,10 +173,10 @@ pub fn is_temp_download_file(path: &Utf8Path) -> bool { /// List timelines of given tenant in remote storage pub async fn list_remote_timelines( storage: &GenericRemoteStorage, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, cancel: CancellationToken, ) 
-> anyhow::Result<(HashSet, HashSet)> { - let remote_path = remote_timelines_path(&tenant_id); + let remote_path = remote_timelines_path(&tenant_shard_id); fail::fail_point!("storage-sync-list-remote-timelines", |_| { anyhow::bail!("storage-sync-list-remote-timelines"); @@ -180,7 +184,7 @@ pub async fn list_remote_timelines( let listing = download_retry_forever( || storage.list(Some(&remote_path), ListingMode::WithDelimiter), - &format!("list timelines for {tenant_id}"), + &format!("list timelines for {tenant_shard_id}"), cancel, ) .await?; @@ -190,7 +194,7 @@ pub async fn list_remote_timelines( for timeline_remote_storage_key in listing.prefixes { let object_name = timeline_remote_storage_key.object_name().ok_or_else(|| { - anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_id}") + anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_shard_id}") })?; match object_name.parse::() { @@ -211,12 +215,12 @@ pub async fn list_remote_timelines( async fn do_download_index_part( storage: &GenericRemoteStorage, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, index_generation: Generation, cancel: CancellationToken, ) -> Result { - let remote_path = remote_index_path(tenant_id, timeline_id, index_generation); + let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation); let index_part_bytes = download_retry_forever( || async { @@ -252,7 +256,7 @@ async fn do_download_index_part( #[tracing::instrument(skip_all, fields(generation=?my_generation))] pub(super) async fn download_index_part( storage: &GenericRemoteStorage, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, my_generation: Generation, cancel: CancellationToken, @@ -261,8 +265,14 @@ pub(super) async fn download_index_part( if my_generation.is_none() { // Operating without generations: just fetch the generation-less path - return do_download_index_part(storage, tenant_id, timeline_id, my_generation, cancel) - .await; + return do_download_index_part( + storage, + tenant_shard_id, + timeline_id, + my_generation, + cancel, + ) + .await; } // Stale case: If we were intentionally attached in a stale generation, there may already be a remote @@ -271,7 +281,7 @@ pub(super) async fn download_index_part( // This is an optimization to avoid doing the listing for the general case below. let res = do_download_index_part( storage, - tenant_id, + tenant_shard_id, timeline_id, my_generation, cancel.clone(), @@ -298,7 +308,7 @@ pub(super) async fn download_index_part( // This is an optimization to avoid doing the listing for the general case below. let res = do_download_index_part( storage, - tenant_id, + tenant_shard_id, timeline_id, my_generation.previous(), cancel.clone(), @@ -320,8 +330,9 @@ pub(super) async fn download_index_part( } // General case/fallback: if there is no index at my_generation or prev_generation, then list all index_part.json - // objects, and select the highest one with a generation <= my_generation. - let index_prefix = remote_index_path(tenant_id, timeline_id, Generation::none()); + // objects, and select the highest one with a generation <= my_generation. Constructing the prefix is equivalent + // to constructing a full index path with no generation, because the generation is a suffix. 
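// Not part of the patch: a self-contained sketch of the fallback selection described above —
// list the generation-suffixed index_part.json objects and keep the newest one whose
// generation is not ahead of ours. The 8-hex-digit "-XXXXXXXX" suffix format is an assumption
// of this sketch; the suffix-less name stands for the legacy, pre-generation index.
fn pick_index_generation(object_names: &[&str], my_generation: u32) -> Option<u32> {
    object_names
        .iter()
        .filter_map(|name| {
            let suffix = name.strip_prefix("index_part.json-")?;
            u32::from_str_radix(suffix, 16).ok()
        })
        .filter(|generation| *generation <= my_generation)
        .max()
}

fn main() {
    let listing = [
        "index_part.json",          // legacy, no generation
        "index_part.json-00000002",
        "index_part.json-00000005", // written by a newer attachment; must be ignored
    ];
    // Attached in generation 4: generation 2 is the highest usable index.
    assert_eq!(pick_index_generation(&listing, 4), Some(2));
}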
+ let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none()); let indices = backoff::retry( || async { storage.list_files(Some(&index_prefix)).await }, |_| false, @@ -347,18 +358,87 @@ pub(super) async fn download_index_part( match max_previous_generation { Some(g) => { tracing::debug!("Found index_part in generation {g:?}"); - do_download_index_part(storage, tenant_id, timeline_id, g, cancel).await + do_download_index_part(storage, tenant_shard_id, timeline_id, g, cancel).await } None => { // Migration from legacy pre-generation state: we have a generation but no prior // attached pageservers did. Try to load from a no-generation path. tracing::info!("No index_part.json* found"); - do_download_index_part(storage, tenant_id, timeline_id, Generation::none(), cancel) - .await + do_download_index_part( + storage, + tenant_shard_id, + timeline_id, + Generation::none(), + cancel, + ) + .await } } } +pub(crate) async fn download_initdb_tar_zst( + conf: &'static PageServerConf, + storage: &GenericRemoteStorage, + tenant_shard_id: &TenantShardId, + timeline_id: &TimelineId, +) -> Result<(Utf8PathBuf, File), DownloadError> { + debug_assert_current_span_has_tenant_and_timeline_id(); + + let remote_path = remote_initdb_archive_path(&tenant_shard_id.tenant_id, timeline_id); + + let timeline_path = conf.timelines_path(tenant_shard_id); + + if !timeline_path.exists() { + tokio::fs::create_dir_all(&timeline_path) + .await + .with_context(|| format!("timeline dir creation {timeline_path}")) + .map_err(DownloadError::Other)?; + } + let temp_path = timeline_path.join(format!("{INITDB_PATH}-{timeline_id}.{TEMP_FILE_SUFFIX}")); + + let file = download_retry( + || async { + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .read(true) + .write(true) + .open(&temp_path) + .await + .with_context(|| format!("tempfile creation {temp_path}")) + .map_err(DownloadError::Other)?; + + let mut download = storage.download(&remote_path).await?; + + tokio::io::copy(&mut download.download_stream, &mut file) + .await + .with_context(|| format!("download initdb.tar.zst at {remote_path:?}")) + .map_err(DownloadError::Other)?; + + file.seek(std::io::SeekFrom::Start(0)) + .await + .with_context(|| format!("rewinding initdb.tar.zst at: {remote_path:?}")) + .map_err(DownloadError::Other)?; + + Ok(file) + }, + &format!("download {remote_path}"), + ) + .await + .map_err(|e| { + if temp_path.exists() { + // Do a best-effort attempt at deleting the temporary file upon encountering an error. + // We don't have async here nor do we want to pile on any extra errors. + if let Err(e) = std::fs::remove_file(&temp_path) { + warn!("error deleting temporary file {temp_path}: {e}"); + } + } + e + })?; + + Ok((temp_path, file)) +} + /// Helper function to handle retries for a download operation. 
/// /// Remote operations can fail due to rate limits (IAM, S3), spurious network diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index 0d0b34365c9a..0abfdeef023e 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -12,6 +12,7 @@ use crate::tenant::metadata::TimelineMetadata; use crate::tenant::storage_layer::LayerFileName; use crate::tenant::upload_queue::UploadQueueInitialized; use crate::tenant::Generation; +use pageserver_api::shard::ShardIndex; use utils::lsn::Lsn; @@ -25,6 +26,8 @@ pub struct LayerFileMetadata { file_size: u64, pub(crate) generation: Generation, + + pub(crate) shard: ShardIndex, } impl From<&'_ IndexLayerMetadata> for LayerFileMetadata { @@ -32,15 +35,17 @@ impl From<&'_ IndexLayerMetadata> for LayerFileMetadata { LayerFileMetadata { file_size: other.file_size, generation: other.generation, + shard: other.shard, } } } impl LayerFileMetadata { - pub fn new(file_size: u64, generation: Generation) -> Self { + pub fn new(file_size: u64, generation: Generation, shard: ShardIndex) -> Self { LayerFileMetadata { file_size, generation, + shard, } } @@ -161,6 +166,10 @@ pub struct IndexLayerMetadata { #[serde(default = "Generation::none")] #[serde(skip_serializing_if = "Generation::is_none")] pub generation: Generation, + + #[serde(default = "ShardIndex::unsharded")] + #[serde(skip_serializing_if = "ShardIndex::is_unsharded")] + pub shard: ShardIndex, } impl From for IndexLayerMetadata { @@ -168,6 +177,7 @@ impl From for IndexLayerMetadata { IndexLayerMetadata { file_size: other.file_size, generation: other.generation, + shard: other.shard, } } } @@ -195,13 +205,15 @@ mod tests { layer_metadata: HashMap::from([ ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata { file_size: 25600000, - generation: Generation::none() + generation: Generation::none(), + shard: ShardIndex::unsharded() }), ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata { // serde_json should always parse this but this might be a double with jq for // example. file_size: 9007199254741001, - generation: Generation::none() + generation: Generation::none(), + shard: ShardIndex::unsharded() }) ]), disk_consistent_lsn: "0/16960E8".parse::().unwrap(), @@ -233,13 +245,15 @@ mod tests { layer_metadata: HashMap::from([ ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata { file_size: 25600000, - generation: Generation::none() + generation: Generation::none(), + shard: ShardIndex::unsharded() }), ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata { // serde_json should always parse this but this might be a double with jq for // example. 
file_size: 9007199254741001, - generation: Generation::none() + generation: Generation::none(), + shard: ShardIndex::unsharded() }) ]), disk_consistent_lsn: "0/16960E8".parse::().unwrap(), @@ -272,13 +286,15 @@ mod tests { layer_metadata: HashMap::from([ ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata { file_size: 25600000, - generation: Generation::none() + generation: Generation::none(), + shard: ShardIndex::unsharded() }), ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata { // serde_json should always parse this but this might be a double with jq for // example. file_size: 9007199254741001, - generation: Generation::none() + generation: Generation::none(), + shard: ShardIndex::unsharded() }) ]), disk_consistent_lsn: "0/16960E8".parse::().unwrap(), @@ -354,19 +370,21 @@ mod tests { layer_metadata: HashMap::from([ ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata { file_size: 25600000, - generation: Generation::none() + generation: Generation::none(), + shard: ShardIndex::unsharded() }), ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata { // serde_json should always parse this but this might be a double with jq for // example. file_size: 9007199254741001, - generation: Generation::none() + generation: Generation::none(), + shard: ShardIndex::unsharded() }) ]), disk_consistent_lsn: "0/16960E8".parse::().unwrap(), metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(), deleted_at: Some(chrono::NaiveDateTime::parse_from_str( - "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap()) + "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap()), }; let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap(); diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index 4d3e1731dc0f..4ca4438003a1 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -4,6 +4,7 @@ use anyhow::{bail, Context}; use bytes::Bytes; use camino::Utf8Path; use fail::fail_point; +use pageserver_api::shard::TenantShardId; use 
std::io::ErrorKind; use tokio::fs; @@ -24,7 +25,7 @@ use tracing::info; /// Serializes and uploads the given index part data to the remote storage. pub(super) async fn upload_index_part<'a>( storage: &'a GenericRemoteStorage, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, generation: Generation, index_part: &'a IndexPart, @@ -42,11 +43,11 @@ pub(super) async fn upload_index_part<'a>( let index_part_size = index_part_bytes.len(); let index_part_bytes = tokio::io::BufReader::new(std::io::Cursor::new(index_part_bytes)); - let remote_path = remote_index_path(tenant_id, timeline_id, generation); + let remote_path = remote_index_path(tenant_shard_id, timeline_id, generation); storage .upload_storage_object(Box::new(index_part_bytes), index_part_size, &remote_path) .await - .with_context(|| format!("upload index part for '{tenant_id} / {timeline_id}'")) + .with_context(|| format!("upload index part for '{tenant_shard_id} / {timeline_id}'")) } /// Attempts to upload given layer files. diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index 3b2a61dcbaa1..944e05883f5f 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -2,7 +2,7 @@ pub mod delta_layer; mod filename; -mod image_layer; +pub mod image_layer; mod inmemory_layer; mod layer; mod layer_desc; @@ -24,10 +24,7 @@ use tracing::warn; use utils::history_buffer::HistoryBufferWithDropCounter; use utils::rate_limit::RateLimit; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::{id::TimelineId, lsn::Lsn}; pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef}; pub use filename::{DeltaFileName, ImageFileName, LayerFileName}; @@ -304,12 +301,14 @@ pub trait AsLayerDesc { } pub mod tests { + use pageserver_api::shard::TenantShardId; + use super::*; impl From for PersistentLayerDesc { fn from(value: DeltaFileName) -> Self { PersistentLayerDesc::new_delta( - TenantId::from_array([0; 16]), + TenantShardId::from([0; 18]), TimelineId::from_array([0; 16]), value.key_range, value.lsn_range, @@ -321,7 +320,7 @@ pub mod tests { impl From for PersistentLayerDesc { fn from(value: ImageFileName) -> Self { PersistentLayerDesc::new_img( - TenantId::from_array([0; 16]), + TenantShardId::from([0; 18]), TimelineId::from_array([0; 16]), value.key_range, value.lsn, diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 79f37dcb2d50..d33920412763 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -42,6 +42,7 @@ use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION}; use anyhow::{bail, ensure, Context, Result}; use camino::{Utf8Path, Utf8PathBuf}; use pageserver_api::models::LayerAccessKind; +use pageserver_api::shard::TenantShardId; use rand::{distributions::Alphanumeric, Rng}; use serde::{Deserialize, Serialize}; use std::fs::File; @@ -69,13 +70,13 @@ use super::{AsLayerDesc, LayerAccessStats, PersistentLayerDesc, ResidentLayer}; #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] pub struct Summary { /// Magic value to identify this as a neon delta file. Always DELTA_FILE_MAGIC. 
- magic: u16, - format_version: u16, + pub magic: u16, + pub format_version: u16, - tenant_id: TenantId, - timeline_id: TimelineId, - key_range: Range, - lsn_range: Range, + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + pub key_range: Range, + pub lsn_range: Range, /// Block number where the 'index' part of the file begins. pub index_start_blk: u32, @@ -86,7 +87,7 @@ pub struct Summary { impl From<&DeltaLayer> for Summary { fn from(layer: &DeltaLayer) -> Self { Self::expected( - layer.desc.tenant_id, + layer.desc.tenant_shard_id.tenant_id, layer.desc.timeline_id, layer.desc.key_range.clone(), layer.desc.lsn_range.clone(), @@ -248,7 +249,7 @@ impl DeltaLayer { fn temp_path_for( conf: &PageServerConf, - tenant_id: &TenantId, + tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, key_start: Key, lsn_range: &Range, @@ -259,14 +260,15 @@ impl DeltaLayer { .map(char::from) .collect(); - conf.timeline_path(tenant_id, timeline_id).join(format!( - "{}-XXX__{:016X}-{:016X}.{}.{}", - key_start, - u64::from(lsn_range.start), - u64::from(lsn_range.end), - rand_string, - TEMP_FILE_SUFFIX, - )) + conf.timeline_path(tenant_shard_id, timeline_id) + .join(format!( + "{}-XXX__{:016X}-{:016X}.{}.{}", + key_start, + u64::from(lsn_range.start), + u64::from(lsn_range.end), + rand_string, + TEMP_FILE_SUFFIX, + )) } /// @@ -318,10 +320,14 @@ impl DeltaLayer { .metadata() .context("get file metadata to determine size")?; + // TODO(sharding): we must get the TenantShardId from the path instead of reading the Summary. + // we should also validate the path against the Summary, as both should contain the same tenant, timeline, key, lsn. + let tenant_shard_id = TenantShardId::unsharded(summary.tenant_id); + Ok(DeltaLayer { path: path.to_path_buf(), desc: PersistentLayerDesc::new_delta( - summary.tenant_id, + tenant_shard_id, summary.timeline_id, summary.key_range, summary.lsn_range, @@ -353,7 +359,7 @@ struct DeltaLayerWriterInner { conf: &'static PageServerConf, pub path: Utf8PathBuf, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, key_start: Key, lsn_range: Range, @@ -370,7 +376,7 @@ impl DeltaLayerWriterInner { async fn new( conf: &'static PageServerConf, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, key_start: Key, lsn_range: Range, ) -> anyhow::Result { @@ -380,7 +386,8 @@ impl DeltaLayerWriterInner { // // Note: This overwrites any existing file. There shouldn't be any. // FIXME: throw an error instead? - let path = DeltaLayer::temp_path_for(conf, &tenant_id, &timeline_id, key_start, &lsn_range); + let path = + DeltaLayer::temp_path_for(conf, &tenant_shard_id, &timeline_id, key_start, &lsn_range); let mut file = VirtualFile::create(&path).await?; // make room for the header block @@ -395,7 +402,7 @@ impl DeltaLayerWriterInner { conf, path, timeline_id, - tenant_id, + tenant_shard_id, key_start, lsn_range, tree: tree_builder, @@ -457,7 +464,7 @@ impl DeltaLayerWriterInner { let summary = Summary { magic: DELTA_FILE_MAGIC, format_version: STORAGE_FORMAT_VERSION, - tenant_id: self.tenant_id, + tenant_id: self.tenant_shard_id.tenant_id, timeline_id: self.timeline_id, key_range: self.key_start..key_end, lsn_range: self.lsn_range.clone(), @@ -498,7 +505,7 @@ impl DeltaLayerWriterInner { // set inner.file here. The first read will have to re-open it. 
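// Not part of the patch: the temp-name scheme used by temp_path_for above, in isolation — a
// random alphanumeric tag plus a well-known suffix, so half-written files left behind by a
// crash are easy to recognise and sweep up. The suffix value and tag length are assumptions.
use rand::{distributions::Alphanumeric, Rng};

const TEMP_FILE_SUFFIX: &str = "temp";

fn temp_file_name(final_name: &str) -> String {
    let tag: String = rand::thread_rng()
        .sample_iter(&Alphanumeric)
        .take(8)
        .map(char::from)
        .collect();
    format!("{final_name}.{tag}.{TEMP_FILE_SUFFIX}")
}

fn main() {
    // e.g. "000...-XXX__0000000001696070-00000000016960E9.p3Zk9QbL.temp"
    println!("{}", temp_file_name("some-delta-layer-name"));
}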
let desc = PersistentLayerDesc::new_delta( - self.tenant_id, + self.tenant_shard_id, self.timeline_id, self.key_start..key_end, self.lsn_range.clone(), @@ -549,14 +556,20 @@ impl DeltaLayerWriter { pub async fn new( conf: &'static PageServerConf, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, key_start: Key, lsn_range: Range, ) -> anyhow::Result { Ok(Self { inner: Some( - DeltaLayerWriterInner::new(conf, timeline_id, tenant_id, key_start, lsn_range) - .await?, + DeltaLayerWriterInner::new( + conf, + timeline_id, + tenant_shard_id, + key_start, + lsn_range, + ) + .await?, ), }) } @@ -611,6 +624,61 @@ impl Drop for DeltaLayerWriter { } } +#[derive(thiserror::Error, Debug)] +pub enum RewriteSummaryError { + #[error("magic mismatch")] + MagicMismatch, + #[error(transparent)] + Other(#[from] anyhow::Error), +} + +impl From for RewriteSummaryError { + fn from(e: std::io::Error) -> Self { + Self::Other(anyhow::anyhow!(e)) + } +} + +impl DeltaLayer { + pub async fn rewrite_summary( + path: &Utf8Path, + rewrite: F, + ctx: &RequestContext, + ) -> Result<(), RewriteSummaryError> + where + F: Fn(Summary) -> Summary, + { + let file = VirtualFile::open_with_options( + path, + &*std::fs::OpenOptions::new().read(true).write(true), + ) + .await + .with_context(|| format!("Failed to open file '{}'", path))?; + let file = FileBlockReader::new(file); + let summary_blk = file.read_blk(0, ctx).await?; + let actual_summary = Summary::des_prefix(summary_blk.as_ref()).context("deserialize")?; + let mut file = file.file; + if actual_summary.magic != DELTA_FILE_MAGIC { + return Err(RewriteSummaryError::MagicMismatch); + } + + let new_summary = rewrite(actual_summary); + + let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new(); + Summary::ser_into(&new_summary, &mut buf).context("serialize")?; + if buf.spilled() { + // The code in DeltaLayerWriterInner just warn!()s for this. + // It should probably error out as well. + return Err(RewriteSummaryError::Other(anyhow::anyhow!( + "Used more than one page size for summary buffer: {}", + buf.len() + ))); + } + file.seek(SeekFrom::Start(0)).await?; + file.write_all(&buf).await?; + Ok(()) + } +} + impl DeltaLayerInner { /// Returns nested result following Result, Critical>: /// - inner has the success or transient failure diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index c38a9f6883a8..023122c0b1b4 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -41,6 +41,7 @@ use bytes::Bytes; use camino::{Utf8Path, Utf8PathBuf}; use hex; use pageserver_api::models::LayerAccessKind; +use pageserver_api::shard::TenantShardId; use rand::{distributions::Alphanumeric, Rng}; use serde::{Deserialize, Serialize}; use std::fs::File; @@ -67,27 +68,27 @@ use super::{AsLayerDesc, Layer, PersistentLayerDesc, ResidentLayer}; /// the 'index' starts at the block indicated by 'index_start_blk' /// #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] -pub(super) struct Summary { +pub struct Summary { /// Magic value to identify this as a neon image file. Always IMAGE_FILE_MAGIC. - magic: u16, - format_version: u16, + pub magic: u16, + pub format_version: u16, - tenant_id: TenantId, - timeline_id: TimelineId, - key_range: Range, - lsn: Lsn, + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + pub key_range: Range, + pub lsn: Lsn, /// Block number where the 'index' part of the file begins. 
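// Not part of the patch: the read-modify-write shape of rewrite_summary above, reduced to
// plain std::fs so it stands alone — read the first block, let a closure produce the new
// header, refuse to write anything that no longer fits in that block, then seek back and
// overwrite in place. BLOCK_SZ and the demo path are assumptions of this sketch.
use std::fs::OpenOptions;
use std::io::{Read, Seek, SeekFrom, Write};

const BLOCK_SZ: usize = 8192;

fn rewrite_header<F>(path: &str, rewrite: F) -> std::io::Result<()>
where
    F: Fn(Vec<u8>) -> Vec<u8>,
{
    let mut file = OpenOptions::new().read(true).write(true).open(path)?;

    let mut block = vec![0u8; BLOCK_SZ];
    file.read_exact(&mut block)?;

    let new_header = rewrite(block);
    if new_header.len() > BLOCK_SZ {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            "rewritten header does not fit in one block",
        ));
    }

    // Only the header block is touched; the rest of the file stays as it was.
    file.seek(SeekFrom::Start(0))?;
    file.write_all(&new_header)?;
    Ok(())
}

fn main() -> std::io::Result<()> {
    let path = "/tmp/header_rewrite_demo.bin";
    std::fs::write(path, vec![0u8; BLOCK_SZ * 2])?;
    rewrite_header(path, |mut header| {
        header[0] = 0xAB; // e.g. bump a format byte
        header
    })
}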
- index_start_blk: u32, + pub index_start_blk: u32, /// Block within the 'index', where the B-tree root page is stored - index_root_blk: u32, + pub index_root_blk: u32, // the 'values' part starts after the summary header, on block 1. } impl From<&ImageLayer> for Summary { fn from(layer: &ImageLayer) -> Self { Self::expected( - layer.desc.tenant_id, + layer.desc.tenant_shard_id.tenant_id, layer.desc.timeline_id, layer.desc.key_range.clone(), layer.lsn, @@ -217,7 +218,7 @@ impl ImageLayer { fn temp_path_for( conf: &PageServerConf, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, fname: &ImageFileName, ) -> Utf8PathBuf { let rand_string: String = rand::thread_rng() @@ -226,7 +227,7 @@ impl ImageLayer { .map(char::from) .collect(); - conf.timeline_path(&tenant_id, &timeline_id) + conf.timeline_path(&tenant_shard_id, &timeline_id) .join(format!("{fname}.{rand_string}.{TEMP_FILE_SUFFIX}")) } @@ -276,10 +277,15 @@ impl ImageLayer { let metadata = file .metadata() .context("get file metadata to determine size")?; + + // TODO(sharding): we should get TenantShardId from path. + // OR, not at all: any layer we load from disk should also get reconciled with remote IndexPart. + let tenant_shard_id = TenantShardId::unsharded(summary.tenant_id); + Ok(ImageLayer { path: path.to_path_buf(), desc: PersistentLayerDesc::new_img( - summary.tenant_id, + tenant_shard_id, summary.timeline_id, summary.key_range, summary.lsn, @@ -296,6 +302,61 @@ impl ImageLayer { } } +#[derive(thiserror::Error, Debug)] +pub enum RewriteSummaryError { + #[error("magic mismatch")] + MagicMismatch, + #[error(transparent)] + Other(#[from] anyhow::Error), +} + +impl From for RewriteSummaryError { + fn from(e: std::io::Error) -> Self { + Self::Other(anyhow::anyhow!(e)) + } +} + +impl ImageLayer { + pub async fn rewrite_summary( + path: &Utf8Path, + rewrite: F, + ctx: &RequestContext, + ) -> Result<(), RewriteSummaryError> + where + F: Fn(Summary) -> Summary, + { + let file = VirtualFile::open_with_options( + path, + &*std::fs::OpenOptions::new().read(true).write(true), + ) + .await + .with_context(|| format!("Failed to open file '{}'", path))?; + let file = FileBlockReader::new(file); + let summary_blk = file.read_blk(0, ctx).await?; + let actual_summary = Summary::des_prefix(summary_blk.as_ref()).context("deserialize")?; + let mut file = file.file; + if actual_summary.magic != IMAGE_FILE_MAGIC { + return Err(RewriteSummaryError::MagicMismatch); + } + + let new_summary = rewrite(actual_summary); + + let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new(); + Summary::ser_into(&new_summary, &mut buf).context("serialize")?; + if buf.spilled() { + // The code in ImageLayerWriterInner just warn!()s for this. + // It should probably error out as well. 
+ return Err(RewriteSummaryError::Other(anyhow::anyhow!( + "Used more than one page size for summary buffer: {}", + buf.len() + ))); + } + file.seek(SeekFrom::Start(0)).await?; + file.write_all(&buf).await?; + Ok(()) + } +} + impl ImageLayerInner { /// Returns nested result following Result, Critical>: /// - inner has the success or transient failure @@ -400,7 +461,7 @@ struct ImageLayerWriterInner { conf: &'static PageServerConf, path: Utf8PathBuf, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, key_range: Range, lsn: Lsn, @@ -415,7 +476,7 @@ impl ImageLayerWriterInner { async fn new( conf: &'static PageServerConf, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, key_range: &Range, lsn: Lsn, ) -> anyhow::Result { @@ -424,7 +485,7 @@ impl ImageLayerWriterInner { let path = ImageLayer::temp_path_for( conf, timeline_id, - tenant_id, + tenant_shard_id, &ImageFileName { key_range: key_range.clone(), lsn, @@ -448,7 +509,7 @@ impl ImageLayerWriterInner { conf, path, timeline_id, - tenant_id, + tenant_shard_id, key_range: key_range.clone(), lsn, tree: tree_builder, @@ -495,7 +556,7 @@ impl ImageLayerWriterInner { let summary = Summary { magic: IMAGE_FILE_MAGIC, format_version: STORAGE_FORMAT_VERSION, - tenant_id: self.tenant_id, + tenant_id: self.tenant_shard_id.tenant_id, timeline_id: self.timeline_id, key_range: self.key_range.clone(), lsn: self.lsn, @@ -521,7 +582,7 @@ impl ImageLayerWriterInner { .context("get metadata to determine file size")?; let desc = PersistentLayerDesc::new_img( - self.tenant_id, + self.tenant_shard_id, self.timeline_id, self.key_range.clone(), self.lsn, @@ -577,13 +638,14 @@ impl ImageLayerWriter { pub async fn new( conf: &'static PageServerConf, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, key_range: &Range, lsn: Lsn, ) -> anyhow::Result { Ok(Self { inner: Some( - ImageLayerWriterInner::new(conf, timeline_id, tenant_id, key_range, lsn).await?, + ImageLayerWriterInner::new(conf, timeline_id, tenant_shard_id, key_range, lsn) + .await?, ), }) } diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs index 2cb1e55b2606..003cf0e92b5e 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs @@ -14,15 +14,11 @@ use crate::tenant::Timeline; use crate::walrecord; use anyhow::{ensure, Result}; use pageserver_api::models::InMemoryLayerInfo; +use pageserver_api::shard::TenantShardId; use std::collections::HashMap; use std::sync::{Arc, OnceLock}; use tracing::*; -use utils::{ - bin_ser::BeSer, - id::{TenantId, TimelineId}, - lsn::Lsn, - vec_map::VecMap, -}; +use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn, vec_map::VecMap}; // avoid binding to Write (conflicts with std::io::Write) // while being able to use std::fmt::Write's methods use std::fmt::Write as _; @@ -33,7 +29,7 @@ use super::{DeltaLayerWriter, ResidentLayer}; pub struct InMemoryLayer { conf: &'static PageServerConf, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, /// This layer contains all the changes from 'start_lsn'. 
The @@ -226,17 +222,17 @@ impl InMemoryLayer { pub async fn create( conf: &'static PageServerConf, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, start_lsn: Lsn, ) -> Result { trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}"); - let file = EphemeralFile::create(conf, tenant_id, timeline_id).await?; + let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id).await?; Ok(InMemoryLayer { conf, timeline_id, - tenant_id, + tenant_shard_id, start_lsn, end_lsn: OnceLock::new(), inner: RwLock::new(InMemoryLayerInner { @@ -335,7 +331,7 @@ impl InMemoryLayer { let mut delta_layer_writer = DeltaLayerWriter::new( self.conf, self.timeline_id, - self.tenant_id, + self.tenant_shard_id, Key::MIN, self.start_lsn..end_lsn, ) diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index f28f1c9444f5..3ed4e05beaba 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -3,6 +3,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use pageserver_api::models::{ HistoricLayerInfo, LayerAccessKind, LayerResidenceEventReason, LayerResidenceStatus, }; +use pageserver_api::shard::ShardIndex; use std::ops::Range; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::{Arc, Weak}; @@ -81,7 +82,7 @@ impl Layer { metadata: LayerFileMetadata, ) -> Self { let desc = PersistentLayerDesc::from_filename( - timeline.tenant_id, + timeline.tenant_shard_id, timeline.timeline_id, file_name, metadata.file_size(), @@ -96,6 +97,7 @@ impl Layer { desc, None, metadata.generation, + metadata.shard, ))); debug_assert!(owner.0.needs_download_blocking().unwrap().is_some()); @@ -111,7 +113,7 @@ impl Layer { metadata: LayerFileMetadata, ) -> ResidentLayer { let desc = PersistentLayerDesc::from_filename( - timeline.tenant_id, + timeline.tenant_shard_id, timeline.timeline_id, file_name, metadata.file_size(), @@ -136,6 +138,7 @@ impl Layer { desc, Some(inner), metadata.generation, + metadata.shard, ) })); @@ -179,6 +182,7 @@ impl Layer { desc, Some(inner), timeline.generation, + timeline.get_shard_index(), ) })); @@ -322,6 +326,24 @@ impl Layer { Ok(()) } + + /// Waits until this layer has been dropped (and if needed, local garbage collection and remote + /// deletion scheduling has completed). + /// + /// Does not start garbage collection, use [`Self::garbage_collect_on_drop`] for that + /// separatedly. + #[cfg(feature = "testing")] + pub(crate) fn wait_drop(&self) -> impl std::future::Future + 'static { + let mut rx = self.0.status.subscribe(); + + async move { + loop { + if let Err(tokio::sync::broadcast::error::RecvError::Closed) = rx.recv().await { + break; + } + } + } + } } /// The download-ness ([`DownloadedLayer`]) can be either resident or wanted evicted. @@ -426,6 +448,15 @@ struct LayerInner { /// For loaded layers (resident or evicted) this comes from [`LayerFileMetadata::generation`], /// for created layers from [`Timeline::generation`]. generation: Generation, + + /// The shard of this Layer. + /// + /// For layers created in this process, this will always be the [`ShardIndex`] of the + /// current `ShardIdentity`` (TODO: add link once it's introduced). + /// + /// For loaded layers, this may be some other value if the tenant has undergone + /// a shard split since the layer was originally written. 
+ shard: ShardIndex, } impl std::fmt::Display for LayerInner { @@ -455,17 +486,21 @@ impl Drop for LayerInner { return; } - let span = tracing::info_span!(parent: None, "layer_gc", tenant_id = %self.layer_desc().tenant_id, timeline_id = %self.layer_desc().timeline_id); + let span = tracing::info_span!(parent: None, "layer_gc", tenant_id = %self.layer_desc().tenant_shard_id.tenant_id, shard_id=%self.layer_desc().tenant_shard_id.shard_slug(), timeline_id = %self.layer_desc().timeline_id); let path = std::mem::take(&mut self.path); let file_name = self.layer_desc().filename(); - let gen = self.generation; let file_size = self.layer_desc().file_size; let timeline = self.timeline.clone(); + let meta = self.metadata(); + let status = self.status.clone(); crate::task_mgr::BACKGROUND_RUNTIME.spawn_blocking(move || { let _g = span.entered(); + // carry this until we are finished for [`Layer::wait_drop`] support + let _status = status; + let removed = match std::fs::remove_file(path) { Ok(()) => true, Err(e) if e.kind() == std::io::ErrorKind::NotFound => { @@ -489,7 +524,7 @@ impl Drop for LayerInner { timeline.metrics.resident_physical_size_sub(file_size); } if let Some(remote_client) = timeline.remote_client.as_ref() { - let res = remote_client.schedule_deletion_of_unlinked(vec![(file_name, gen)]); + let res = remote_client.schedule_deletion_of_unlinked(vec![(file_name, meta)]); if let Err(e) = res { // test_timeline_deletion_with_files_stuck_in_upload_queue is good at @@ -523,9 +558,10 @@ impl LayerInner { desc: PersistentLayerDesc, downloaded: Option>, generation: Generation, + shard: ShardIndex, ) -> Self { let path = conf - .timeline_path(&timeline.tenant_id, &timeline.timeline_id) + .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id) .join(desc.filename().to_string()); let (inner, version) = if let Some(inner) = downloaded { @@ -550,6 +586,7 @@ impl LayerInner { status: tokio::sync::broadcast::channel(1).0, consecutive_failures: AtomicUsize::new(0), generation, + shard, } } @@ -795,7 +832,7 @@ impl LayerInner { crate::task_mgr::spawn( &tokio::runtime::Handle::current(), crate::task_mgr::TaskKind::RemoteDownloadTask, - Some(self.desc.tenant_id), + Some(self.desc.tenant_shard_id.tenant_id), Some(self.desc.timeline_id), &task_name, false, @@ -960,7 +997,7 @@ impl LayerInner { if gc { // do nothing now, only in LayerInner::drop } else if can_evict && evict { - let span = tracing::info_span!(parent: None, "layer_evict", tenant_id = %self.desc.tenant_id, timeline_id = %self.desc.timeline_id, layer=%self, %version); + let span = tracing::info_span!(parent: None, "layer_evict", tenant_id = %self.desc.tenant_shard_id.tenant_id, shard_id = %self.desc.tenant_shard_id.shard_slug(), timeline_id = %self.desc.timeline_id, layer=%self, %version); // downgrade for queueing, in case there's a tear down already ongoing we should not // hold it alive. 
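The `wait_drop` / `Drop` interplay above hinges on a property of `tokio::sync::broadcast`: a receiver observes `RecvError::Closed` only once every `Sender` clone is gone, including the clone carried into the spawn_blocking cleanup as `_status`. Below is a minimal standalone sketch of that pattern, not the pageserver code itself.

// Standalone illustration: the waiter completes only after the cleanup task drops its
// Sender clone, which is exactly how `Layer::wait_drop` learns that garbage collection
// and remote deletion scheduling have finished.
use tokio::sync::broadcast;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = broadcast::channel::<()>(1);

    // Stands in for the spawn_blocking cleanup in `impl Drop for LayerInner`.
    let cleanup = tokio::task::spawn_blocking(move || {
        let _status = tx; // held until the cleanup work is done
        std::thread::sleep(std::time::Duration::from_millis(50));
        // `_status` dropped here: the channel closes.
    });

    // Stands in for `Layer::wait_drop`.
    loop {
        if let Err(broadcast::error::RecvError::Closed) = rx.recv().await {
            break;
        }
    }
    cleanup.await.unwrap();
    println!("layer cleanup finished");
}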
@@ -1077,7 +1114,7 @@ impl LayerInner { } fn metadata(&self) -> LayerFileMetadata { - LayerFileMetadata::new(self.desc.file_size, self.generation) + LayerFileMetadata::new(self.desc.file_size, self.generation, self.shard) } } @@ -1192,7 +1229,7 @@ impl DownloadedLayer { let res = if owner.desc.is_delta { let summary = Some(delta_layer::Summary::expected( - owner.desc.tenant_id, + owner.desc.tenant_shard_id.tenant_id, owner.desc.timeline_id, owner.desc.key_range.clone(), owner.desc.lsn_range.clone(), @@ -1203,7 +1240,7 @@ impl DownloadedLayer { } else { let lsn = owner.desc.image_layer_lsn(); let summary = Some(image_layer::Summary::expected( - owner.desc.tenant_id, + owner.desc.tenant_shard_id.tenant_id, owner.desc.timeline_id, owner.desc.key_range.clone(), lsn, @@ -1401,6 +1438,7 @@ impl Default for LayerImplMetrics { ) .unwrap(); + // reminder: this will be pageserver_layer_gcs_count_total with "_total" suffix let gcs = metrics::register_int_counter_vec!( "pageserver_layer_gcs_count", "Garbage collections started and completed in the Layer implementation", diff --git a/pageserver/src/tenant/storage_layer/layer_desc.rs b/pageserver/src/tenant/storage_layer/layer_desc.rs index 2e0b0b3e645c..bf24407fc582 100644 --- a/pageserver/src/tenant/storage_layer/layer_desc.rs +++ b/pageserver/src/tenant/storage_layer/layer_desc.rs @@ -1,9 +1,7 @@ use core::fmt::Display; +use pageserver_api::shard::TenantShardId; use std::ops::Range; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::{id::TimelineId, lsn::Lsn}; use crate::repository::Key; @@ -11,12 +9,15 @@ use super::{DeltaFileName, ImageFileName, LayerFileName}; use serde::{Deserialize, Serialize}; +#[cfg(test)] +use utils::id::TenantId; + /// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the /// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides /// a unified way to generate layer information like file name. 
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] pub struct PersistentLayerDesc { - pub tenant_id: TenantId, + pub tenant_shard_id: TenantShardId, pub timeline_id: TimelineId, /// Range of keys that this layer covers pub key_range: Range, @@ -56,7 +57,7 @@ impl PersistentLayerDesc { #[cfg(test)] pub fn new_test(key_range: Range) -> Self { Self { - tenant_id: TenantId::generate(), + tenant_shard_id: TenantShardId::unsharded(TenantId::generate()), timeline_id: TimelineId::generate(), key_range, lsn_range: Lsn(0)..Lsn(1), @@ -66,14 +67,14 @@ impl PersistentLayerDesc { } pub fn new_img( - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, key_range: Range, lsn: Lsn, file_size: u64, ) -> Self { Self { - tenant_id, + tenant_shard_id, timeline_id, key_range, lsn_range: Self::image_layer_lsn_range(lsn), @@ -83,14 +84,14 @@ impl PersistentLayerDesc { } pub fn new_delta( - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, key_range: Range, lsn_range: Range, file_size: u64, ) -> Self { Self { - tenant_id, + tenant_shard_id, timeline_id, key_range, lsn_range, @@ -100,18 +101,22 @@ impl PersistentLayerDesc { } pub fn from_filename( - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, filename: LayerFileName, file_size: u64, ) -> Self { match filename { LayerFileName::Image(i) => { - Self::new_img(tenant_id, timeline_id, i.key_range, i.lsn, file_size) - } - LayerFileName::Delta(d) => { - Self::new_delta(tenant_id, timeline_id, d.key_range, d.lsn_range, file_size) + Self::new_img(tenant_shard_id, timeline_id, i.key_range, i.lsn, file_size) } + LayerFileName::Delta(d) => Self::new_delta( + tenant_shard_id, + timeline_id, + d.key_range, + d.lsn_range, + file_size, + ), } } @@ -172,10 +177,6 @@ impl PersistentLayerDesc { self.timeline_id } - pub fn get_tenant_id(&self) -> TenantId { - self.tenant_id - } - /// Does this layer only contain some data for the key-range (incremental), /// or does it contain a version of every page? 
This is important to know /// for garbage collecting old layers: an incremental layer depends on @@ -192,7 +193,7 @@ impl PersistentLayerDesc { if self.is_delta { println!( "----- delta layer for ten {} tli {} keys {}-{} lsn {}-{} is_incremental {} size {} ----", - self.tenant_id, + self.tenant_shard_id, self.timeline_id, self.key_range.start, self.key_range.end, @@ -204,7 +205,7 @@ impl PersistentLayerDesc { } else { println!( "----- image layer for ten {} tli {} key {}-{} at {} is_incremental {} size {} ----", - self.tenant_id, + self.tenant_shard_id, self.timeline_id, self.key_range.start, self.key_range.end, diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 860bb255ca43..138578ec8ae1 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -86,7 +86,7 @@ pub fn start_background_loops( tenant: &Arc, background_jobs_can_start: Option<&completion::Barrier>, ) { - let tenant_id = tenant.tenant_id; + let tenant_id = tenant.tenant_shard_id.tenant_id; task_mgr::spawn( BACKGROUND_RUNTIME.handle(), TaskKind::Compaction, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 9493ed1c9a33..bf4e19e5fbaa 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2,7 +2,7 @@ pub mod delete; mod eviction_task; mod init; pub mod layer_manager; -mod logical_size; +pub(crate) mod logical_size; pub mod span; pub mod uninit; mod walreceiver; @@ -13,8 +13,12 @@ use camino::{Utf8Path, Utf8PathBuf}; use enumset::EnumSet; use fail::fail_point; use itertools::Itertools; -use pageserver_api::models::{ - DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, LayerMapInfo, TimelineState, +use pageserver_api::{ + models::{ + DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, LayerMapInfo, + TimelineState, + }, + shard::TenantShardId, }; use serde_with::serde_as; use storage_broker::BrokerClientChannel; @@ -62,6 +66,7 @@ use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key}; use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError}; use crate::tenant::config::{EvictionPolicy, TenantConfOpt}; use pageserver_api::reltag::RelTag; +use pageserver_api::shard::ShardIndex; use postgres_connection::PgConnectionConfig; use postgres_ffi::to_pg_timestamp; @@ -148,7 +153,7 @@ pub struct Timeline { myself: Weak, - pub tenant_id: TenantId, + pub(crate) tenant_shard_id: TenantShardId, pub timeline_id: TimelineId, /// The generation of the tenant that instantiated us: this is used for safety when writing remote objects. @@ -250,14 +255,6 @@ pub struct Timeline { /// to be notified when layer flushing has finished, subscribe to the layer_flush_done channel layer_flush_done_tx: tokio::sync::watch::Sender<(u64, Result<(), FlushLayerError>)>, - /// Layer removal lock. - /// A lock to ensure that no layer of the timeline is removed concurrently by other tasks. - /// This lock is acquired in [`Timeline::gc`] and [`Timeline::compact`]. - /// This is an `Arc` lock because we need an owned - /// lock guard in functions that will be spawned to tokio I/O pool (which requires `'static`). - /// Note that [`DeleteTimelineFlow`] uses `delete_progress` field. 
- pub(super) layer_removal_cs: Arc>, - // Needed to ensure that we can't create a branch at a point that was already garbage collected pub latest_gc_cutoff_lsn: Rcu, @@ -318,6 +315,24 @@ pub struct Timeline { /// Cancellation token scoped to this timeline: anything doing long-running work relating /// to the timeline should drop out when this token fires. pub(crate) cancel: CancellationToken, + + /// Make sure we only have one running compaction at a time in tests. + /// + /// Must only be taken in two places: + /// - [`Timeline::compact`] (this file) + /// - [`delete::delete_local_layer_files`] + /// + /// Timeline deletion will acquire both compaction and gc locks in whatever order. + compaction_lock: tokio::sync::Mutex<()>, + + /// Make sure we only have one running gc at a time. + /// + /// Must only be taken in two places: + /// - [`Timeline::gc`] (this file) + /// - [`delete::delete_local_layer_files`] + /// + /// Timeline deletion will acquire both compaction and gc locks in whatever order. + gc_lock: tokio::sync::Mutex<()>, } pub struct WalReceiverInfo { @@ -690,7 +705,7 @@ impl Timeline { } /// Flush to disk all data that was written with the put_* functions - #[instrument(skip(self), fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id))] + #[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))] pub async fn freeze_and_flush(&self) -> anyhow::Result<()> { self.freeze_inmem_layer(false).await; self.flush_frozen_layers_and_wait().await @@ -703,6 +718,8 @@ impl Timeline { flags: EnumSet, ctx: &RequestContext, ) -> Result<(), CompactionError> { + let _g = self.compaction_lock.lock().await; + // this wait probably never needs any "long time spent" logging, because we already nag if // compaction task goes over it's period (20s) which is quite often in production. let _permit = match super::tasks::concurrent_background_tasks_rate_limit( @@ -757,7 +774,7 @@ impl Timeline { // Below are functions compact_level0() and create_image_layers() // but they are a bit ad hoc and don't quite work like it's explained // above. Rewrite it. - let layer_removal_cs = Arc::new(self.layer_removal_cs.clone().lock_owned().await); + // Is the timeline being deleted? if self.is_stopping() { trace!("Dropping out of compaction on timeline shutdown"); @@ -798,8 +815,7 @@ impl Timeline { // 3. Compact let timer = self.metrics.compact_time_histo.start_timer(); - self.compact_level0(layer_removal_cs.clone(), target_file_size, ctx) - .await?; + self.compact_level0(target_file_size, ctx).await?; timer.stop_and_record(); if let Some(remote_client) = &self.remote_client { @@ -839,23 +855,38 @@ impl Timeline { /// the initial size calculation has not been run (gets triggered on the first size access). 
/// /// return size and boolean flag that shows if the size is exact - pub fn get_current_logical_size( + pub(crate) fn get_current_logical_size( self: &Arc, ctx: &RequestContext, - ) -> anyhow::Result<(u64, bool)> { - let current_size = self.current_logical_size.current_size()?; + ) -> logical_size::CurrentLogicalSize { + let current_size = self.current_logical_size.current_size(); debug!("Current size: {current_size:?}"); - let mut is_exact = true; - let size = current_size.size(); if let (CurrentLogicalSize::Approximate(_), Some(initial_part_end)) = (current_size, self.current_logical_size.initial_part_end) { - is_exact = false; self.try_spawn_size_init_task(initial_part_end, ctx); } - Ok((size, is_exact)) + if let CurrentLogicalSize::Approximate(_) = ¤t_size { + if ctx.task_kind() == TaskKind::WalReceiverConnectionHandler { + let first = self + .current_logical_size + .did_return_approximate_to_walreceiver + .compare_exchange( + false, + true, + AtomicOrdering::Relaxed, + AtomicOrdering::Relaxed, + ) + .is_ok(); + if first { + crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE.inc(); + } + } + } + + current_size } /// Check if more than 'checkpoint_distance' of WAL has been accumulated in @@ -925,7 +956,7 @@ impl Timeline { tracing::debug!("Waiting for WalReceiverManager..."); task_mgr::shutdown_tasks( Some(TaskKind::WalReceiverManager), - Some(self.tenant_id), + Some(self.tenant_shard_id.tenant_id), Some(self.timeline_id), ) .await; @@ -945,7 +976,7 @@ impl Timeline { // what is problematic is the shutting down of RemoteTimelineClient, because // obviously it does not make sense to stop while we wait for it, but what // about corner cases like s3 suddenly hanging up? - if let Err(e) = client.wait_completion().await { + if let Err(e) = client.shutdown().await { // Non-fatal. Shutdown is infallible. Failures to flush just mean that // we have some extra WAL replay to do next time the timeline starts. 
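The `did_return_approximate_to_walreceiver` check above is the usual "count only the first occurrence" idiom: `compare_exchange(false, true, ..)` succeeds for exactly one caller, so the metric is bumped at most once per timeline. A standalone sketch, with a static `AtomicBool` standing in for the per-timeline field and a plain counter standing in for the prometheus counter:

use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};

static EVENT_SEEN: AtomicBool = AtomicBool::new(false);
static COUNTER: AtomicU64 = AtomicU64::new(0); // stand-in for the prometheus counter

fn record_first_occurrence() {
    // Only the caller that flips false -> true gets Ok(..) back.
    let first = EVENT_SEEN
        .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
        .is_ok();
    if first {
        COUNTER.fetch_add(1, Ordering::Relaxed);
    }
}

fn main() {
    record_first_occurrence();
    record_first_occurrence();
    assert_eq!(COUNTER.load(Ordering::Relaxed), 1);
}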
warn!("failed to flush to remote storage: {e:#}"); @@ -976,7 +1007,7 @@ impl Timeline { // Shut down the layer flush task before the remote client, as one depends on the other task_mgr::shutdown_tasks( Some(TaskKind::LayerFlushTask), - Some(self.tenant_id), + Some(self.tenant_shard_id.tenant_id), Some(self.timeline_id), ) .await; @@ -994,7 +1025,12 @@ impl Timeline { tracing::debug!("Waiting for tasks..."); - task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(self.timeline_id)).await; + task_mgr::shutdown_tasks( + None, + Some(self.tenant_shard_id.tenant_id), + Some(self.timeline_id), + ) + .await; // Finally wait until any gate-holders are complete self.gate.close().await; @@ -1113,7 +1149,7 @@ impl Timeline { } } - #[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))] + #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))] pub async fn download_layer(&self, layer_file_name: &str) -> anyhow::Result> { let Some(layer) = self.find_layer(layer_file_name).await else { return Ok(None); @@ -1200,16 +1236,6 @@ impl Timeline { remote_client: &Arc, layers_to_evict: &[Layer], ) -> anyhow::Result>>> { - // ensure that the layers have finished uploading - // (don't hold the layer_removal_cs while we do it, we're not removing anything yet) - remote_client - .wait_completion() - .await - .context("wait for layer upload ops to complete")?; - - // now lock out layer removal (compaction, gc, timeline deletion) - let _layer_removal_guard = self.layer_removal_cs.lock().await; - { // to avoid racing with detach and delete_timeline let state = self.current_state(); @@ -1328,7 +1354,11 @@ impl Timeline { &self.tenant_conf.read().unwrap().tenant_conf, &self.conf.default_tenant_conf, ); - let tenant_id_str = self.tenant_id.to_string(); + + // TODO(sharding): make evictions state shard aware + // (https://github.com/neondatabase/neon/issues/5953) + let tenant_id_str = self.tenant_shard_id.tenant_id.to_string(); + let timeline_id_str = self.timeline_id.to_string(); self.metrics .evictions_with_low_residence_duration @@ -1348,7 +1378,7 @@ impl Timeline { metadata: &TimelineMetadata, ancestor: Option>, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, generation: Generation, walredo_mgr: Arc, resources: TimelineResources, @@ -1379,7 +1409,7 @@ impl Timeline { tenant_conf, myself: myself.clone(), timeline_id, - tenant_id, + tenant_shard_id, generation, pg_version, layers: Arc::new(tokio::sync::RwLock::new(LayerManager::create())), @@ -1406,7 +1436,7 @@ impl Timeline { ancestor_lsn: metadata.ancestor_lsn(), metrics: TimelineMetrics::new( - &tenant_id, + &tenant_shard_id.tenant_id, &timeline_id, crate::metrics::EvictionsWithLowResidenceDurationBuilder::new( "mtime", @@ -1420,7 +1450,6 @@ impl Timeline { layer_flush_done_tx, write_lock: tokio::sync::Mutex::new(()), - layer_removal_cs: Default::default(), gc_info: std::sync::RwLock::new(GcInfo { retain_lsns: Vec::new(), @@ -1458,7 +1487,10 @@ impl Timeline { initial_logical_size_can_start, initial_logical_size_attempt: Mutex::new(initial_logical_size_attempt), cancel, - gate: Gate::new(format!("Timeline<{tenant_id}/{timeline_id}>")), + gate: Gate::new(format!("Timeline<{tenant_shard_id}/{timeline_id}>")), + + compaction_lock: tokio::sync::Mutex::default(), + gc_lock: tokio::sync::Mutex::default(), }; result.repartition_threshold = result.get_checkpoint_distance() / 
REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE; @@ -1471,20 +1503,24 @@ impl Timeline { } pub(super) fn maybe_spawn_flush_loop(self: &Arc) { + let Ok(guard) = self.gate.enter() else { + info!("cannot start flush loop when the timeline gate has already been closed"); + return; + }; let mut flush_loop_state = self.flush_loop_state.lock().unwrap(); match *flush_loop_state { FlushLoopState::NotStarted => (), FlushLoopState::Running { .. } => { info!( "skipping attempt to start flush_loop twice {}/{}", - self.tenant_id, self.timeline_id + self.tenant_shard_id, self.timeline_id ); return; } FlushLoopState::Exited => { warn!( "ignoring attempt to restart exited flush_loop {}/{}", - self.tenant_id, self.timeline_id + self.tenant_shard_id, self.timeline_id ); return; } @@ -1503,11 +1539,12 @@ impl Timeline { task_mgr::spawn( task_mgr::BACKGROUND_RUNTIME.handle(), task_mgr::TaskKind::LayerFlushTask, - Some(self.tenant_id), + Some(self.tenant_shard_id.tenant_id), Some(self.timeline_id), "layer flush task", false, async move { + let _guard = guard; let background_ctx = RequestContext::todo_child(TaskKind::LayerFlushTask, DownloadBehavior::Error); self_clone.flush_loop(layer_flush_start_rx, &background_ctx).await; let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap(); @@ -1515,7 +1552,7 @@ impl Timeline { *flush_loop_state = FlushLoopState::Exited; Ok(()) } - .instrument(info_span!(parent: None, "layer flush task", tenant_id = %self.tenant_id, timeline_id = %self.timeline_id)) + .instrument(info_span!(parent: None, "layer flush task", tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id)) ); } @@ -1530,7 +1567,7 @@ impl Timeline { ) { info!( "launching WAL receiver for timeline {} of tenant {}", - self.timeline_id, self.tenant_id + self.timeline_id, self.tenant_shard_id ); let tenant_conf_guard = self.tenant_conf.read().unwrap(); @@ -1591,12 +1628,15 @@ impl Timeline { // Scan timeline directory and create ImageFileName and DeltaFilename // structs representing all files on disk - let timeline_path = self.conf.timeline_path(&self.tenant_id, &self.timeline_id); + let timeline_path = self + .conf + .timeline_path(&self.tenant_shard_id, &self.timeline_id); let conf = self.conf; let span = tracing::Span::current(); // Copy to move into the task we're about to spawn let generation = self.generation; + let shard = self.get_shard_index(); let this = self.myself.upgrade().expect("&self method holds the arc"); let (loaded_layers, needs_cleanup, total_physical_size) = tokio::task::spawn_blocking({ @@ -1645,6 +1685,7 @@ impl Timeline { index_part.as_ref(), disk_consistent_lsn, generation, + shard, ); let mut loaded_layers = Vec::new(); @@ -1786,6 +1827,7 @@ impl Timeline { "spawning logical size computation from context of task kind {:?}", ctx.task_kind() ); + let causing_task_kind = ctx.task_kind(); // We need to start the computation task. // It gets a separate context since it will outlive the request that called this function. 
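`maybe_spawn_flush_loop` above enters the timeline gate before spawning and moves the guard into the task, so the `gate.close().await` during shutdown (seen earlier in this diff) returns only after the flush loop has exited. The sketch below illustrates just that wait-for-all-holders half of the pattern with a plain mpsc channel; it is an analogy, not the pageserver's `Gate` implementation.

// Standalone analogy: a Sender clone plays the role of a gate guard, and "close" is
// dropping our own Sender and waiting for recv() to return None, i.e. for every guard
// held by a running task to be dropped.
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    let (guard_tx, mut closed_rx) = mpsc::channel::<()>(1);

    // Spawning a background loop: take a guard first, move it into the task.
    let guard = guard_tx.clone();
    tokio::spawn(async move {
        let _guard = guard; // held for the lifetime of the task
        tokio::time::sleep(std::time::Duration::from_millis(50)).await; // background work
    });

    // Shutdown: stop handing out guards, then wait for existing holders to finish.
    drop(guard_tx);
    let _ = closed_rx.recv().await; // returns None once all senders are gone
    println!("all gate holders finished");
}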
let self_clone = Arc::clone(self); @@ -1796,7 +1838,7 @@ impl Timeline { task_mgr::spawn( task_mgr::BACKGROUND_RUNTIME.handle(), task_mgr::TaskKind::InitialLogicalSizeCalculation, - Some(self.tenant_id), + Some(self.tenant_shard_id.tenant_id), Some(self.timeline_id), "initial size calculation", false, @@ -1813,6 +1855,8 @@ impl Timeline { _ = completion::Barrier::maybe_wait(self_clone.initial_logical_size_can_start.clone()) => {} }; + + // hold off background tasks from starting until all timelines get to try at least // once initial logical size calculation; though retry will rarely be useful. // holding off is done because heavier tasks execute blockingly on the same @@ -1820,7 +1864,12 @@ impl Timeline { // // dropping this at every outcome is probably better than trying to cling on to it, // delay will be terminated by a timeout regardless. - let _completion = { self_clone.initial_logical_size_attempt.lock().expect("unexpected initial_logical_size_attempt poisoned").take() }; + let completion = { self_clone.initial_logical_size_attempt.lock().expect("unexpected initial_logical_size_attempt poisoned").take() }; + + let metrics_guard = match &completion { + Some(_) => crate::metrics::initial_logical_size::START_CALCULATION.first(Some(causing_task_kind)), + None => crate::metrics::initial_logical_size::START_CALCULATION.retry(Some(causing_task_kind)), + }; let calculated_size = match self_clone .logical_size_calculation_task(lsn, LogicalSizeCalculationCause::Initial, &background_ctx) @@ -1865,11 +1914,11 @@ impl Timeline { match self_clone .current_logical_size .initial_logical_size - .set(calculated_size) + .set((calculated_size, metrics_guard.calculation_result_saved())) { Ok(()) => (), Err(_what_we_just_attempted_to_set) => { - let existing_size = self_clone + let (existing_size, _) = self_clone .current_logical_size .initial_logical_size .get() @@ -1906,7 +1955,7 @@ impl Timeline { task_mgr::spawn( task_mgr::BACKGROUND_RUNTIME.handle(), task_mgr::TaskKind::OndemandLogicalSizeCalculation, - Some(self.tenant_id), + Some(self.tenant_shard_id.tenant_id), Some(self.timeline_id), "ondemand logical size calculation", false, @@ -1982,7 +2031,7 @@ impl Timeline { fail::fail_point!("timeline-calculate-logical-size-check-dir-exists", |_| { if !self .conf - .metadata_path(&self.tenant_id, &self.timeline_id) + .metadata_path(&self.tenant_shard_id, &self.timeline_id) .exists() { error!("timeline-calculate-logical-size-pre metadata file does not exist") @@ -2023,16 +2072,14 @@ impl Timeline { // one value while current_logical_size is set to the // other. match logical_size.current_size() { - Ok(CurrentLogicalSize::Exact(new_current_size)) => self + CurrentLogicalSize::Exact(ref new_current_size) => self .metrics .current_logical_size_gauge - .set(new_current_size), - Ok(CurrentLogicalSize::Approximate(_)) => { + .set(new_current_size.into()), + CurrentLogicalSize::Approximate(_) => { // don't update the gauge yet, this allows us not to update the gauge back and // forth between the initial size calculation task. } - // this is overflow - Err(e) => error!("Failed to compute current logical size for metrics update: {e:?}"), } } @@ -2335,7 +2382,13 @@ impl Timeline { // FIXME: It's pointless to check the cache for things that are not 8kB pages. 
// We should look at the key to determine if it's a cacheable object let (lsn, read_guard) = cache - .lookup_materialized_page(self.tenant_id, self.timeline_id, key, lsn, ctx) + .lookup_materialized_page( + self.tenant_shard_id.tenant_id, + self.timeline_id, + key, + lsn, + ctx, + ) .await?; let img = Bytes::from(read_guard.to_vec()); Some((lsn, img)) @@ -2363,7 +2416,7 @@ impl Timeline { self.get_last_record_lsn(), self.conf, self.timeline_id, - self.tenant_id, + self.tenant_shard_id, ) .await?; Ok(layer) @@ -2529,7 +2582,7 @@ impl Timeline { } /// Flush one frozen in-memory layer to disk, as a new delta layer. - #[instrument(skip_all, fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id, layer=%frozen_layer))] + #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id, layer=%frozen_layer))] async fn flush_frozen_layer( self: &Arc, frozen_layer: Arc, @@ -2650,9 +2703,14 @@ impl Timeline { // If we updated our disk_consistent_lsn, persist the updated metadata to local disk. if let Some(metadata) = metadata { - save_metadata(self.conf, &self.tenant_id, &self.timeline_id, &metadata) - .await - .context("save_metadata")?; + save_metadata( + self.conf, + &self.tenant_shard_id, + &self.timeline_id, + &metadata, + ) + .await + .context("save_metadata")?; } Ok(()) } @@ -2716,9 +2774,14 @@ impl Timeline { ) -> anyhow::Result<()> { let metadata = self.schedule_uploads(disk_consistent_lsn, layers_to_upload)?; - save_metadata(self.conf, &self.tenant_id, &self.timeline_id, &metadata) - .await - .context("save_metadata")?; + save_metadata( + self.conf, + &self.tenant_shard_id, + &self.timeline_id, + &metadata, + ) + .await + .context("save_metadata")?; Ok(()) } @@ -2766,7 +2829,7 @@ impl Timeline { par_fsync::par_fsync(&[new_delta_path]).context("fsync of delta layer")?; par_fsync::par_fsync(&[self_clone .conf - .timeline_path(&self_clone.tenant_id, &self_clone.timeline_id)]) + .timeline_path(&self_clone.tenant_shard_id, &self_clone.timeline_id)]) .context("fsync of timeline dir")?; anyhow::Ok(new_delta) @@ -2922,7 +2985,7 @@ impl Timeline { let mut image_layer_writer = ImageLayerWriter::new( self.conf, self.timeline_id, - self.tenant_id, + self.tenant_shard_id, &img_range, lsn, ) @@ -2995,9 +3058,11 @@ impl Timeline { .await .context("fsync of newly created layer files")?; - par_fsync::par_fsync_async(&[self.conf.timeline_path(&self.tenant_id, &self.timeline_id)]) - .await - .context("fsync of timeline dir")?; + par_fsync::par_fsync_async(&[self + .conf + .timeline_path(&self.tenant_shard_id, &self.timeline_id)]) + .await + .context("fsync of timeline dir")?; let mut guard = self.layers.write().await; @@ -3147,13 +3212,8 @@ impl TryFrom for CompactLevel0Phase1Stats { impl Timeline { /// Level0 files first phase of compaction, explained in the [`Self::compact`] comment. - /// - /// This method takes the `_layer_removal_cs` guard to highlight it required downloads are - /// returned as an error. If the `layer_removal_cs` boundary is changed not to be taken in the - /// start of level0 files compaction, the on-demand download should be revisited as well. 
async fn compact_level0_phase1( self: &Arc, - _layer_removal_cs: Arc>, guard: tokio::sync::OwnedRwLockReadGuard, mut stats: CompactLevel0Phase1StatsBuilder, target_file_size: u64, @@ -3240,8 +3300,6 @@ impl Timeline { let mut prev_lsn_end = first_level0_delta.layer_desc().lsn_range.end; let mut deltas_to_compact = Vec::with_capacity(level0_deltas.len()); - // FIXME: downloading while holding layer_removal_cs is not great, but we will remove that - // soon deltas_to_compact.push(first_level0_delta.download_and_keep_resident().await?); for l in level0_deltas_iter { let lsn_range = &l.layer_desc().lsn_range; @@ -3490,7 +3548,7 @@ impl Timeline { DeltaLayerWriter::new( self.conf, self.timeline_id, - self.tenant_id, + self.tenant_shard_id, key, if dup_end_lsn.is_valid() { // this is a layer containing slice of values of the same key @@ -3551,7 +3609,9 @@ impl Timeline { .await .context("fsync all new layers")?; - let timeline_dir = self.conf.timeline_path(&self.tenant_id, &self.timeline_id); + let timeline_dir = self + .conf + .timeline_path(&self.tenant_shard_id, &self.timeline_id); par_fsync::par_fsync_async(&[timeline_dir]) .await @@ -3591,7 +3651,6 @@ impl Timeline { /// async fn compact_level0( self: &Arc, - layer_removal_cs: Arc>, target_file_size: u64, ctx: &RequestContext, ) -> Result<(), CompactionError> { @@ -3603,7 +3662,7 @@ impl Timeline { let ctx = ctx.attached_child(); let mut stats = CompactLevel0Phase1StatsBuilder { version: Some(2), - tenant_id: Some(self.tenant_id), + tenant_id: Some(self.tenant_shard_id.tenant_id), timeline_id: Some(self.timeline_id), ..Default::default() }; @@ -3613,16 +3672,9 @@ impl Timeline { let now = tokio::time::Instant::now(); stats.read_lock_acquisition_micros = DurationRecorder::Recorded(RecordedDuration(now - begin), now); - let layer_removal_cs = layer_removal_cs.clone(); - self.compact_level0_phase1( - layer_removal_cs, - phase1_layers_locked, - stats, - target_file_size, - &ctx, - ) - .instrument(phase1_span) - .await? + self.compact_level0_phase1(phase1_layers_locked, stats, target_file_size, &ctx) + .instrument(phase1_span) + .await? }; if new_layers.is_empty() && deltas_to_compact.is_empty() { @@ -3630,17 +3682,6 @@ impl Timeline { return Ok(()); } - // Before deleting any layers, we need to wait for their upload ops to finish. - // See remote_timeline_client module level comment on consistency. - // Do it here because we don't want to hold self.layers.write() while waiting. - if let Some(remote_client) = &self.remote_client { - debug!("waiting for upload ops to complete"); - remote_client - .wait_completion() - .await - .context("wait for layer upload ops to complete")?; - } - let mut guard = self.layers.write().await; let mut duplicated_layers = HashSet::new(); @@ -3672,12 +3713,7 @@ impl Timeline { }; // deletion will happen later, the layer file manager calls garbage_collect_on_drop - guard.finish_compact_l0( - &layer_removal_cs, - &remove_layers, - &insert_layers, - &self.metrics, - ); + guard.finish_compact_l0(&remove_layers, &insert_layers, &self.metrics); if let Some(remote_client) = self.remote_client.as_ref() { remote_client.schedule_compaction_update(&remove_layers, &new_layers)?; @@ -3788,19 +3824,17 @@ impl Timeline { Ok(()) } - /// /// Garbage collect layer files on a timeline that are no longer needed. /// /// Currently, we don't make any attempt at removing unneeded page versions /// within a layer file. We can only remove the whole file if it's fully /// obsolete. 
- /// pub(super) async fn gc(&self) -> anyhow::Result { + let _g = self.gc_lock.lock().await; let timer = self.metrics.garbage_collect_histo.start_timer(); fail_point!("before-timeline-gc"); - let layer_removal_cs = Arc::new(self.layer_removal_cs.clone().lock_owned().await); // Is the timeline being deleted? if self.is_stopping() { anyhow::bail!("timeline is Stopping"); @@ -3818,13 +3852,7 @@ impl Timeline { let new_gc_cutoff = Lsn::min(horizon_cutoff, pitr_cutoff); let res = self - .gc_timeline( - layer_removal_cs.clone(), - horizon_cutoff, - pitr_cutoff, - retain_lsns, - new_gc_cutoff, - ) + .gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff) .instrument( info_span!("gc_timeline", timeline_id = %self.timeline_id, cutoff = %new_gc_cutoff), ) @@ -3838,7 +3866,6 @@ impl Timeline { async fn gc_timeline( &self, - layer_removal_cs: Arc>, horizon_cutoff: Lsn, pitr_cutoff: Lsn, retain_lsns: Vec, @@ -3876,17 +3903,6 @@ impl Timeline { debug!("retain_lsns: {:?}", retain_lsns); - // Before deleting any layers, we need to wait for their upload ops to finish. - // See storage_sync module level comment on consistency. - // Do it here because we don't want to hold self.layers.write() while waiting. - if let Some(remote_client) = &self.remote_client { - debug!("waiting for upload ops to complete"); - remote_client - .wait_completion() - .await - .context("wait for layer upload ops to complete")?; - } - let mut layers_to_remove = Vec::new(); let mut wanted_image_layers = KeySpaceRandomAccum::default(); @@ -4002,6 +4018,11 @@ impl Timeline { // // This does not in fact have any effect as we no longer consider local metadata unless // running without remote storage. + // + // This unconditionally schedules also an index_part.json update, even though, we will + // be doing one a bit later with the unlinked gc'd layers. + // + // TODO: remove when implementing . self.update_metadata_file(self.disk_consistent_lsn.load(), None) .await?; @@ -4016,11 +4037,16 @@ impl Timeline { remote_client.schedule_gc_update(&gc_layers)?; } - guard.finish_gc_timeline(&layer_removal_cs, gc_layers); + guard.finish_gc_timeline(&gc_layers); if result.layers_removed != 0 { fail_point!("after-timeline-gc-removed-layers"); } + + #[cfg(feature = "testing")] + { + result.doomed_layers = gc_layers; + } } info!( @@ -4032,9 +4058,7 @@ impl Timeline { Ok(result) } - /// /// Reconstruct a value, using the given base image and WAL records in 'data'. 
- /// async fn reconstruct_value( &self, key: Key, @@ -4099,7 +4123,7 @@ impl Timeline { let cache = page_cache::get(); if let Err(e) = cache .memorize_materialized_page( - self.tenant_id, + self.tenant_shard_id.tenant_id, self.timeline_id, key, last_rec_lsn, @@ -4143,7 +4167,7 @@ impl Timeline { let task_id = task_mgr::spawn( task_mgr::BACKGROUND_RUNTIME.handle(), task_mgr::TaskKind::DownloadAllRemoteLayers, - Some(self.tenant_id), + Some(self.tenant_shard_id.tenant_id), Some(self.timeline_id), "download all remote layers task", false, @@ -4165,7 +4189,7 @@ impl Timeline { }; Ok(()) } - .instrument(info_span!(parent: None, "download_all_remote_layers", tenant_id = %self.tenant_id, timeline_id = %self.timeline_id)) + .instrument(info_span!(parent: None, "download_all_remote_layers", tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id)) ); let initial_info = DownloadRemoteLayersTaskInfo { @@ -4364,6 +4388,13 @@ impl Timeline { resident_layers, } } + + pub(crate) fn get_shard_index(&self) -> ShardIndex { + ShardIndex { + shard_number: self.tenant_shard_id.shard_number, + shard_count: self.tenant_shard_id.shard_count, + } + } } type TraversalPathItem = ( diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 56a99a25cf75..497796c80ab5 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -4,13 +4,10 @@ use std::{ }; use anyhow::Context; -use pageserver_api::models::TimelineState; +use pageserver_api::{models::TimelineState, shard::TenantShardId}; use tokio::sync::OwnedMutexGuard; use tracing::{debug, error, info, instrument, warn, Instrument, Span}; -use utils::{ - crashsafe, fs_ext, - id::{TenantId, TimelineId}, -}; +use utils::{crashsafe, fs_ext, id::TimelineId}; use crate::{ config::PageServerConf, @@ -47,7 +44,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> { // Shut down the layer flush task before the remote client, as one depends on the other task_mgr::shutdown_tasks( Some(TaskKind::LayerFlushTask), - Some(timeline.tenant_id), + Some(timeline.tenant_shard_id.tenant_id), Some(timeline.timeline_id), ) .await; @@ -73,7 +70,12 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> { // NB: This and other delete_timeline calls do not run as a task_mgr task, // so, they are not affected by this shutdown_tasks() call. info!("waiting for timeline tasks to shutdown"); - task_mgr::shutdown_tasks(None, Some(timeline.tenant_id), Some(timeline.timeline_id)).await; + task_mgr::shutdown_tasks( + None, + Some(timeline.tenant_shard_id.tenant_id), + Some(timeline.timeline_id), + ) + .await; fail::fail_point!("timeline-delete-before-index-deleted-at", |_| { Err(anyhow::anyhow!( @@ -110,40 +112,11 @@ async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTi Ok(()) } -// We delete local files first, so if pageserver restarts after local files deletion then remote deletion is not continued. -// This can be solved with inversion of these steps. But even if these steps are inverted then, when index_part.json -// gets deleted there is no way to distinguish between "this timeline is good, we just didnt upload it to remote" -// and "this timeline is deleted we should continue with removal of local state". So to avoid the ambiguity we use a mark file. -// After index part is deleted presence of this mark file indentifies that it was a deletion intention. 
-// So we can just remove the mark file. -async fn create_delete_mark( - conf: &PageServerConf, - tenant_id: TenantId, - timeline_id: TimelineId, -) -> Result<(), DeleteTimelineError> { - fail::fail_point!("timeline-delete-before-delete-mark", |_| { - Err(anyhow::anyhow!( - "failpoint: timeline-delete-before-delete-mark" - ))? - }); - let marker_path = conf.timeline_delete_mark_file_path(tenant_id, timeline_id); - - // Note: we're ok to replace existing file. - let _ = std::fs::OpenOptions::new() - .write(true) - .create(true) - .open(&marker_path) - .with_context(|| format!("could not create delete marker file {marker_path:?}"))?; - - crashsafe::fsync_file_and_parent(&marker_path).context("sync_mark")?; - Ok(()) -} - -/// Grab the layer_removal_cs lock, and actually perform the deletion. +/// Grab the compaction and gc locks, and actually perform the deletion. /// -/// This lock prevents prevents GC or compaction from running at the same time. -/// The GC task doesn't register itself with the timeline it's operating on, -/// so it might still be running even though we called `shutdown_tasks`. +/// The locks prevent GC or compaction from running at the same time. The background tasks do not +/// register themselves with the timeline it's operating on, so it might still be running even +/// though we called `shutdown_tasks`. /// /// Note that there are still other race conditions between /// GC, compaction and timeline deletion. See @@ -151,19 +124,24 @@ async fn create_delete_mark( /// /// No timeout here, GC & Compaction should be responsive to the /// `TimelineState::Stopping` change. -async fn delete_local_layer_files( +// pub(super): documentation link +pub(super) async fn delete_local_layer_files( conf: &PageServerConf, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline: &Timeline, ) -> anyhow::Result<()> { - info!("waiting for layer_removal_cs.lock()"); - let layer_removal_guard = timeline.layer_removal_cs.lock().await; - info!("got layer_removal_cs.lock(), deleting layer files"); + let guards = async { tokio::join!(timeline.gc_lock.lock(), timeline.compaction_lock.lock()) }; + let guards = crate::timed( + guards, + "acquire gc and compaction locks", + std::time::Duration::from_secs(5), + ) + .await; // NB: storage_sync upload tasks that reference these layers have been cancelled // by the caller. - let local_timeline_directory = conf.timeline_path(&tenant_id, &timeline.timeline_id); + let local_timeline_directory = conf.timeline_path(&tenant_shard_id, &timeline.timeline_id); fail::fail_point!("timeline-delete-before-rm", |_| { Err(anyhow::anyhow!("failpoint: timeline-delete-before-rm"))? @@ -179,8 +157,8 @@ async fn delete_local_layer_files( // because of a previous failure/cancellation at/after // failpoint timeline-delete-after-rm. // - // It can also happen if we race with tenant detach, because, - // it doesn't grab the layer_removal_cs lock. + // ErrorKind::NotFound can also happen if we race with tenant detach, because, + // no locks are shared. // // For now, log and continue. // warn! 
level is technically not appropriate for the @@ -199,7 +177,7 @@ async fn delete_local_layer_files( return Ok(()); } - let metadata_path = conf.metadata_path(&tenant_id, &timeline.timeline_id); + let metadata_path = conf.metadata_path(&tenant_shard_id, &timeline.timeline_id); for entry in walkdir::WalkDir::new(&local_timeline_directory).contents_first(true) { #[cfg(feature = "testing")] @@ -248,8 +226,8 @@ async fn delete_local_layer_files( .with_context(|| format!("Failed to remove: {}", entry.path().display()))?; } - info!("finished deleting layer files, releasing layer_removal_cs.lock()"); - drop(layer_removal_guard); + info!("finished deleting layer files, releasing locks"); + drop(guards); fail::fail_point!("timeline-delete-after-rm", |_| { Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm"))? @@ -274,11 +252,11 @@ async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<( // (nothing can fail after its deletion) async fn cleanup_remaining_timeline_fs_traces( conf: &PageServerConf, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, ) -> anyhow::Result<()> { // Remove local metadata - tokio::fs::remove_file(conf.metadata_path(&tenant_id, &timeline_id)) + tokio::fs::remove_file(conf.metadata_path(&tenant_shard_id, &timeline_id)) .await .or_else(fs_ext::ignore_not_found) .context("remove metadata")?; @@ -290,7 +268,7 @@ async fn cleanup_remaining_timeline_fs_traces( }); // Remove timeline dir - tokio::fs::remove_dir(conf.timeline_path(&tenant_id, &timeline_id)) + tokio::fs::remove_dir(conf.timeline_path(&tenant_shard_id, &timeline_id)) .await .or_else(fs_ext::ignore_not_found) .context("timeline dir")?; @@ -305,13 +283,15 @@ async fn cleanup_remaining_timeline_fs_traces( // to be reordered later and thus missed if a crash occurs. // Note that we dont need to sync after mark file is removed // because we can tolerate the case when mark file reappears on startup. - let timeline_path = conf.timelines_path(&tenant_id); + let timeline_path = conf.timelines_path(&tenant_shard_id); crashsafe::fsync_async(timeline_path) .await .context("fsync_pre_mark_remove")?; // Remove delete mark - tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_id, timeline_id)) + // TODO: once we are confident that no more exist in the field, remove this + // line. It cleans up a legacy marker file that might in rare cases be present. + tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_shard_id, timeline_id)) .await .or_else(fs_ext::ignore_not_found) .context("remove delete mark") @@ -377,7 +357,7 @@ impl DeleteTimelineFlow { // NB: If this fails half-way through, and is retried, the retry will go through // all the same steps again. Make sure the code here is idempotent, and don't // error out if some of the shutdown tasks have already been completed! 
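With `layer_removal_cs` gone, timeline deletion serializes against background work by taking the new per-timeline `gc_lock` and `compaction_lock` together, as `delete_local_layer_files` above does with `tokio::join!`. Because gc only ever takes `gc_lock` and compaction only ever takes `compaction_lock`, the acquisition order does not matter, which is what the "in whatever order" note on those fields means. A standalone sketch of that acquisition (the field names mirror the change; everything else is illustrative):

use std::sync::Arc;
use tokio::sync::Mutex;

struct TimelineLocks {
    compaction_lock: Mutex<()>,
    gc_lock: Mutex<()>,
}

#[tokio::main]
async fn main() {
    let locks = Arc::new(TimelineLocks {
        compaction_lock: Mutex::new(()),
        gc_lock: Mutex::new(()),
    });

    // Deletion path: hold both guards so neither gc nor compaction can run concurrently.
    let (_gc, _compaction) = tokio::join!(locks.gc_lock.lock(), locks.compaction_lock.lock());
    // ... delete local layer files here, then drop both guards ...
}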
- #[instrument(skip(tenant), fields(tenant_id=%tenant.tenant_id))] + #[instrument(skip(tenant), fields(tenant_id=%tenant.tenant_shard_id.tenant_id, shard_id=%tenant.tenant_shard_id.shard_slug()))] pub async fn run( tenant: &Arc, timeline_id: TimelineId, @@ -391,8 +371,6 @@ impl DeleteTimelineFlow { set_deleted_in_remote_index(&timeline).await?; - create_delete_mark(tenant.conf, timeline.tenant_id, timeline.timeline_id).await?; - fail::fail_point!("timeline-delete-before-schedule", |_| { Err(anyhow::anyhow!( "failpoint: timeline-delete-before-schedule" @@ -464,10 +442,6 @@ impl DeleteTimelineFlow { guard.mark_in_progress()?; - // Note that delete mark can be missing on resume - // because we create delete mark after we set deleted_at in the index part. - create_delete_mark(tenant.conf, tenant.tenant_id, timeline_id).await?; - Self::schedule_background(guard, tenant.conf, tenant, timeline); Ok(()) @@ -479,7 +453,8 @@ impl DeleteTimelineFlow { timeline_id: TimelineId, ) -> anyhow::Result<()> { let r = - cleanup_remaining_timeline_fs_traces(tenant.conf, tenant.tenant_id, timeline_id).await; + cleanup_remaining_timeline_fs_traces(tenant.conf, tenant.tenant_shard_id, timeline_id) + .await; info!("Done"); r } @@ -550,13 +525,13 @@ impl DeleteTimelineFlow { tenant: Arc, timeline: Arc, ) { - let tenant_id = timeline.tenant_id; + let tenant_shard_id = timeline.tenant_shard_id; let timeline_id = timeline.timeline_id; task_mgr::spawn( task_mgr::BACKGROUND_RUNTIME.handle(), TaskKind::TimelineDeletionWorker, - Some(tenant_id), + Some(tenant_shard_id.tenant_id), Some(timeline_id), "timeline_delete", false, @@ -569,7 +544,7 @@ impl DeleteTimelineFlow { } .instrument({ let span = - tracing::info_span!(parent: None, "delete_timeline", tenant_id=%tenant_id, timeline_id=%timeline_id); + tracing::info_span!(parent: None, "delete_timeline", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),timeline_id=%timeline_id); span.follows_from(Span::current()); span }), @@ -582,13 +557,14 @@ impl DeleteTimelineFlow { tenant: &Tenant, timeline: &Timeline, ) -> Result<(), DeleteTimelineError> { - delete_local_layer_files(conf, tenant.tenant_id, timeline).await?; + delete_local_layer_files(conf, tenant.tenant_shard_id, timeline).await?; delete_remote_layers_and_index(timeline).await?; pausable_failpoint!("in_progress_delete"); - cleanup_remaining_timeline_fs_traces(conf, tenant.tenant_id, timeline.timeline_id).await?; + cleanup_remaining_timeline_fs_traces(conf, tenant.tenant_shard_id, timeline.timeline_id) + .await?; remove_timeline_from_tenant(tenant, timeline.timeline_id, &guard).await?; diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index f4a4c26c06c3..3fe4bc0f83d2 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -60,9 +60,12 @@ impl Timeline { task_mgr::spawn( BACKGROUND_RUNTIME.handle(), TaskKind::Eviction, - Some(self.tenant_id), + Some(self.tenant_shard_id.tenant_id), Some(self.timeline_id), - &format!("layer eviction for {}/{}", self.tenant_id, self.timeline_id), + &format!( + "layer eviction for {}/{}", + self.tenant_shard_id, self.timeline_id + ), false, async move { let cancel = task_mgr::shutdown_token(); @@ -77,7 +80,7 @@ impl Timeline { ); } - #[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))] + #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = 
%self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))] async fn eviction_task(self: Arc, cancel: CancellationToken) { use crate::tenant::tasks::random_init_delay; { @@ -296,7 +299,6 @@ impl Timeline { stats.evicted += 1; } Some(Err(EvictionError::NotFound | EvictionError::Downloaded)) => { - // compaction/gc removed the file while we were waiting on layer_removal_cs stats.not_evictable += 1; } } @@ -341,7 +343,7 @@ impl Timeline { // Make one of the tenant's timelines draw the short straw and run the calculation. // The others wait until the calculation is done so that they take into account the // imitated accesses that the winner made. - let tenant = match crate::tenant::mgr::get_tenant(self.tenant_id, true) { + let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id.tenant_id, true) { Ok(t) => t, Err(_) => { return ControlFlow::Break(()); diff --git a/pageserver/src/tenant/timeline/init.rs b/pageserver/src/tenant/timeline/init.rs index 96bf847fbb4b..916ebfc6d9f2 100644 --- a/pageserver/src/tenant/timeline/init.rs +++ b/pageserver/src/tenant/timeline/init.rs @@ -13,6 +13,7 @@ use crate::{ }; use anyhow::Context; use camino::Utf8Path; +use pageserver_api::shard::ShardIndex; use std::{collections::HashMap, str::FromStr}; use utils::lsn::Lsn; @@ -107,6 +108,7 @@ pub(super) fn reconcile( index_part: Option<&IndexPart>, disk_consistent_lsn: Lsn, generation: Generation, + shard: ShardIndex, ) -> Vec<(LayerFileName, Result)> { use Decision::*; @@ -118,10 +120,13 @@ pub(super) fn reconcile( .map(|(name, file_size)| { ( name, - // The generation here will be corrected to match IndexPart in the merge below, unless + // The generation and shard here will be corrected to match IndexPart in the merge below, unless // it is not in IndexPart, in which case using our current generation makes sense // because it will be uploaded in this generation. - (Some(LayerFileMetadata::new(file_size, generation)), None), + ( + Some(LayerFileMetadata::new(file_size, generation, shard)), + None, + ), ) }) .collect::(); diff --git a/pageserver/src/tenant/timeline/layer_manager.rs b/pageserver/src/tenant/timeline/layer_manager.rs index e4991e08659c..dcd82949dd0b 100644 --- a/pageserver/src/tenant/timeline/layer_manager.rs +++ b/pageserver/src/tenant/timeline/layer_manager.rs @@ -1,8 +1,9 @@ use anyhow::{bail, ensure, Context, Result}; +use pageserver_api::shard::TenantShardId; use std::{collections::HashMap, sync::Arc}; use tracing::trace; use utils::{ - id::{TenantId, TimelineId}, + id::TimelineId, lsn::{AtomicLsn, Lsn}, }; @@ -73,7 +74,7 @@ impl LayerManager { last_record_lsn: Lsn, conf: &'static PageServerConf, timeline_id: TimelineId, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, ) -> Result> { ensure!(lsn.is_aligned()); @@ -109,7 +110,8 @@ impl LayerManager { lsn ); - let new_layer = InMemoryLayer::create(conf, timeline_id, tenant_id, start_lsn).await?; + let new_layer = + InMemoryLayer::create(conf, timeline_id, tenant_shard_id, start_lsn).await?; let layer = Arc::new(new_layer); self.layer_map.open_layer = Some(layer.clone()); @@ -190,7 +192,6 @@ impl LayerManager { /// Called when compaction is completed. 
pub(crate) fn finish_compact_l0( &mut self, - layer_removal_cs: &Arc>, compact_from: &[Layer], compact_to: &[ResidentLayer], metrics: &TimelineMetrics, @@ -201,25 +202,16 @@ impl LayerManager { metrics.record_new_file_metrics(l.layer_desc().file_size); } for l in compact_from { - Self::delete_historic_layer(layer_removal_cs, l, &mut updates, &mut self.layer_fmgr); + Self::delete_historic_layer(l, &mut updates, &mut self.layer_fmgr); } updates.flush(); } - /// Called when garbage collect the timeline. Returns a guard that will apply the updates to the layer map. - pub(crate) fn finish_gc_timeline( - &mut self, - layer_removal_cs: &Arc>, - gc_layers: Vec, - ) { + /// Called when garbage collect has selected the layers to be removed. + pub(crate) fn finish_gc_timeline(&mut self, gc_layers: &[Layer]) { let mut updates = self.layer_map.batch_update(); for doomed_layer in gc_layers { - Self::delete_historic_layer( - layer_removal_cs, - &doomed_layer, - &mut updates, - &mut self.layer_fmgr, - ); + Self::delete_historic_layer(doomed_layer, &mut updates, &mut self.layer_fmgr); } updates.flush() } @@ -238,7 +230,6 @@ impl LayerManager { /// Remote storage is not affected by this operation. fn delete_historic_layer( // we cannot remove layers otherwise, since gc and compaction will race - _layer_removal_cs: &Arc>, layer: &Layer, updates: &mut BatchedUpdates<'_>, mapping: &mut LayerFileManager, diff --git a/pageserver/src/tenant/timeline/logical_size.rs b/pageserver/src/tenant/timeline/logical_size.rs index d9c2bc4cb970..a33fb28ebd83 100644 --- a/pageserver/src/tenant/timeline/logical_size.rs +++ b/pageserver/src/tenant/timeline/logical_size.rs @@ -4,7 +4,7 @@ use once_cell::sync::OnceCell; use tokio::sync::Semaphore; use utils::lsn::Lsn; -use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering}; +use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering}; use std::sync::Arc; /// Internal structure to hold all data needed for logical size calculation. @@ -23,7 +23,10 @@ pub(super) struct LogicalSize { /// /// NOTE: size at a given LSN is constant, but after a restart we will calculate /// the initial size at a different LSN. - pub initial_logical_size: OnceCell, + pub initial_logical_size: OnceCell<( + u64, + crate::metrics::initial_logical_size::FinishedCalculationGuard, + )>, /// Semaphore to track ongoing calculation of `initial_logical_size`. pub initial_size_computation: Arc, @@ -52,25 +55,57 @@ pub(super) struct LogicalSize { /// see `current_logical_size_gauge`. Use the `update_current_logical_size` /// to modify this, it will also keep the prometheus metric in sync. pub size_added_after_initial: AtomicI64, + + /// For [`crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE`]. + pub(super) did_return_approximate_to_walreceiver: AtomicBool, } /// Normalized current size, that the data in pageserver occupies. #[derive(Debug, Clone, Copy)] -pub(super) enum CurrentLogicalSize { +pub(crate) enum CurrentLogicalSize { /// The size is not yet calculated to the end, this is an intermediate result, /// constructed from walreceiver increments and normalized: logical data could delete some objects, hence be negative, /// yet total logical size cannot be below 0. - Approximate(u64), + Approximate(Approximate), // Fully calculated logical size, only other future walreceiver increments are changing it, and those changes are // available for observation without any calculations. 
- Exact(u64), + Exact(Exact), +} + +#[derive(Debug, Copy, Clone)] +pub(crate) enum Accuracy { + Approximate, + Exact, +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct Approximate(u64); +#[derive(Debug, Clone, Copy)] +pub(crate) struct Exact(u64); + +impl From<&Approximate> for u64 { + fn from(value: &Approximate) -> Self { + value.0 + } +} + +impl From<&Exact> for u64 { + fn from(val: &Exact) -> Self { + val.0 + } } impl CurrentLogicalSize { - pub(super) fn size(&self) -> u64 { - *match self { - Self::Approximate(size) => size, - Self::Exact(size) => size, + pub(crate) fn size_dont_care_about_accuracy(&self) -> u64 { + match self { + Self::Approximate(size) => size.into(), + Self::Exact(size) => size.into(), + } + } + pub(crate) fn accuracy(&self) -> Accuracy { + match self { + Self::Approximate(_) => Accuracy::Approximate, + Self::Exact(_) => Accuracy::Exact, } } } @@ -78,11 +113,16 @@ impl CurrentLogicalSize { impl LogicalSize { pub(super) fn empty_initial() -> Self { Self { - initial_logical_size: OnceCell::with_value(0), + initial_logical_size: OnceCell::with_value((0, { + crate::metrics::initial_logical_size::START_CALCULATION + .first(None) + .calculation_result_saved() + })), // initial_logical_size already computed, so, don't admit any calculations initial_size_computation: Arc::new(Semaphore::new(0)), initial_part_end: None, size_added_after_initial: AtomicI64::new(0), + did_return_approximate_to_walreceiver: AtomicBool::new(false), } } @@ -92,22 +132,24 @@ impl LogicalSize { initial_size_computation: Arc::new(Semaphore::new(1)), initial_part_end: Some(compute_to), size_added_after_initial: AtomicI64::new(0), + did_return_approximate_to_walreceiver: AtomicBool::new(false), } } - pub(super) fn current_size(&self) -> anyhow::Result { + pub(super) fn current_size(&self) -> CurrentLogicalSize { let size_increment: i64 = self.size_added_after_initial.load(AtomicOrdering::Acquire); // ^^^ keep this type explicit so that the casts in this function break if // we change the type. match self.initial_logical_size.get() { - Some(initial_size) => { - initial_size.checked_add_signed(size_increment) + Some((initial_size, _)) => { + CurrentLogicalSize::Exact(Exact(initial_size.checked_add_signed(size_increment) .with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}")) - .map(CurrentLogicalSize::Exact) + .unwrap())) } None => { + let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0); - Ok(CurrentLogicalSize::Approximate(non_negative_size_increment)) + CurrentLogicalSize::Approximate(Approximate(non_negative_size_increment)) } } } @@ -121,7 +163,7 @@ impl LogicalSize { /// available for re-use. This doesn't contain the incremental part. pub(super) fn initialized_size(&self, lsn: Lsn) -> Option { match self.initial_part_end { - Some(v) if v == lsn => self.initial_logical_size.get().copied(), + Some(v) if v == lsn => self.initial_logical_size.get().map(|(s, _)| *s), _ => None, } } diff --git a/pageserver/src/tenant/timeline/uninit.rs b/pageserver/src/tenant/timeline/uninit.rs index f9bb6ca4195a..61130f541a0c 100644 --- a/pageserver/src/tenant/timeline/uninit.rs +++ b/pageserver/src/tenant/timeline/uninit.rs @@ -43,11 +43,11 @@ impl<'t> UninitializedTimeline<'t> { /// The caller is responsible for activating the timeline (function `.activate()`). 
pub(crate) fn finish_creation(mut self) -> anyhow::Result> { let timeline_id = self.timeline_id; - let tenant_id = self.owning_tenant.tenant_id; + let tenant_shard_id = self.owning_tenant.tenant_shard_id; if self.raw_timeline.is_none() { return Err(anyhow::anyhow!( - "No timeline for initialization found for {tenant_id}/{timeline_id}" + "No timeline for initialization found for {tenant_shard_id}/{timeline_id}" )); } @@ -61,13 +61,13 @@ impl<'t> UninitializedTimeline<'t> { anyhow::ensure!( new_disk_consistent_lsn.is_valid(), - "new timeline {tenant_id}/{timeline_id} has invalid disk_consistent_lsn" + "new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn" ); let mut timelines = self.owning_tenant.timelines.lock().unwrap(); match timelines.entry(timeline_id) { Entry::Occupied(_) => anyhow::bail!( - "Found freshly initialized timeline {tenant_id}/{timeline_id} in the tenant map" + "Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map" ), Entry::Vacant(v) => { // after taking here should be no fallible operations, because the drop guard will not @@ -79,7 +79,7 @@ impl<'t> UninitializedTimeline<'t> { // this should be an assertion. uninit_mark.remove_uninit_mark().with_context(|| { format!( - "Failed to remove uninit mark file for timeline {tenant_id}/{timeline_id}" + "Failed to remove uninit mark file for timeline {tenant_shard_id}/{timeline_id}" ) })?; v.insert(Arc::clone(&new_timeline)); @@ -134,7 +134,7 @@ impl<'t> UninitializedTimeline<'t> { .with_context(|| { format!( "No raw timeline {}/{} found", - self.owning_tenant.tenant_id, self.timeline_id + self.owning_tenant.tenant_shard_id, self.timeline_id ) })? .0) @@ -144,7 +144,7 @@ impl<'t> UninitializedTimeline<'t> { impl Drop for UninitializedTimeline<'_> { fn drop(&mut self) { if let Some((_, uninit_mark)) = self.raw_timeline.take() { - let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_id, timeline_id = %self.timeline_id).entered(); + let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_shard_id.tenant_id, shard_id = %self.owning_tenant.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id).entered(); error!("Timeline got dropped without initializing, cleaning its files"); cleanup_timeline_directory(uninit_mark); } diff --git a/pageserver/src/tenant/timeline/walreceiver.rs b/pageserver/src/tenant/timeline/walreceiver.rs index 842bc3675c5a..04ff8602d65b 100644 --- a/pageserver/src/tenant/timeline/walreceiver.rs +++ b/pageserver/src/tenant/timeline/walreceiver.rs @@ -71,7 +71,7 @@ impl WalReceiver { mut broker_client: BrokerClientChannel, ctx: &RequestContext, ) -> Self { - let tenant_id = timeline.tenant_id; + let tenant_id = timeline.tenant_shard_id.tenant_id; let timeline_id = timeline.timeline_id; let walreceiver_ctx = ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error); diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index 30777124457a..7bfa246eeb0f 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -75,7 +75,7 @@ pub(super) async fn connection_manager_loop_step( } let id = TenantTimelineId { - tenant_id: connection_manager_state.timeline.tenant_id, + tenant_id: connection_manager_state.timeline.tenant_shard_id.tenant_id, timeline_id: 
connection_manager_state.timeline.timeline_id, }; @@ -388,7 +388,7 @@ struct BrokerSkTimeline { impl ConnectionManagerState { pub(super) fn new(timeline: Arc, conf: WalReceiverConf) -> Self { let id = TenantTimelineId { - tenant_id: timeline.tenant_id, + tenant_id: timeline.tenant_shard_id.tenant_id, timeline_id: timeline.timeline_id, }; Self { diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index 3e56753ad495..7045658f2415 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -163,7 +163,7 @@ pub(super) async fn handle_walreceiver_connection( task_mgr::spawn( WALRECEIVER_RUNTIME.handle(), TaskKind::WalReceiverConnectionPoller, - Some(timeline.tenant_id), + Some(timeline.tenant_shard_id.tenant_id), Some(timeline.timeline_id), "walreceiver connection", false, @@ -396,11 +396,12 @@ pub(super) async fn handle_walreceiver_connection( // Send the replication feedback message. // Regular standby_status_update fields are put into this message. - let (timeline_logical_size, _) = timeline + let current_timeline_size = timeline .get_current_logical_size(&ctx) - .context("Status update creation failed to get current logical size")?; + // FIXME: https://github.com/neondatabase/neon/issues/5963 + .size_dont_care_about_accuracy(); let status_update = PageserverFeedback { - current_timeline_size: timeline_logical_size, + current_timeline_size, last_received_lsn, disk_consistent_lsn, remote_consistent_lsn, diff --git a/pageserver/src/tenant/upload_queue.rs b/pageserver/src/tenant/upload_queue.rs index b47878aacce8..32f14f40c532 100644 --- a/pageserver/src/tenant/upload_queue.rs +++ b/pageserver/src/tenant/upload_queue.rs @@ -1,6 +1,5 @@ use super::storage_layer::LayerFileName; use super::storage_layer::ResidentLayer; -use super::Generation; use crate::tenant::metadata::TimelineMetadata; use crate::tenant::remote_timeline_client::index::IndexPart; use crate::tenant::remote_timeline_client::index::LayerFileMetadata; @@ -15,6 +14,9 @@ use utils::lsn::AtomicLsn; use std::sync::atomic::AtomicU32; use utils::lsn::Lsn; +#[cfg(feature = "testing")] +use utils::generation::Generation; + // clippy warns that Uninitialized is much smaller than Initialized, which wastes // memory for Uninitialized variants. Doesn't matter in practice, there are not // that many upload queues in a running pageserver, and most of them are initialized @@ -88,6 +90,14 @@ pub(crate) struct UploadQueueInitialized { /// bug causing leaks, then it's better to not leave this enabled for production builds. #[cfg(feature = "testing")] pub(crate) dangling_files: HashMap, + + /// Set to true when we have inserted the `UploadOp::Shutdown` into the `inprogress_tasks`. + pub(crate) shutting_down: bool, + + /// Permitless semaphore on which any number of `RemoteTimelineClient::shutdown` futures can + /// wait on until one of them stops the queue. The semaphore is closed when + /// `RemoteTimelineClient::launch_queued_tasks` encounters `UploadOp::Shutdown`. 
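
The new doc comment above describes the shutdown handshake: a semaphore created with zero permits that any number of shutdown futures wait on, and that the queue task closes once it reaches UploadOp::Shutdown. A minimal sketch of that closed-semaphore-as-broadcast pattern with tokio::sync::Semaphore follows; it is illustrative only, not the RemoteTimelineClient code, and assumes the tokio runtime.

use std::sync::Arc;
use tokio::sync::Semaphore;

#[tokio::main]
async fn main() {
    // Zero permits: acquire() can only ever return once the semaphore is closed.
    let shutdown_ready = Arc::new(Semaphore::new(0));

    let waiter = {
        let sem = shutdown_ready.clone();
        tokio::spawn(async move {
            // Any number of shutdown() callers can wait here concurrently.
            match sem.acquire().await {
                Ok(_permit) => unreachable!("no permits are ever added"),
                Err(_closed) => println!("queue stopped"),
            }
        })
    };

    // The task draining the queue closes the semaphore when it sees the
    // Shutdown operation, which releases every waiter at once.
    shutdown_ready.close();
    waiter.await.unwrap();
}
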
+ pub(crate) shutdown_ready: Arc, } impl UploadQueueInitialized { @@ -146,6 +156,8 @@ impl UploadQueue { queued_operations: VecDeque::new(), #[cfg(feature = "testing")] dangling_files: HashMap::new(), + shutting_down: false, + shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)), }; *self = UploadQueue::Initialized(state); @@ -193,6 +205,8 @@ impl UploadQueue { queued_operations: VecDeque::new(), #[cfg(feature = "testing")] dangling_files: HashMap::new(), + shutting_down: false, + shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)), }; *self = UploadQueue::Initialized(state); @@ -204,7 +218,13 @@ impl UploadQueue { UploadQueue::Uninitialized | UploadQueue::Stopped(_) => { anyhow::bail!("queue is in state {}", self.as_str()) } - UploadQueue::Initialized(x) => Ok(x), + UploadQueue::Initialized(x) => { + if !x.shutting_down { + Ok(x) + } else { + anyhow::bail!("queue is shutting down") + } + } } } @@ -232,7 +252,7 @@ pub(crate) struct UploadTask { /// for timeline deletion, which skips this queue and goes directly to DeletionQueue. #[derive(Debug)] pub(crate) struct Delete { - pub(crate) layers: Vec<(LayerFileName, Generation)>, + pub(crate) layers: Vec<(LayerFileName, LayerFileMetadata)>, } #[derive(Debug)] @@ -248,6 +268,10 @@ pub(crate) enum UploadOp { /// Barrier. When the barrier operation is reached, Barrier(tokio::sync::watch::Sender<()>), + + /// Shutdown; upon encountering this operation no new operations will be spawned, otherwise + /// this is the same as a Barrier. + Shutdown, } impl std::fmt::Display for UploadOp { @@ -269,6 +293,7 @@ impl std::fmt::Display for UploadOp { write!(f, "Delete({} layers)", delete.layers.len()) } UploadOp::Barrier(_) => write!(f, "Barrier"), + UploadOp::Shutdown => write!(f, "Shutdown"), } } } diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index 23367928d35a..cbb08f7ff147 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -98,261 +98,258 @@ impl<'a> WalIngest<'a> { self.checkpoint_modified = true; } - // Heap AM records need some special handling, because they modify VM pages - // without registering them with the standard mechanism. 
- if decoded.xl_rmid == pg_constants::RM_HEAP_ID - || decoded.xl_rmid == pg_constants::RM_HEAP2_ID - { - self.ingest_heapam_record(&mut buf, modification, decoded, ctx) - .await?; - } - if decoded.xl_rmid == pg_constants::RM_NEON_ID { - self.ingest_neonrmgr_record(&mut buf, modification, decoded, ctx) - .await?; - } - // Handle other special record types - if decoded.xl_rmid == pg_constants::RM_SMGR_ID - && (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == pg_constants::XLOG_SMGR_CREATE - { - let create = XlSmgrCreate::decode(&mut buf); - self.ingest_xlog_smgr_create(modification, &create, ctx) - .await?; - } else if decoded.xl_rmid == pg_constants::RM_SMGR_ID - && (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == pg_constants::XLOG_SMGR_TRUNCATE - { - let truncate = XlSmgrTruncate::decode(&mut buf); - self.ingest_xlog_smgr_truncate(modification, &truncate, ctx) - .await?; - } else if decoded.xl_rmid == pg_constants::RM_DBASE_ID { - debug!( - "handle RM_DBASE_ID for Postgres version {:?}", - self.timeline.pg_version - ); - if self.timeline.pg_version == 14 { - if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == postgres_ffi::v14::bindings::XLOG_DBASE_CREATE - { - let createdb = XlCreateDatabase::decode(&mut buf); - debug!("XLOG_DBASE_CREATE v14"); + match decoded.xl_rmid { + pg_constants::RM_HEAP_ID | pg_constants::RM_HEAP2_ID => { + // Heap AM records need some special handling, because they modify VM pages + // without registering them with the standard mechanism. + self.ingest_heapam_record(&mut buf, modification, decoded, ctx) + .await?; + } + pg_constants::RM_NEON_ID => { + self.ingest_neonrmgr_record(&mut buf, modification, decoded, ctx) + .await?; + } + // Handle other special record types + pg_constants::RM_SMGR_ID => { + let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; - self.ingest_xlog_dbase_create(modification, &createdb, ctx) + if info == pg_constants::XLOG_SMGR_CREATE { + let create = XlSmgrCreate::decode(&mut buf); + self.ingest_xlog_smgr_create(modification, &create, ctx) .await?; - } else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == postgres_ffi::v14::bindings::XLOG_DBASE_DROP - { - let dropdb = XlDropDatabase::decode(&mut buf); - for tablespace_id in dropdb.tablespace_ids { - trace!("Drop db {}, {}", tablespace_id, dropdb.db_id); - modification - .drop_dbdir(tablespace_id, dropdb.db_id, ctx) + } else if info == pg_constants::XLOG_SMGR_TRUNCATE { + let truncate = XlSmgrTruncate::decode(&mut buf); + self.ingest_xlog_smgr_truncate(modification, &truncate, ctx) + .await?; + } + } + pg_constants::RM_DBASE_ID => { + let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; + debug!(%info, pg_version=%self.timeline.pg_version, "handle RM_DBASE_ID"); + + if self.timeline.pg_version == 14 { + if info == postgres_ffi::v14::bindings::XLOG_DBASE_CREATE { + let createdb = XlCreateDatabase::decode(&mut buf); + debug!("XLOG_DBASE_CREATE v14"); + + self.ingest_xlog_dbase_create(modification, &createdb, ctx) .await?; + } else if info == postgres_ffi::v14::bindings::XLOG_DBASE_DROP { + let dropdb = XlDropDatabase::decode(&mut buf); + for tablespace_id in dropdb.tablespace_ids { + trace!("Drop db {}, {}", tablespace_id, dropdb.db_id); + modification + .drop_dbdir(tablespace_id, dropdb.db_id, ctx) + .await?; + } } - } - } else if self.timeline.pg_version == 15 { - if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_WAL_LOG - { - debug!("XLOG_DBASE_CREATE_WAL_LOG: noop"); - } 
else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY - { - // The XLOG record was renamed between v14 and v15, - // but the record format is the same. - // So we can reuse XlCreateDatabase here. - debug!("XLOG_DBASE_CREATE_FILE_COPY"); - let createdb = XlCreateDatabase::decode(&mut buf); - self.ingest_xlog_dbase_create(modification, &createdb, ctx) - .await?; - } else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == postgres_ffi::v15::bindings::XLOG_DBASE_DROP - { - let dropdb = XlDropDatabase::decode(&mut buf); - for tablespace_id in dropdb.tablespace_ids { - trace!("Drop db {}, {}", tablespace_id, dropdb.db_id); - modification - .drop_dbdir(tablespace_id, dropdb.db_id, ctx) + } else if self.timeline.pg_version == 15 { + if info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_WAL_LOG { + debug!("XLOG_DBASE_CREATE_WAL_LOG: noop"); + } else if info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY { + // The XLOG record was renamed between v14 and v15, + // but the record format is the same. + // So we can reuse XlCreateDatabase here. + debug!("XLOG_DBASE_CREATE_FILE_COPY"); + let createdb = XlCreateDatabase::decode(&mut buf); + self.ingest_xlog_dbase_create(modification, &createdb, ctx) .await?; + } else if info == postgres_ffi::v15::bindings::XLOG_DBASE_DROP { + let dropdb = XlDropDatabase::decode(&mut buf); + for tablespace_id in dropdb.tablespace_ids { + trace!("Drop db {}, {}", tablespace_id, dropdb.db_id); + modification + .drop_dbdir(tablespace_id, dropdb.db_id, ctx) + .await?; + } } - } - } else if self.timeline.pg_version == 16 { - if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_WAL_LOG - { - debug!("XLOG_DBASE_CREATE_WAL_LOG: noop"); - } else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY - { - // The XLOG record was renamed between v14 and v15, - // but the record format is the same. - // So we can reuse XlCreateDatabase here. - debug!("XLOG_DBASE_CREATE_FILE_COPY"); - let createdb = XlCreateDatabase::decode(&mut buf); - self.ingest_xlog_dbase_create(modification, &createdb, ctx) - .await?; - } else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) - == postgres_ffi::v16::bindings::XLOG_DBASE_DROP - { - let dropdb = XlDropDatabase::decode(&mut buf); - for tablespace_id in dropdb.tablespace_ids { - trace!("Drop db {}, {}", tablespace_id, dropdb.db_id); - modification - .drop_dbdir(tablespace_id, dropdb.db_id, ctx) + } else if self.timeline.pg_version == 16 { + if info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_WAL_LOG { + debug!("XLOG_DBASE_CREATE_WAL_LOG: noop"); + } else if info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY { + // The XLOG record was renamed between v14 and v15, + // but the record format is the same. + // So we can reuse XlCreateDatabase here. 
+ debug!("XLOG_DBASE_CREATE_FILE_COPY"); + let createdb = XlCreateDatabase::decode(&mut buf); + self.ingest_xlog_dbase_create(modification, &createdb, ctx) .await?; + } else if info == postgres_ffi::v16::bindings::XLOG_DBASE_DROP { + let dropdb = XlDropDatabase::decode(&mut buf); + for tablespace_id in dropdb.tablespace_ids { + trace!("Drop db {}, {}", tablespace_id, dropdb.db_id); + modification + .drop_dbdir(tablespace_id, dropdb.db_id, ctx) + .await?; + } } } } - } else if decoded.xl_rmid == pg_constants::RM_TBLSPC_ID { - trace!("XLOG_TBLSPC_CREATE/DROP is not handled yet"); - } else if decoded.xl_rmid == pg_constants::RM_CLOG_ID { - let info = decoded.xl_info & !pg_constants::XLR_INFO_MASK; - if info == pg_constants::CLOG_ZEROPAGE { - let pageno = buf.get_u32_le(); - let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT; - let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT; - self.put_slru_page_image( - modification, - SlruKind::Clog, - segno, - rpageno, - ZERO_PAGE.clone(), - ctx, - ) - .await?; - } else { - assert!(info == pg_constants::CLOG_TRUNCATE); - let xlrec = XlClogTruncate::decode(&mut buf); - self.ingest_clog_truncate_record(modification, &xlrec, ctx) + pg_constants::RM_TBLSPC_ID => { + trace!("XLOG_TBLSPC_CREATE/DROP is not handled yet"); + } + pg_constants::RM_CLOG_ID => { + let info = decoded.xl_info & !pg_constants::XLR_INFO_MASK; + + if info == pg_constants::CLOG_ZEROPAGE { + let pageno = buf.get_u32_le(); + let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT; + let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT; + self.put_slru_page_image( + modification, + SlruKind::Clog, + segno, + rpageno, + ZERO_PAGE.clone(), + ctx, + ) .await?; + } else { + assert!(info == pg_constants::CLOG_TRUNCATE); + let xlrec = XlClogTruncate::decode(&mut buf); + self.ingest_clog_truncate_record(modification, &xlrec, ctx) + .await?; + } } - } else if decoded.xl_rmid == pg_constants::RM_XACT_ID { - let info = decoded.xl_info & pg_constants::XLOG_XACT_OPMASK; - if info == pg_constants::XLOG_XACT_COMMIT || info == pg_constants::XLOG_XACT_ABORT { - let parsed_xact = - XlXactParsedRecord::decode(&mut buf, decoded.xl_xid, decoded.xl_info); - self.ingest_xact_record( - modification, - &parsed_xact, - info == pg_constants::XLOG_XACT_COMMIT, - ctx, - ) - .await?; - } else if info == pg_constants::XLOG_XACT_COMMIT_PREPARED - || info == pg_constants::XLOG_XACT_ABORT_PREPARED - { - let parsed_xact = - XlXactParsedRecord::decode(&mut buf, decoded.xl_xid, decoded.xl_info); - self.ingest_xact_record( - modification, - &parsed_xact, - info == pg_constants::XLOG_XACT_COMMIT_PREPARED, - ctx, - ) - .await?; - // Remove twophase file. 
see RemoveTwoPhaseFile() in postgres code - trace!( - "Drop twophaseFile for xid {} parsed_xact.xid {} here at {}", - decoded.xl_xid, - parsed_xact.xid, - lsn, - ); - modification - .drop_twophase_file(parsed_xact.xid, ctx) + pg_constants::RM_XACT_ID => { + let info = decoded.xl_info & pg_constants::XLOG_XACT_OPMASK; + + if info == pg_constants::XLOG_XACT_COMMIT || info == pg_constants::XLOG_XACT_ABORT { + let parsed_xact = + XlXactParsedRecord::decode(&mut buf, decoded.xl_xid, decoded.xl_info); + self.ingest_xact_record( + modification, + &parsed_xact, + info == pg_constants::XLOG_XACT_COMMIT, + ctx, + ) .await?; - } else if info == pg_constants::XLOG_XACT_PREPARE { - modification - .put_twophase_file(decoded.xl_xid, Bytes::copy_from_slice(&buf[..]), ctx) + } else if info == pg_constants::XLOG_XACT_COMMIT_PREPARED + || info == pg_constants::XLOG_XACT_ABORT_PREPARED + { + let parsed_xact = + XlXactParsedRecord::decode(&mut buf, decoded.xl_xid, decoded.xl_info); + self.ingest_xact_record( + modification, + &parsed_xact, + info == pg_constants::XLOG_XACT_COMMIT_PREPARED, + ctx, + ) .await?; + // Remove twophase file. see RemoveTwoPhaseFile() in postgres code + trace!( + "Drop twophaseFile for xid {} parsed_xact.xid {} here at {}", + decoded.xl_xid, + parsed_xact.xid, + lsn, + ); + modification + .drop_twophase_file(parsed_xact.xid, ctx) + .await?; + } else if info == pg_constants::XLOG_XACT_PREPARE { + modification + .put_twophase_file(decoded.xl_xid, Bytes::copy_from_slice(&buf[..]), ctx) + .await?; + } } - } else if decoded.xl_rmid == pg_constants::RM_MULTIXACT_ID { - let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; - - if info == pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE { - let pageno = buf.get_u32_le(); - let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT; - let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT; - self.put_slru_page_image( - modification, - SlruKind::MultiXactOffsets, - segno, - rpageno, - ZERO_PAGE.clone(), - ctx, - ) - .await?; - } else if info == pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE { - let pageno = buf.get_u32_le(); - let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT; - let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT; - self.put_slru_page_image( - modification, - SlruKind::MultiXactMembers, - segno, - rpageno, - ZERO_PAGE.clone(), - ctx, - ) - .await?; - } else if info == pg_constants::XLOG_MULTIXACT_CREATE_ID { - let xlrec = XlMultiXactCreate::decode(&mut buf); - self.ingest_multixact_create_record(modification, &xlrec)?; - } else if info == pg_constants::XLOG_MULTIXACT_TRUNCATE_ID { - let xlrec = XlMultiXactTruncate::decode(&mut buf); - self.ingest_multixact_truncate_record(modification, &xlrec, ctx) + pg_constants::RM_MULTIXACT_ID => { + let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; + + if info == pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE { + let pageno = buf.get_u32_le(); + let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT; + let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT; + self.put_slru_page_image( + modification, + SlruKind::MultiXactOffsets, + segno, + rpageno, + ZERO_PAGE.clone(), + ctx, + ) .await?; - } - } else if decoded.xl_rmid == pg_constants::RM_RELMAP_ID { - let xlrec = XlRelmapUpdate::decode(&mut buf); - self.ingest_relmap_page(modification, &xlrec, decoded, ctx) - .await?; - } else if decoded.xl_rmid == pg_constants::RM_XLOG_ID { - let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; - if info == pg_constants::XLOG_NEXTOID { - let next_oid = 
buf.get_u32_le(); - if self.checkpoint.nextOid != next_oid { - self.checkpoint.nextOid = next_oid; - self.checkpoint_modified = true; + } else if info == pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE { + let pageno = buf.get_u32_le(); + let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT; + let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT; + self.put_slru_page_image( + modification, + SlruKind::MultiXactMembers, + segno, + rpageno, + ZERO_PAGE.clone(), + ctx, + ) + .await?; + } else if info == pg_constants::XLOG_MULTIXACT_CREATE_ID { + let xlrec = XlMultiXactCreate::decode(&mut buf); + self.ingest_multixact_create_record(modification, &xlrec)?; + } else if info == pg_constants::XLOG_MULTIXACT_TRUNCATE_ID { + let xlrec = XlMultiXactTruncate::decode(&mut buf); + self.ingest_multixact_truncate_record(modification, &xlrec, ctx) + .await?; } - } else if info == pg_constants::XLOG_CHECKPOINT_ONLINE - || info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN - { - let mut checkpoint_bytes = [0u8; SIZEOF_CHECKPOINT]; - buf.copy_to_slice(&mut checkpoint_bytes); - let xlog_checkpoint = CheckPoint::decode(&checkpoint_bytes)?; - trace!( - "xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}", - xlog_checkpoint.oldestXid, - self.checkpoint.oldestXid - ); - if (self - .checkpoint - .oldestXid - .wrapping_sub(xlog_checkpoint.oldestXid) as i32) - < 0 + } + pg_constants::RM_RELMAP_ID => { + let xlrec = XlRelmapUpdate::decode(&mut buf); + self.ingest_relmap_page(modification, &xlrec, decoded, ctx) + .await?; + } + pg_constants::RM_XLOG_ID => { + let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; + + if info == pg_constants::XLOG_NEXTOID { + let next_oid = buf.get_u32_le(); + if self.checkpoint.nextOid != next_oid { + self.checkpoint.nextOid = next_oid; + self.checkpoint_modified = true; + } + } else if info == pg_constants::XLOG_CHECKPOINT_ONLINE + || info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN { - self.checkpoint.oldestXid = xlog_checkpoint.oldestXid; - self.checkpoint_modified = true; + let mut checkpoint_bytes = [0u8; SIZEOF_CHECKPOINT]; + buf.copy_to_slice(&mut checkpoint_bytes); + let xlog_checkpoint = CheckPoint::decode(&checkpoint_bytes)?; + trace!( + "xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}", + xlog_checkpoint.oldestXid, + self.checkpoint.oldestXid + ); + if (self + .checkpoint + .oldestXid + .wrapping_sub(xlog_checkpoint.oldestXid) as i32) + < 0 + { + self.checkpoint.oldestXid = xlog_checkpoint.oldestXid; + self.checkpoint_modified = true; + } } } - } else if decoded.xl_rmid == pg_constants::RM_LOGICALMSG_ID { - let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; - if info == pg_constants::XLOG_LOGICAL_MESSAGE { - let xlrec = XlLogicalMessage::decode(&mut buf); - let prefix = std::str::from_utf8(&buf[0..xlrec.prefix_size - 1])?; - let message = &buf[xlrec.prefix_size..xlrec.prefix_size + xlrec.message_size]; - if prefix == "neon-test" { - // This is a convenient way to make the WAL ingestion pause at - // particular point in the WAL. For more fine-grained control, - // we could peek into the message and only pause if it contains - // a particular string, for example, but this is enough for now. 
- crate::failpoint_support::sleep_millis_async!( - "wal-ingest-logical-message-sleep" - ); - } else if let Some(path) = prefix.strip_prefix("neon-file:") { - modification.put_file(path, message, ctx).await?; + pg_constants::RM_LOGICALMSG_ID => { + let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; + + if info == pg_constants::XLOG_LOGICAL_MESSAGE { + let xlrec = XlLogicalMessage::decode(&mut buf); + let prefix = std::str::from_utf8(&buf[0..xlrec.prefix_size - 1])?; + let message = &buf[xlrec.prefix_size..xlrec.prefix_size + xlrec.message_size]; + if prefix == "neon-test" { + // This is a convenient way to make the WAL ingestion pause at + // particular point in the WAL. For more fine-grained control, + // we could peek into the message and only pause if it contains + // a particular string, for example, but this is enough for now. + crate::failpoint_support::sleep_millis_async!( + "wal-ingest-logical-message-sleep" + ); + } else if let Some(path) = prefix.strip_prefix("neon-file:") { + modification.put_file(path, message, ctx).await?; + } } } + _x => { + // TODO: should probably log & fail here instead of blindly + // doing something without understanding the protocol + } } // Iterate through all the blocks that the record modifies, and @@ -1440,7 +1437,16 @@ impl<'a> WalIngest<'a> { // record. // TODO: would be nice if to be more explicit about it let last_lsn = modification.lsn; - let old_nblocks = if !self + + // Get current size and put rel creation if rel doesn't exist + // + // NOTE: we check the cache first even though get_rel_exists and get_rel_size would + // check the cache too. This is because eagerly checking the cache results in + // less work overall and 10% better performance. It's more work on cache miss + // but cache miss is rare. + let old_nblocks = if let Some(nblocks) = self.timeline.get_cached_rel_size(&rel, last_lsn) { + nblocks + } else if !self .timeline .get_rel_exists(rel, last_lsn, true, ctx) .await? @@ -2079,4 +2085,88 @@ mod tests { Ok(()) } + + /// Replay a wal segment file taken directly from safekeepers. + /// + /// This test is useful for benchmarking since it allows us to profile only + /// the walingest code in a single-threaded executor, and iterate more quickly + /// without waiting for unrelated steps. + #[tokio::test] + async fn test_ingest_real_wal() { + use crate::tenant::harness::*; + use postgres_ffi::waldecoder::WalStreamDecoder; + use postgres_ffi::WAL_SEGMENT_SIZE; + + // Define test data path and constants. + // + // Steps to reconstruct the data, if needed: + // 1. Run the pgbench python test + // 2. Take the first wal segment file from safekeeper + // 3. Compress it using `zstd --long input_file` + // 4. Copy initdb.tar.zst from local_fs_remote_storage + // 5. Grep sk logs for "restart decoder" to get startpoint + // 6. Run just the decoder from this test to get the endpoint. + // It's the last LSN the decoder will output. + let pg_version = 15; // The test data was generated by pg15 + let path = "test_data/sk_wal_segment_from_pgbench"; + let wal_segment_path = format!("{path}/000000010000000000000001.zst"); + let startpoint = Lsn::from_hex("14AEC08").unwrap(); + let endpoint = Lsn::from_hex("1FFFF98").unwrap(); + + // Bootstrap a real timeline. We can't use create_test_timeline because + // it doesn't create a real checkpoint, and Walingest::new tries to parse + // the garbage data. 
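
The get_relsize change above explains why the relation-size cache is consulted eagerly before get_rel_exists/get_rel_size: the cheap check wins overall even though the slower paths also consult the cache. A tiny sketch of that cache-first control flow is below; a plain HashMap stands in for the real cache, and the type and method names are invented.

use std::collections::HashMap;

struct RelSizeCache {
    cache: HashMap<String, u32>,
}

impl RelSizeCache {
    fn get_cached(&self, rel: &str) -> Option<u32> {
        self.cache.get(rel).copied()
    }

    // Stand-ins for the slower, authoritative lookups.
    fn rel_exists_slow(&self, _rel: &str) -> bool { false }
    fn rel_size_slow(&self, _rel: &str) -> u32 { 0 }

    fn old_nblocks(&self, rel: &str) -> u32 {
        if let Some(nblocks) = self.get_cached(rel) {
            // Hot path: a cache hit avoids both slower calls entirely.
            nblocks
        } else if !self.rel_exists_slow(rel) {
            // Relation does not exist yet: it is being created by this record.
            0
        } else {
            self.rel_size_slow(rel)
        }
    }
}

fn main() {
    let cache = RelSizeCache {
        cache: HashMap::from([("rel_a".to_string(), 128)]),
    };
    assert_eq!(cache.old_nblocks("rel_a"), 128); // cache hit
    assert_eq!(cache.old_nblocks("rel_b"), 0);   // miss, relation being created
    println!("ok");
}
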
+ // + // TODO use the initdb.tar.zst file stored with the test data to avoid + // problems with inconsistent initdb results after pg minor version bumps. + let (tenant, ctx) = TenantHarness::create("test_ingest_real_wal") + .unwrap() + .load() + .await; + let tline = tenant + .bootstrap_timeline(TIMELINE_ID, pg_version, None, &ctx) + .await + .unwrap(); + + // We fully read and decompress this into memory before decoding + // to get a more accurate perf profile of the decoder. + let bytes = { + use async_compression::tokio::bufread::ZstdDecoder; + let file = tokio::fs::File::open(wal_segment_path).await.unwrap(); + let reader = tokio::io::BufReader::new(file); + let decoder = ZstdDecoder::new(reader); + let mut reader = tokio::io::BufReader::new(decoder); + let mut buffer = Vec::new(); + tokio::io::copy_buf(&mut reader, &mut buffer).await.unwrap(); + buffer + }; + + // TODO start a profiler too + let started_at = std::time::Instant::now(); + + // Initialize walingest + let xlogoff: usize = startpoint.segment_offset(WAL_SEGMENT_SIZE); + let mut decoder = WalStreamDecoder::new(startpoint, pg_version); + let mut walingest = WalIngest::new(tline.as_ref(), startpoint, &ctx) + .await + .unwrap(); + let mut modification = tline.begin_modification(endpoint); + let mut decoded = DecodedWALRecord::default(); + println!("decoding {} bytes", bytes.len() - xlogoff); + + // Decode and ingest wal. We process the wal in chunks because + // that's what happens when we get bytes from safekeepers. + for chunk in bytes[xlogoff..].chunks(50) { + decoder.feed_bytes(chunk); + while let Some((lsn, recdata)) = decoder.poll_decode().unwrap() { + walingest + .ingest_record(recdata, lsn, &mut modification, &mut decoded, &ctx) + .await + .unwrap(); + } + } + + let duration = started_at.elapsed(); + println!("done in {:?}", duration); + } } diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 5d8cc0e181d1..edce158e75fe 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -41,10 +41,14 @@ use utils::{bin_ser::BeSer, id::TenantId, lsn::Lsn, nonblock::set_nonblock}; #[cfg(feature = "testing")] use std::sync::atomic::{AtomicUsize, Ordering}; +#[cfg(feature = "testing")] +use pageserver_api::shard::TenantShardId; + use crate::config::PageServerConf; use crate::metrics::{ WalRedoKillCause, WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_COUNTERS, - WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME, + WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM, + WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME, }; use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block}; use crate::repository::Key; @@ -238,10 +242,13 @@ impl PostgresRedoManager { let mut proc_guard = self.redo_process.write().unwrap(); match &*proc_guard { None => { + let timer = + WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.start_timer(); let proc = Arc::new( WalRedoProcess::launch(self.conf, self.tenant_id, pg_version) .context("launch walredo process")?, ); + timer.observe_duration(); *proc_guard = Some(Arc::clone(&proc)); proc } @@ -991,7 +998,11 @@ impl WalRedoProcess { // these files will be collected to an allure report let filename = format!("walredo-{millis}-{}-{seq}.walredo", writebuf.len()); - let path = self.conf.tenant_path(&self.tenant_id).join(&filename); + // TODO(sharding): update this call when WalRedoProcess gets a TenantShardId. 
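
The walredo hunk above wraps WalRedoProcess::launch in a WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM start_timer()/observe_duration() pair. A short sketch of that timer pattern using the prometheus crate directly follows; the metric name below is made up, and the pageserver registers its histograms through its own metrics module rather than like this.

use prometheus::{Histogram, HistogramOpts};

fn main() {
    let histogram =
        Histogram::with_opts(HistogramOpts::new("example_launch_seconds", "example help"))
            .unwrap();

    // start_timer() records the elapsed wall-clock time when the guard is observed.
    let timer = histogram.start_timer();
    std::thread::sleep(std::time::Duration::from_millis(5)); // the work being measured
    timer.observe_duration();

    assert_eq!(histogram.get_sample_count(), 1);
    println!("recorded {} sample(s)", histogram.get_sample_count());
}
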
+ let path = self + .conf + .tenant_path(&TenantShardId::unsharded(self.tenant_id)) + .join(&filename); let res = std::fs::OpenOptions::new() .write(true) @@ -1182,7 +1193,7 @@ mod tests { #[tokio::test] async fn short_v14_redo() { - let expected = std::fs::read("fixtures/short_v14_redo.page").unwrap(); + let expected = std::fs::read("test_data/short_v14_redo.page").unwrap(); let h = RedoHarness::new().unwrap(); diff --git a/pageserver/fixtures/short_v14_redo.page b/pageserver/test_data/short_v14_redo.page similarity index 100% rename from pageserver/fixtures/short_v14_redo.page rename to pageserver/test_data/short_v14_redo.page diff --git a/pageserver/test_data/sk_wal_segment_from_pgbench/000000010000000000000001.zst b/pageserver/test_data/sk_wal_segment_from_pgbench/000000010000000000000001.zst new file mode 100644 index 000000000000..3c478e78272c Binary files /dev/null and b/pageserver/test_data/sk_wal_segment_from_pgbench/000000010000000000000001.zst differ diff --git a/pageserver/test_data/sk_wal_segment_from_pgbench/initdb.tar.zst b/pageserver/test_data/sk_wal_segment_from_pgbench/initdb.tar.zst new file mode 100644 index 000000000000..17e9c7ea0849 Binary files /dev/null and b/pageserver/test_data/sk_wal_segment_from_pgbench/initdb.tar.zst differ diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index cc09fb849d59..8eb9ebb9159a 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -21,6 +21,7 @@ #include "storage/buf_internals.h" #include "storage/lwlock.h" #include "storage/ipc.h" +#include "storage/pg_shmem.h" #include "c.h" #include "postmaster/interrupt.h" @@ -87,6 +88,12 @@ bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = static bool pageserver_flush(void); static void pageserver_disconnect(void); +static bool +PagestoreShmemIsValid() +{ + return pagestore_shared && UsedShmemSegAddr; +} + static bool CheckPageserverConnstring(char **newval, void **extra, GucSource source) { @@ -96,7 +103,7 @@ CheckPageserverConnstring(char **newval, void **extra, GucSource source) static void AssignPageserverConnstring(const char *newval, void *extra) { - if(!pagestore_shared) + if(!PagestoreShmemIsValid()) return; LWLockAcquire(pagestore_shared->lock, LW_EXCLUSIVE); strlcpy(pagestore_shared->pageserver_connstring, newval, MAX_PAGESERVER_CONNSTRING_SIZE); @@ -107,7 +114,7 @@ AssignPageserverConnstring(const char *newval, void *extra) static bool CheckConnstringUpdated() { - if(!pagestore_shared) + if(!PagestoreShmemIsValid()) return false; return pagestore_local_counter < pg_atomic_read_u64(&pagestore_shared->update_counter); } @@ -115,7 +122,7 @@ CheckConnstringUpdated() static void ReloadConnstring() { - if(!pagestore_shared) + if(!PagestoreShmemIsValid()) return; LWLockAcquire(pagestore_shared->lock, LW_SHARED); strlcpy(local_pageserver_connstring, pagestore_shared->pageserver_connstring, sizeof(local_pageserver_connstring)); diff --git a/poetry.lock b/poetry.lock index 58ab4e70f231..a85325b696d2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,112 +1,100 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "aiohttp" -version = "3.8.6" +version = "3.9.0" description = "Async http client/server framework (asyncio)" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:41d55fc043954cddbbd82503d9cc3f4814a40bcef30b3569bc7b5e34130718c1"}, - {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1d84166673694841d8953f0a8d0c90e1087739d24632fe86b1a08819168b4566"}, - {file = "aiohttp-3.8.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:253bf92b744b3170eb4c4ca2fa58f9c4b87aeb1df42f71d4e78815e6e8b73c9e"}, - {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fd194939b1f764d6bb05490987bfe104287bbf51b8d862261ccf66f48fb4096"}, - {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c5f938d199a6fdbdc10bbb9447496561c3a9a565b43be564648d81e1102ac22"}, - {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2817b2f66ca82ee699acd90e05c95e79bbf1dc986abb62b61ec8aaf851e81c93"}, - {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fa375b3d34e71ccccf172cab401cd94a72de7a8cc01847a7b3386204093bb47"}, - {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9de50a199b7710fa2904be5a4a9b51af587ab24c8e540a7243ab737b45844543"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e1d8cb0b56b3587c5c01de3bf2f600f186da7e7b5f7353d1bf26a8ddca57f965"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8e31e9db1bee8b4f407b77fd2507337a0a80665ad7b6c749d08df595d88f1cf5"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7bc88fc494b1f0311d67f29fee6fd636606f4697e8cc793a2d912ac5b19aa38d"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ec00c3305788e04bf6d29d42e504560e159ccaf0be30c09203b468a6c1ccd3b2"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad1407db8f2f49329729564f71685557157bfa42b48f4b93e53721a16eb813ed"}, - {file = "aiohttp-3.8.6-cp310-cp310-win32.whl", hash = "sha256:ccc360e87341ad47c777f5723f68adbb52b37ab450c8bc3ca9ca1f3e849e5fe2"}, - {file = "aiohttp-3.8.6-cp310-cp310-win_amd64.whl", hash = "sha256:93c15c8e48e5e7b89d5cb4613479d144fda8344e2d886cf694fd36db4cc86865"}, - {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e2f9cc8e5328f829f6e1fb74a0a3a939b14e67e80832975e01929e320386b34"}, - {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e6a00ffcc173e765e200ceefb06399ba09c06db97f401f920513a10c803604ca"}, - {file = "aiohttp-3.8.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:41bdc2ba359032e36c0e9de5a3bd00d6fb7ea558a6ce6b70acedf0da86458321"}, - {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14cd52ccf40006c7a6cd34a0f8663734e5363fd981807173faf3a017e202fec9"}, - {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d5b785c792802e7b275c420d84f3397668e9d49ab1cb52bd916b3b3ffcf09ad"}, - {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1bed815f3dc3d915c5c1e556c397c8667826fbc1b935d95b0ad680787896a358"}, - {file = 
"aiohttp-3.8.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96603a562b546632441926cd1293cfcb5b69f0b4159e6077f7c7dbdfb686af4d"}, - {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d76e8b13161a202d14c9584590c4df4d068c9567c99506497bdd67eaedf36403"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e3f1e3f1a1751bb62b4a1b7f4e435afcdade6c17a4fd9b9d43607cebd242924a"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:76b36b3124f0223903609944a3c8bf28a599b2cc0ce0be60b45211c8e9be97f8"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:a2ece4af1f3c967a4390c284797ab595a9f1bc1130ef8b01828915a05a6ae684"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:16d330b3b9db87c3883e565340d292638a878236418b23cc8b9b11a054aaa887"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42c89579f82e49db436b69c938ab3e1559e5a4409eb8639eb4143989bc390f2f"}, - {file = "aiohttp-3.8.6-cp311-cp311-win32.whl", hash = "sha256:efd2fcf7e7b9d7ab16e6b7d54205beded0a9c8566cb30f09c1abe42b4e22bdcb"}, - {file = "aiohttp-3.8.6-cp311-cp311-win_amd64.whl", hash = "sha256:3b2ab182fc28e7a81f6c70bfbd829045d9480063f5ab06f6e601a3eddbbd49a0"}, - {file = "aiohttp-3.8.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fdee8405931b0615220e5ddf8cd7edd8592c606a8e4ca2a00704883c396e4479"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d25036d161c4fe2225d1abff2bd52c34ed0b1099f02c208cd34d8c05729882f0"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d791245a894be071d5ab04bbb4850534261a7d4fd363b094a7b9963e8cdbd31"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0cccd1de239afa866e4ce5c789b3032442f19c261c7d8a01183fd956b1935349"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f13f60d78224f0dace220d8ab4ef1dbc37115eeeab8c06804fec11bec2bbd07"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a9b5a0606faca4f6cc0d338359d6fa137104c337f489cd135bb7fbdbccb1e39"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:13da35c9ceb847732bf5c6c5781dcf4780e14392e5d3b3c689f6d22f8e15ae31"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:4d4cbe4ffa9d05f46a28252efc5941e0462792930caa370a6efaf491f412bc66"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:229852e147f44da0241954fc6cb910ba074e597f06789c867cb7fb0621e0ba7a"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:713103a8bdde61d13490adf47171a1039fd880113981e55401a0f7b42c37d071"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:45ad816b2c8e3b60b510f30dbd37fe74fd4a772248a52bb021f6fd65dff809b6"}, - {file = "aiohttp-3.8.6-cp36-cp36m-win32.whl", hash = "sha256:2b8d4e166e600dcfbff51919c7a3789ff6ca8b3ecce16e1d9c96d95dd569eb4c"}, - {file = "aiohttp-3.8.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0912ed87fee967940aacc5306d3aa8ba3a459fcd12add0b407081fbefc931e53"}, - {file = "aiohttp-3.8.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e2a988a0c673c2e12084f5e6ba3392d76c75ddb8ebc6c7e9ead68248101cd446"}, - {file = 
"aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebf3fd9f141700b510d4b190094db0ce37ac6361a6806c153c161dc6c041ccda"}, - {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3161ce82ab85acd267c8f4b14aa226047a6bee1e4e6adb74b798bd42c6ae1f80"}, - {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d95fc1bf33a9a81469aa760617b5971331cdd74370d1214f0b3109272c0e1e3c"}, - {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c43ecfef7deaf0617cee936836518e7424ee12cb709883f2c9a1adda63cc460"}, - {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca80e1b90a05a4f476547f904992ae81eda5c2c85c66ee4195bb8f9c5fb47f28"}, - {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:90c72ebb7cb3a08a7f40061079817133f502a160561d0675b0a6adf231382c92"}, - {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bb54c54510e47a8c7c8e63454a6acc817519337b2b78606c4e840871a3e15349"}, - {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:de6a1c9f6803b90e20869e6b99c2c18cef5cc691363954c93cb9adeb26d9f3ae"}, - {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:a3628b6c7b880b181a3ae0a0683698513874df63783fd89de99b7b7539e3e8a8"}, - {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fc37e9aef10a696a5a4474802930079ccfc14d9f9c10b4662169671ff034b7df"}, - {file = "aiohttp-3.8.6-cp37-cp37m-win32.whl", hash = "sha256:f8ef51e459eb2ad8e7a66c1d6440c808485840ad55ecc3cafefadea47d1b1ba2"}, - {file = "aiohttp-3.8.6-cp37-cp37m-win_amd64.whl", hash = "sha256:b2fe42e523be344124c6c8ef32a011444e869dc5f883c591ed87f84339de5976"}, - {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9e2ee0ac5a1f5c7dd3197de309adfb99ac4617ff02b0603fd1e65b07dc772e4b"}, - {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01770d8c04bd8db568abb636c1fdd4f7140b284b8b3e0b4584f070180c1e5c62"}, - {file = "aiohttp-3.8.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c68330a59506254b556b99a91857428cab98b2f84061260a67865f7f52899f5"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89341b2c19fb5eac30c341133ae2cc3544d40d9b1892749cdd25892bbc6ac951"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71783b0b6455ac8f34b5ec99d83e686892c50498d5d00b8e56d47f41b38fbe04"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f628dbf3c91e12f4d6c8b3f092069567d8eb17814aebba3d7d60c149391aee3a"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b04691bc6601ef47c88f0255043df6f570ada1a9ebef99c34bd0b72866c217ae"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ee912f7e78287516df155f69da575a0ba33b02dd7c1d6614dbc9463f43066e3"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9c19b26acdd08dd239e0d3669a3dddafd600902e37881f13fbd8a53943079dbc"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:99c5ac4ad492b4a19fc132306cd57075c28446ec2ed970973bbf036bcda1bcc6"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = 
"sha256:f0f03211fd14a6a0aed2997d4b1c013d49fb7b50eeb9ffdf5e51f23cfe2c77fa"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:8d399dade330c53b4106160f75f55407e9ae7505263ea86f2ccca6bfcbdb4921"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ec4fd86658c6a8964d75426517dc01cbf840bbf32d055ce64a9e63a40fd7b771"}, - {file = "aiohttp-3.8.6-cp38-cp38-win32.whl", hash = "sha256:33164093be11fcef3ce2571a0dccd9041c9a93fa3bde86569d7b03120d276c6f"}, - {file = "aiohttp-3.8.6-cp38-cp38-win_amd64.whl", hash = "sha256:bdf70bfe5a1414ba9afb9d49f0c912dc524cf60141102f3a11143ba3d291870f"}, - {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d52d5dc7c6682b720280f9d9db41d36ebe4791622c842e258c9206232251ab2b"}, - {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4ac39027011414dbd3d87f7edb31680e1f430834c8cef029f11c66dad0670aa5"}, - {file = "aiohttp-3.8.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f5c7ce535a1d2429a634310e308fb7d718905487257060e5d4598e29dc17f0b"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b30e963f9e0d52c28f284d554a9469af073030030cef8693106d918b2ca92f54"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:918810ef188f84152af6b938254911055a72e0f935b5fbc4c1a4ed0b0584aed1"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:002f23e6ea8d3dd8d149e569fd580c999232b5fbc601c48d55398fbc2e582e8c"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fcf3eabd3fd1a5e6092d1242295fa37d0354b2eb2077e6eb670accad78e40e1"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:255ba9d6d5ff1a382bb9a578cd563605aa69bec845680e21c44afc2670607a95"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d67f8baed00870aa390ea2590798766256f31dc5ed3ecc737debb6e97e2ede78"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:86f20cee0f0a317c76573b627b954c412ea766d6ada1a9fcf1b805763ae7feeb"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:39a312d0e991690ccc1a61f1e9e42daa519dcc34ad03eb6f826d94c1190190dd"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e827d48cf802de06d9c935088c2924e3c7e7533377d66b6f31ed175c1620e05e"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bd111d7fc5591ddf377a408ed9067045259ff2770f37e2d94e6478d0f3fc0c17"}, - {file = "aiohttp-3.8.6-cp39-cp39-win32.whl", hash = "sha256:caf486ac1e689dda3502567eb89ffe02876546599bbf915ec94b1fa424eeffd4"}, - {file = "aiohttp-3.8.6-cp39-cp39-win_amd64.whl", hash = "sha256:3f0e27e5b733803333bb2371249f41cf42bae8884863e8e8965ec69bebe53132"}, - {file = "aiohttp-3.8.6.tar.gz", hash = "sha256:b0cf2a4501bff9330a8a5248b4ce951851e415bdcce9dc158e76cfd55e15085c"}, + {file = "aiohttp-3.9.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6896b8416be9ada4d22cd359d7cb98955576ce863eadad5596b7cdfbf3e17c6c"}, + {file = "aiohttp-3.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1736d87dad8ef46a8ec9cddd349fa9f7bd3a064c47dd6469c0d6763d3d49a4fc"}, + {file = "aiohttp-3.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c9e5f4d7208cda1a2bb600e29069eecf857e6980d0ccc922ccf9d1372c16f4b"}, + {file = 
"aiohttp-3.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8488519aa05e636c5997719fe543c8daf19f538f4fa044f3ce94bee608817cff"}, + {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ab16c254e2312efeb799bc3c06897f65a133b38b69682bf75d1f1ee1a9c43a9"}, + {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7a94bde005a8f926d0fa38b88092a03dea4b4875a61fbcd9ac6f4351df1b57cd"}, + {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b777c9286b6c6a94f50ddb3a6e730deec327e9e2256cb08b5530db0f7d40fd8"}, + {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:571760ad7736b34d05597a1fd38cbc7d47f7b65deb722cb8e86fd827404d1f6b"}, + {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:deac0a32aec29608eb25d730f4bc5a261a65b6c48ded1ed861d2a1852577c932"}, + {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4ee1b4152bc3190cc40ddd6a14715e3004944263ea208229ab4c297712aa3075"}, + {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:3607375053df58ed6f23903aa10cf3112b1240e8c799d243bbad0f7be0666986"}, + {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:65b0a70a25456d329a5e1426702dde67be0fb7a4ead718005ba2ca582d023a94"}, + {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a2eb5311a37fe105aa35f62f75a078537e1a9e4e1d78c86ec9893a3c97d7a30"}, + {file = "aiohttp-3.9.0-cp310-cp310-win32.whl", hash = "sha256:2cbc14a13fb6b42d344e4f27746a4b03a2cb0c1c3c5b932b0d6ad8881aa390e3"}, + {file = "aiohttp-3.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ac9669990e2016d644ba8ae4758688534aabde8dbbc81f9af129c3f5f01ca9cd"}, + {file = "aiohttp-3.9.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f8e05f5163528962ce1d1806fce763ab893b1c5b7ace0a3538cd81a90622f844"}, + {file = "aiohttp-3.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4afa8f71dba3a5a2e1e1282a51cba7341ae76585345c43d8f0e624882b622218"}, + {file = "aiohttp-3.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f929f4c9b9a00f3e6cc0587abb95ab9c05681f8b14e0fe1daecfa83ea90f8318"}, + {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28185e36a78d247c55e9fbea2332d16aefa14c5276a582ce7a896231c6b1c208"}, + {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a486ddf57ab98b6d19ad36458b9f09e6022de0381674fe00228ca7b741aacb2f"}, + {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70e851f596c00f40a2f00a46126c95c2e04e146015af05a9da3e4867cfc55911"}, + {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5b7bf8fe4d39886adc34311a233a2e01bc10eb4e842220235ed1de57541a896"}, + {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c67a51ea415192c2e53e4e048c78bab82d21955b4281d297f517707dc836bf3d"}, + {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:694df243f394629bcae2d8ed94c589a181e8ba8604159e6e45e7b22e58291113"}, + {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3dd8119752dd30dd7bca7d4bc2a92a59be6a003e4e5c2cf7e248b89751b8f4b7"}, + {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = 
"sha256:eb6dfd52063186ac97b4caa25764cdbcdb4b10d97f5c5f66b0fa95052e744eb7"}, + {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:d97c3e286d0ac9af6223bc132dc4bad6540b37c8d6c0a15fe1e70fb34f9ec411"}, + {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:816f4db40555026e4cdda604a1088577c1fb957d02f3f1292e0221353403f192"}, + {file = "aiohttp-3.9.0-cp311-cp311-win32.whl", hash = "sha256:3abf0551874fecf95f93b58f25ef4fc9a250669a2257753f38f8f592db85ddea"}, + {file = "aiohttp-3.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:e18d92c3e9e22553a73e33784fcb0ed484c9874e9a3e96c16a8d6a1e74a0217b"}, + {file = "aiohttp-3.9.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:99ae01fb13a618b9942376df77a1f50c20a281390dad3c56a6ec2942e266220d"}, + {file = "aiohttp-3.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:05857848da443c8c12110d99285d499b4e84d59918a21132e45c3f0804876994"}, + {file = "aiohttp-3.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:317719d7f824eba55857fe0729363af58e27c066c731bc62cd97bc9c3d9c7ea4"}, + {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1e3b3c107ccb0e537f309f719994a55621acd2c8fdf6d5ce5152aed788fb940"}, + {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45820ddbb276113ead8d4907a7802adb77548087ff5465d5c554f9aa3928ae7d"}, + {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:05a183f1978802588711aed0dea31e697d760ce9055292db9dc1604daa9a8ded"}, + {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a4cd44788ea0b5e6bb8fa704597af3a30be75503a7ed1098bc5b8ffdf6c982"}, + {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:673343fbc0c1ac44d0d2640addc56e97a052504beacd7ade0dc5e76d3a4c16e8"}, + {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e8a3b79b6d186a9c99761fd4a5e8dd575a48d96021f220ac5b5fa856e5dd029"}, + {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6777a390e41e78e7c45dab43a4a0196c55c3b8c30eebe017b152939372a83253"}, + {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7ae5f99a32c53731c93ac3075abd3e1e5cfbe72fc3eaac4c27c9dd64ba3b19fe"}, + {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:f1e4f254e9c35d8965d377e065c4a8a55d396fe87c8e7e8429bcfdeeb229bfb3"}, + {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:11ca808f9a6b63485059f5f6e164ef7ec826483c1212a44f268b3653c91237d8"}, + {file = "aiohttp-3.9.0-cp312-cp312-win32.whl", hash = "sha256:de3cc86f4ea8b4c34a6e43a7306c40c1275e52bfa9748d869c6b7d54aa6dad80"}, + {file = "aiohttp-3.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca4fddf84ac7d8a7d0866664936f93318ff01ee33e32381a115b19fb5a4d1202"}, + {file = "aiohttp-3.9.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f09960b5bb1017d16c0f9e9f7fc42160a5a49fa1e87a175fd4a2b1a1833ea0af"}, + {file = "aiohttp-3.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8303531e2c17b1a494ffaeba48f2da655fe932c4e9a2626c8718403c83e5dd2b"}, + {file = "aiohttp-3.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4790e44f46a4aa07b64504089def5744d3b6780468c4ec3a1a36eb7f2cae9814"}, + {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a1d7edf74a36de0e5ca50787e83a77cf352f5504eb0ffa3f07000a911ba353fb"}, + {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:94697c7293199c2a2551e3e3e18438b4cba293e79c6bc2319f5fd652fccb7456"}, + {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a1b66dbb8a7d5f50e9e2ea3804b01e766308331d0cac76eb30c563ac89c95985"}, + {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9623cfd9e85b76b83ef88519d98326d4731f8d71869867e47a0b979ffec61c73"}, + {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f32c86dc967ab8c719fd229ce71917caad13cc1e8356ee997bf02c5b368799bf"}, + {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f50b4663c3e0262c3a361faf440761fbef60ccdde5fe8545689a4b3a3c149fb4"}, + {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dcf71c55ec853826cd70eadb2b6ac62ec577416442ca1e0a97ad875a1b3a0305"}, + {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:42fe4fd9f0dfcc7be4248c162d8056f1d51a04c60e53366b0098d1267c4c9da8"}, + {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:76a86a9989ebf82ee61e06e2bab408aec4ea367dc6da35145c3352b60a112d11"}, + {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f9e09a1c83521d770d170b3801eea19b89f41ccaa61d53026ed111cb6f088887"}, + {file = "aiohttp-3.9.0-cp38-cp38-win32.whl", hash = "sha256:a00ce44c21612d185c5275c5cba4bab8d7c1590f248638b667ed8a782fa8cd6f"}, + {file = "aiohttp-3.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:d5b9345ab92ebe6003ae11d8092ce822a0242146e6fa270889b9ba965457ca40"}, + {file = "aiohttp-3.9.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98d21092bf2637c5fa724a428a69e8f5955f2182bff61f8036827cf6ce1157bf"}, + {file = "aiohttp-3.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:35a68cd63ca6aaef5707888f17a70c36efe62b099a4e853d33dc2e9872125be8"}, + {file = "aiohttp-3.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7f6235c7475658acfc1769d968e07ab585c79f6ca438ddfecaa9a08006aee2"}, + {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db04d1de548f7a62d1dd7e7cdf7c22893ee168e22701895067a28a8ed51b3735"}, + {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:536b01513d67d10baf6f71c72decdf492fb7433c5f2f133e9a9087379d4b6f31"}, + {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c8b0a6487e8109427ccf638580865b54e2e3db4a6e0e11c02639231b41fc0f"}, + {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7276fe0017664414fdc3618fca411630405f1aaf0cc3be69def650eb50441787"}, + {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23170247ef89ffa842a02bbfdc425028574d9e010611659abeb24d890bc53bb8"}, + {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b1a2ea8252cacc7fd51df5a56d7a2bb1986ed39be9397b51a08015727dfb69bd"}, + {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2d71abc15ff7047412ef26bf812dfc8d0d1020d664617f4913df2df469f26b76"}, + {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:2d820162c8c2bdbe97d328cd4f417c955ca370027dce593345e437b2e9ffdc4d"}, + {file = 
"aiohttp-3.9.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:2779f5e7c70f7b421915fd47db332c81de365678180a9f3ab404088f87ba5ff9"}, + {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:366bc870d7ac61726f32a489fbe3d1d8876e87506870be66b01aeb84389e967e"}, + {file = "aiohttp-3.9.0-cp39-cp39-win32.whl", hash = "sha256:1df43596b826022b14998f0460926ce261544fedefe0d2f653e1b20f49e96454"}, + {file = "aiohttp-3.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:9c196b30f1b1aa3363a69dd69079ae9bec96c2965c4707eaa6914ba099fb7d4f"}, + {file = "aiohttp-3.9.0.tar.gz", hash = "sha256:09f23292d29135025e19e8ff4f0a68df078fe4ee013bca0105b2e803989de92d"}, ] [package.dependencies] aiosignal = ">=1.1.2" -async-timeout = ">=4.0.0a3,<5.0" +async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" -charset-normalizer = ">=2.0,<4.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" yarl = ">=1.0,<2.0" [package.extras] -speedups = ["Brotli", "aiodns", "cchardet"] +speedups = ["Brotli", "aiodns", "brotlicffi"] [[package]] name = "aiopg" @@ -887,34 +875,34 @@ files = [ [[package]] name = "cryptography" -version = "41.0.4" +version = "41.0.6" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839"}, - {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f"}, - {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714"}, - {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb"}, - {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13"}, - {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143"}, - {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397"}, - {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860"}, - {file = "cryptography-41.0.4-cp37-abi3-win32.whl", hash = "sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd"}, - {file = "cryptography-41.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d"}, - {file = "cryptography-41.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67"}, - {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e"}, - {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829"}, - {file = "cryptography-41.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca"}, - {file = 
"cryptography-41.0.4-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d"}, - {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac"}, - {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9"}, - {file = "cryptography-41.0.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f"}, - {file = "cryptography-41.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91"}, - {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8"}, - {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6"}, - {file = "cryptography-41.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311"}, - {file = "cryptography-41.0.4.tar.gz", hash = "sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a"}, + {file = "cryptography-41.0.6-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:0f27acb55a4e77b9be8d550d762b0513ef3fc658cd3eb15110ebbcbd626db12c"}, + {file = "cryptography-41.0.6-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:ae236bb8760c1e55b7a39b6d4d32d2279bc6c7c8500b7d5a13b6fb9fc97be35b"}, + {file = "cryptography-41.0.6-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afda76d84b053923c27ede5edc1ed7d53e3c9f475ebaf63c68e69f1403c405a8"}, + {file = "cryptography-41.0.6-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da46e2b5df770070412c46f87bac0849b8d685c5f2679771de277a422c7d0b86"}, + {file = "cryptography-41.0.6-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ff369dd19e8fe0528b02e8df9f2aeb2479f89b1270d90f96a63500afe9af5cae"}, + {file = "cryptography-41.0.6-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b648fe2a45e426aaee684ddca2632f62ec4613ef362f4d681a9a6283d10e079d"}, + {file = "cryptography-41.0.6-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5daeb18e7886a358064a68dbcaf441c036cbdb7da52ae744e7b9207b04d3908c"}, + {file = "cryptography-41.0.6-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:068bc551698c234742c40049e46840843f3d98ad7ce265fd2bd4ec0d11306596"}, + {file = "cryptography-41.0.6-cp37-abi3-win32.whl", hash = "sha256:2132d5865eea673fe6712c2ed5fb4fa49dba10768bb4cc798345748380ee3660"}, + {file = "cryptography-41.0.6-cp37-abi3-win_amd64.whl", hash = "sha256:48783b7e2bef51224020efb61b42704207dde583d7e371ef8fc2a5fb6c0aabc7"}, + {file = "cryptography-41.0.6-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:8efb2af8d4ba9dbc9c9dd8f04d19a7abb5b49eab1f3694e7b5a16a5fc2856f5c"}, + {file = "cryptography-41.0.6-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c5a550dc7a3b50b116323e3d376241829fd326ac47bc195e04eb33a8170902a9"}, + {file = "cryptography-41.0.6-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:85abd057699b98fce40b41737afb234fef05c67e116f6f3650782c10862c43da"}, + {file = "cryptography-41.0.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f39812f70fc5c71a15aa3c97b2bbe213c3f2a460b79bd21c40d033bb34a9bf36"}, + {file = 
"cryptography-41.0.6-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:742ae5e9a2310e9dade7932f9576606836ed174da3c7d26bc3d3ab4bd49b9f65"}, + {file = "cryptography-41.0.6-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:35f3f288e83c3f6f10752467c48919a7a94b7d88cc00b0668372a0d2ad4f8ead"}, + {file = "cryptography-41.0.6-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4d03186af98b1c01a4eda396b137f29e4e3fb0173e30f885e27acec8823c1b09"}, + {file = "cryptography-41.0.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b27a7fd4229abef715e064269d98a7e2909ebf92eb6912a9603c7e14c181928c"}, + {file = "cryptography-41.0.6-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:398ae1fc711b5eb78e977daa3cbf47cec20f2c08c5da129b7a296055fbb22aed"}, + {file = "cryptography-41.0.6-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7e00fb556bda398b99b0da289ce7053639d33b572847181d6483ad89835115f6"}, + {file = "cryptography-41.0.6-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:60e746b11b937911dc70d164060d28d273e31853bb359e2b2033c9e93e6f3c43"}, + {file = "cryptography-41.0.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3288acccef021e3c3c10d58933f44e8602cf04dba96d9796d70d537bb2f4bbc4"}, + {file = "cryptography-41.0.6.tar.gz", hash = "sha256:422e3e31d63743855e43e5a6fcc8b4acab860f560f9321b0ee6269cc7ed70cc3"}, ] [package.dependencies] @@ -1979,18 +1967,18 @@ pytest = [ [[package]] name = "pytest-rerunfailures" -version = "11.1.2" +version = "13.0" description = "pytest plugin to re-run tests to eliminate flaky failures" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-rerunfailures-11.1.2.tar.gz", hash = "sha256:55611661e873f1cafa384c82f08d07883954f4b76435f4b8a5b470c1954573de"}, - {file = "pytest_rerunfailures-11.1.2-py3-none-any.whl", hash = "sha256:d21fe2e46d9774f8ad95f1aa799544ae95cac3a223477af94aa985adfae92b7e"}, + {file = "pytest-rerunfailures-13.0.tar.gz", hash = "sha256:e132dbe420bc476f544b96e7036edd0a69707574209b6677263c950d19b09199"}, + {file = "pytest_rerunfailures-13.0-py3-none-any.whl", hash = "sha256:34919cb3fcb1f8e5d4b940aa75ccdea9661bade925091873b7c6fa5548333069"}, ] [package.dependencies] packaging = ">=17.1" -pytest = ">=5.3" +pytest = ">=7" [[package]] name = "pytest-split" @@ -2488,16 +2476,6 @@ files = [ {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"}, - {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"}, - {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, @@ -2719,4 +2697,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "0834e5cb69e5457741d4f476c3e49a4dc83598b5730685c8755da651b96ad3ec" +content-hash = "9f33b4404dbb9803ede5785469241dde1d09132427b87db8928bdbc37ccd6b7a" diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index 0ec7efd3167a..48c8604d86fa 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -24,6 +24,7 @@ hostname.workspace = true humantime.workspace = true hyper-tungstenite.workspace = true hyper.workspace = true +ipnet.workspace = true itertools.workspace = true md5.workspace = true metrics.workspace = true @@ -68,6 +69,7 @@ webpki-roots.workspace = true x509-parser.workspace = true native-tls.workspace = true postgres-native-tls.workspace = true +smol_str.workspace = true workspace_hack.workspace = true tokio-util.workspace = true @@ -76,3 +78,4 @@ tokio-util.workspace = true rcgen.workspace = true rstest.workspace = true tokio-postgres-rustls.workspace = true +postgres-protocol.workspace = true diff --git a/proxy/src/auth.rs b/proxy/src/auth.rs index 58dceb3bb6e1..7d79d3404525 100644 --- a/proxy/src/auth.rs +++ b/proxy/src/auth.rs @@ -4,7 +4,7 @@ pub mod backend; pub use backend::BackendType; mod credentials; -pub use credentials::ClientCredentials; +pub use credentials::{check_peer_addr_is_in_list, ClientCredentials}; mod password_hack; pub use password_hack::parse_endpoint_param; @@ -56,6 +56,12 @@ pub enum AuthErrorImpl { /// Errors produced by e.g. [`crate::stream::PqStream`]. #[error(transparent)] Io(#[from] io::Error), + + #[error( + "This IP address is not allowed to connect to this endpoint. \ + Please add it to the allowed list in the Neon console." 
+ )] + IpAddressNotAllowed, } #[derive(Debug, Error)] @@ -70,6 +76,10 @@ impl AuthError { pub fn auth_failed(user: impl Into>) -> Self { AuthErrorImpl::AuthFailed(user.into()).into() } + + pub fn ip_address_not_allowed() -> Self { + AuthErrorImpl::IpAddressNotAllowed.into() + } } impl> From for AuthError { @@ -91,6 +101,7 @@ impl UserFacingError for AuthError { MalformedPassword(_) => self.to_string(), MissingEndpointName => self.to_string(), Io(_) => "Internal error".to_string(), + IpAddressNotAllowed => self.to_string(), } } } diff --git a/proxy/src/auth/backend.rs b/proxy/src/auth/backend.rs index 9cf45c0eec30..aa872285b169 100644 --- a/proxy/src/auth/backend.rs +++ b/proxy/src/auth/backend.rs @@ -5,7 +5,13 @@ mod link; pub use link::LinkAuthError; use tokio_postgres::config::AuthKeys; +use crate::auth::credentials::check_peer_addr_is_in_list; +use crate::console::errors::GetAuthInfoError; +use crate::console::provider::AuthInfo; +use crate::console::AuthSecret; use crate::proxy::{handle_try_wake, retry_after, LatencyTimer}; +use crate::scram; +use crate::stream::Stream; use crate::{ auth::{self, ClientCredentials}, config::AuthenticationConfig, @@ -19,6 +25,7 @@ use crate::{ use futures::TryFutureExt; use std::borrow::Cow; use std::ops::ControlFlow; +use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{error, info, warn}; @@ -63,6 +70,7 @@ pub enum BackendType<'a, T> { pub trait TestBackend: Send + Sync + 'static { fn wake_compute(&self) -> Result; + fn get_allowed_ips(&self) -> Result>, console::errors::GetAuthInfoError>; } impl std::fmt::Display for BackendType<'_, ()> { @@ -131,7 +139,7 @@ async fn auth_quirks_creds( api: &impl console::Api, extra: &ConsoleReqExtra<'_>, creds: &mut ClientCredentials<'_>, - client: &mut stream::PqStream, + client: &mut stream::PqStream>, allow_cleartext: bool, config: &'static AuthenticationConfig, latency_timer: &mut LatencyTimer, @@ -139,14 +147,38 @@ async fn auth_quirks_creds( // If there's no project so far, that entails that client doesn't // support SNI or other means of passing the endpoint (project) name. // We now expect to see a very specific payload in the place of password. - if creds.project.is_none() { + let maybe_success = if creds.project.is_none() { // Password will be checked by the compute node later. - return hacks::password_hack(creds, client, latency_timer).await; - } + Some(hacks::password_hack(creds, client, latency_timer).await?) + } else { + None + }; // Password hack should set the project name. // TODO: make `creds.project` more type-safe. assert!(creds.project.is_some()); + info!("fetching user's authentication info"); + // TODO(anna): this will slow down both "hacks" below; we probably need a cache. + let AuthInfo { + secret, + allowed_ips, + } = api.get_auth_info(extra, creds).await?; + + // check allowed list + if !check_peer_addr_is_in_list(&creds.peer_addr.ip(), &allowed_ips) { + return Err(auth::AuthError::ip_address_not_allowed()); + } + let secret = secret.unwrap_or_else(|| { + // If we don't have an authentication secret, we mock one to + // prevent malicious probing (possible due to missing protocol steps). + // This mocked secret will never lead to successful authentication. + info!("authentication info not found, mocking it"); + AuthSecret::Scram(scram::ServerSecret::mock(creds.user, rand::random())) + }); + + if let Some(success) = maybe_success { + return Ok(success); + } // Perform cleartext auth if we're allowed to do that. // Currently, we use it for websocket connections (latency). 
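The hunk above reorders auth_quirks_creds so that the proxy now fetches AuthInfo (a stored secret plus the endpoint's allowed_ips) once, rejects peers whose address fails check_peer_addr_is_in_list with AuthError::ip_address_not_allowed, and only then substitutes a mocked SCRAM secret when no secret exists. The following is a minimal standalone sketch of that ordering only; the names AuthInfo (as defined here), addr_allowed and gate are illustrative stand-ins, not the proxy's actual API, and the allow-list matching is deliberately simplified to exact addresses.

use std::net::IpAddr;

// Illustrative stand-in for the AuthInfo { secret, allowed_ips } pair the
// console returns in this change; the real secret type is AuthSecret.
struct AuthInfo {
    secret: Option<String>,
    allowed_ips: Vec<String>,
}

// Simplified allow-list check: an empty list means "no restriction",
// mirroring check_peer_addr_is_in_list; only exact matches are handled here.
fn addr_allowed(peer: &IpAddr, list: &[String]) -> bool {
    list.is_empty() || list.iter().any(|p| p.parse::<IpAddr>().ok().as_ref() == Some(peer))
}

fn gate(peer: IpAddr, info: AuthInfo) -> Result<String, &'static str> {
    // 1) Reject disallowed peers before any authentication exchange happens.
    if !addr_allowed(&peer, &info.allowed_ips) {
        return Err("this IP address is not allowed to connect to this endpoint");
    }
    // 2) If no secret is stored, hand back a mock so a missing user is
    //    indistinguishable from a wrong password (the anti-probing step
    //    that ServerSecret::mock provides in the real code).
    Ok(info
        .secret
        .unwrap_or_else(|| "mocked-scram-secret".to_string()))
}

fn main() {
    let info = AuthInfo {
        secret: None,
        allowed_ips: vec!["127.0.0.1".to_string()],
    };
    // Allowed peer with no stored secret: the gate still returns a (mock) secret.
    println!("{:?}", gate("127.0.0.1".parse().unwrap(), info));
}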
@@ -156,7 +188,7 @@ async fn auth_quirks_creds( } // Finally, proceed with the main auth flow (SCRAM-based). - classic::authenticate(api, extra, creds, client, config, latency_timer).await + classic::authenticate(creds, client, config, latency_timer, secret).await } /// True to its name, this function encapsulates our current auth trade-offs. @@ -165,7 +197,7 @@ async fn auth_quirks( api: &impl console::Api, extra: &ConsoleReqExtra<'_>, creds: &mut ClientCredentials<'_>, - client: &mut stream::PqStream, + client: &mut stream::PqStream>, allow_cleartext: bool, config: &'static AuthenticationConfig, latency_timer: &mut LatencyTimer, @@ -241,7 +273,7 @@ impl BackendType<'_, ClientCredentials<'_>> { pub async fn authenticate( &mut self, extra: &ConsoleReqExtra<'_>, - client: &mut stream::PqStream, + client: &mut stream::PqStream>, allow_cleartext: bool, config: &'static AuthenticationConfig, latency_timer: &mut LatencyTimer, @@ -304,6 +336,19 @@ impl BackendType<'_, ClientCredentials<'_>> { Ok(res) } + pub async fn get_allowed_ips( + &self, + extra: &ConsoleReqExtra<'_>, + ) -> Result>, GetAuthInfoError> { + use BackendType::*; + match self { + Console(api, creds) => api.get_allowed_ips(extra, creds).await, + Postgres(api, creds) => api.get_allowed_ips(extra, creds).await, + Link(_) => Ok(Arc::new(vec![])), + Test(x) => x.get_allowed_ips(), + } + } + /// When applicable, wake the compute node, gaining its connection info in the process. /// The link auth flow doesn't support this, so we return [`None`] in that case. pub async fn wake_compute( diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs index aee00576062d..bb210821cd9a 100644 --- a/proxy/src/auth/backend/classic.rs +++ b/proxy/src/auth/backend/classic.rs @@ -3,38 +3,28 @@ use crate::{ auth::{self, AuthFlow, ClientCredentials}, compute, config::AuthenticationConfig, - console::{self, AuthInfo, ConsoleReqExtra}, + console::AuthSecret, proxy::LatencyTimer, - sasl, scram, - stream::PqStream, + sasl, + stream::{PqStream, Stream}, }; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{info, warn}; pub(super) async fn authenticate( - api: &impl console::Api, - extra: &ConsoleReqExtra<'_>, creds: &ClientCredentials<'_>, - client: &mut PqStream, + client: &mut PqStream>, config: &'static AuthenticationConfig, latency_timer: &mut LatencyTimer, + secret: AuthSecret, ) -> auth::Result> { - info!("fetching user's authentication info"); - let info = api.get_auth_info(extra, creds).await?.unwrap_or_else(|| { - // If we don't have an authentication secret, we mock one to - // prevent malicious probing (possible due to missing protocol steps). - // This mocked secret will never lead to successful authentication. 
- info!("authentication info not found, mocking it"); - AuthInfo::Scram(scram::ServerSecret::mock(creds.user, rand::random())) - }); - let flow = AuthFlow::new(client); - let scram_keys = match info { - AuthInfo::Md5(_) => { + let scram_keys = match secret { + AuthSecret::Md5(_) => { info!("auth endpoint chooses MD5"); return Err(auth::AuthError::bad_auth_method("MD5")); } - AuthInfo::Scram(secret) => { + AuthSecret::Scram(secret) => { info!("auth endpoint chooses SCRAM"); let scram = auth::Scram(&secret); diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs index 895683af1b2d..4448dbc56aaf 100644 --- a/proxy/src/auth/backend/hacks.rs +++ b/proxy/src/auth/backend/hacks.rs @@ -2,7 +2,7 @@ use super::{AuthSuccess, ComputeCredentials}; use crate::{ auth::{self, AuthFlow, ClientCredentials}, proxy::LatencyTimer, - stream, + stream::{self, Stream}, }; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{info, warn}; @@ -12,7 +12,7 @@ use tracing::{info, warn}; /// These properties are benefical for serverless JS workers, so we /// use this mechanism for websocket connections. pub async fn cleartext_hack( - client: &mut stream::PqStream, + client: &mut stream::PqStream>, latency_timer: &mut LatencyTimer, ) -> auth::Result> { warn!("cleartext auth flow override is enabled, proceeding"); @@ -37,7 +37,7 @@ pub async fn cleartext_hack( /// Very similar to [`cleartext_hack`], but there's a specific password format. pub async fn password_hack( creds: &mut ClientCredentials<'_>, - client: &mut stream::PqStream, + client: &mut stream::PqStream>, latency_timer: &mut LatencyTimer, ) -> auth::Result> { warn!("project not specified, resorting to the password hack auth flow"); diff --git a/proxy/src/auth/backend/link.rs b/proxy/src/auth/backend/link.rs index da43cf11c403..3a77d7e5cae2 100644 --- a/proxy/src/auth/backend/link.rs +++ b/proxy/src/auth/backend/link.rs @@ -106,7 +106,7 @@ pub(super) async fn authenticate( reported_auth_ok: true, value: NodeInfo { config, - aux: db_info.aux.into(), + aux: db_info.aux, allow_self_signed_compute: false, // caller may override }, }) diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index 9fe9c26f0c27..facb8da8cd2d 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -7,9 +7,12 @@ use crate::{ }; use itertools::Itertools; use pq_proto::StartupMessageParams; -use std::collections::HashSet; +use std::{ + collections::HashSet, + net::{IpAddr, SocketAddr}, +}; use thiserror::Error; -use tracing::info; +use tracing::{info, warn}; #[derive(Debug, Error, PartialEq, Eq, Clone)] pub enum ClientCredsParseError { @@ -44,6 +47,7 @@ pub struct ClientCredentials<'a> { pub project: Option, pub cache_key: String, + pub peer_addr: SocketAddr, } impl ClientCredentials<'_> { @@ -54,19 +58,11 @@ impl ClientCredentials<'_> { } impl<'a> ClientCredentials<'a> { - #[cfg(test)] - pub fn new_noop() -> Self { - ClientCredentials { - user: "", - project: None, - cache_key: "".to_string(), - } - } - pub fn parse( params: &'a StartupMessageParams, sni: Option<&str>, common_names: Option>, + peer_addr: SocketAddr, ) -> Result { use ClientCredsParseError::*; @@ -153,10 +149,59 @@ impl<'a> ClientCredentials<'a> { user, project, cache_key, + peer_addr, }) } } +pub fn check_peer_addr_is_in_list(peer_addr: &IpAddr, ip_list: &Vec) -> bool { + if ip_list.is_empty() { + return true; + } + for ip in ip_list { + // We expect that all ip addresses from control plane are correct. 
+ // However, if some of them are broken, we still can check the others. + match parse_ip_pattern(ip) { + Ok(pattern) => { + if check_ip(peer_addr, &pattern) { + return true; + } + } + Err(err) => warn!("Cannot parse ip: {}; err: {}", ip, err), + } + } + false +} + +#[derive(Debug, Clone, Eq, PartialEq)] +enum IpPattern { + Subnet(ipnet::IpNet), + Range(IpAddr, IpAddr), + Single(IpAddr), +} + +fn parse_ip_pattern(pattern: &str) -> anyhow::Result { + if pattern.contains('/') { + let subnet: ipnet::IpNet = pattern.parse()?; + return Ok(IpPattern::Subnet(subnet)); + } + if let Some((start, end)) = pattern.split_once('-') { + let start: IpAddr = start.parse()?; + let end: IpAddr = end.parse()?; + return Ok(IpPattern::Range(start, end)); + } + let addr: IpAddr = pattern.parse()?; + Ok(IpPattern::Single(addr)) +} + +fn check_ip(ip: &IpAddr, pattern: &IpPattern) -> bool { + match pattern { + IpPattern::Subnet(subnet) => subnet.contains(ip), + IpPattern::Range(start, end) => start <= ip && ip <= end, + IpPattern::Single(addr) => addr == ip, + } +} + fn project_name_valid(name: &str) -> bool { name.chars().all(|c| c.is_alphanumeric() || c == '-') } @@ -176,8 +221,8 @@ mod tests { fn parse_bare_minimum() -> anyhow::Result<()> { // According to postgresql, only `user` should be required. let options = StartupMessageParams::new([("user", "john_doe")]); - - let creds = ClientCredentials::parse(&options, None, None)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, None, None, peer_addr)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.project, None); @@ -191,8 +236,8 @@ mod tests { ("database", "world"), // should be ignored ("foo", "bar"), // should be ignored ]); - - let creds = ClientCredentials::parse(&options, None, None)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, None, None, peer_addr)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.project, None); @@ -206,7 +251,8 @@ mod tests { let sni = Some("foo.localhost"); let common_names = Some(["localhost".into()].into()); - let creds = ClientCredentials::parse(&options, sni, common_names)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, sni, common_names, peer_addr)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.project.as_deref(), Some("foo")); assert_eq!(creds.cache_key, "foo"); @@ -221,7 +267,8 @@ mod tests { ("options", "-ckey=1 project=bar -c geqo=off"), ]); - let creds = ClientCredentials::parse(&options, None, None)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, None, None, peer_addr)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.project.as_deref(), Some("bar")); @@ -235,7 +282,8 @@ mod tests { ("options", "-ckey=1 endpoint=bar -c geqo=off"), ]); - let creds = ClientCredentials::parse(&options, None, None)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, None, None, peer_addr)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.project.as_deref(), Some("bar")); @@ -252,7 +300,8 @@ mod tests { ), ]); - let creds = ClientCredentials::parse(&options, None, None)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, None, None, peer_addr)?; assert_eq!(creds.user, "john_doe"); assert!(creds.project.is_none()); @@ -266,7 +315,8 @@ mod tests { 
("options", "-ckey=1 endpoint=bar project=foo -c geqo=off"), ]); - let creds = ClientCredentials::parse(&options, None, None)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, None, None, peer_addr)?; assert_eq!(creds.user, "john_doe"); assert!(creds.project.is_none()); @@ -280,7 +330,8 @@ mod tests { let sni = Some("baz.localhost"); let common_names = Some(["localhost".into()].into()); - let creds = ClientCredentials::parse(&options, sni, common_names)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, sni, common_names, peer_addr)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.project.as_deref(), Some("baz")); @@ -293,12 +344,14 @@ mod tests { let common_names = Some(["a.com".into(), "b.com".into()].into()); let sni = Some("p1.a.com"); - let creds = ClientCredentials::parse(&options, sni, common_names)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, sni, common_names, peer_addr)?; assert_eq!(creds.project.as_deref(), Some("p1")); let common_names = Some(["a.com".into(), "b.com".into()].into()); let sni = Some("p1.b.com"); - let creds = ClientCredentials::parse(&options, sni, common_names)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, sni, common_names, peer_addr)?; assert_eq!(creds.project.as_deref(), Some("p1")); Ok(()) @@ -312,7 +365,9 @@ mod tests { let sni = Some("second.localhost"); let common_names = Some(["localhost".into()].into()); - let err = ClientCredentials::parse(&options, sni, common_names).expect_err("should fail"); + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let err = ClientCredentials::parse(&options, sni, common_names, peer_addr) + .expect_err("should fail"); match err { InconsistentProjectNames { domain, option } => { assert_eq!(option, "first"); @@ -329,7 +384,9 @@ mod tests { let sni = Some("project.localhost"); let common_names = Some(["example.com".into()].into()); - let err = ClientCredentials::parse(&options, sni, common_names).expect_err("should fail"); + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let err = ClientCredentials::parse(&options, sni, common_names, peer_addr) + .expect_err("should fail"); match err { UnknownCommonName { cn } => { assert_eq!(cn, "localhost"); @@ -347,7 +404,8 @@ mod tests { let sni = Some("project.localhost"); let common_names = Some(["localhost".into()].into()); - let creds = ClientCredentials::parse(&options, sni, common_names)?; + let peer_addr = SocketAddr::from(([127, 0, 0, 1], 1234)); + let creds = ClientCredentials::parse(&options, sni, common_names, peer_addr)?; assert_eq!(creds.project.as_deref(), Some("project")); assert_eq!( creds.cache_key, @@ -356,4 +414,91 @@ mod tests { Ok(()) } + + #[test] + fn test_check_peer_addr_is_in_list() { + let peer_addr = IpAddr::from([127, 0, 0, 1]); + assert!(check_peer_addr_is_in_list(&peer_addr, &vec![])); + assert!(check_peer_addr_is_in_list( + &peer_addr, + &vec!["127.0.0.1".into()] + )); + assert!(!check_peer_addr_is_in_list( + &peer_addr, + &vec!["8.8.8.8".into()] + )); + // If there is an incorrect address, it will be skipped. 
+ assert!(check_peer_addr_is_in_list( + &peer_addr, + &vec!["88.8.8".into(), "127.0.0.1".into()] + )); + } + #[test] + fn test_parse_ip_v4() -> anyhow::Result<()> { + let peer_addr = IpAddr::from([127, 0, 0, 1]); + // Ok + assert_eq!(parse_ip_pattern("127.0.0.1")?, IpPattern::Single(peer_addr)); + assert_eq!( + parse_ip_pattern("127.0.0.1/31")?, + IpPattern::Subnet(ipnet::IpNet::new(peer_addr, 31)?) + ); + assert_eq!( + parse_ip_pattern("0.0.0.0-200.0.1.2")?, + IpPattern::Range(IpAddr::from([0, 0, 0, 0]), IpAddr::from([200, 0, 1, 2])) + ); + + // Error + assert!(parse_ip_pattern("300.0.1.2").is_err()); + assert!(parse_ip_pattern("30.1.2").is_err()); + assert!(parse_ip_pattern("127.0.0.1/33").is_err()); + assert!(parse_ip_pattern("127.0.0.1-127.0.3").is_err()); + assert!(parse_ip_pattern("1234.0.0.1-127.0.3.0").is_err()); + Ok(()) + } + + #[test] + fn test_check_ipv4() -> anyhow::Result<()> { + let peer_addr = IpAddr::from([127, 0, 0, 1]); + let peer_addr_next = IpAddr::from([127, 0, 0, 2]); + let peer_addr_prev = IpAddr::from([127, 0, 0, 0]); + // Success + assert!(check_ip(&peer_addr, &IpPattern::Single(peer_addr))); + assert!(check_ip( + &peer_addr, + &IpPattern::Subnet(ipnet::IpNet::new(peer_addr_prev, 31)?) + )); + assert!(check_ip( + &peer_addr, + &IpPattern::Subnet(ipnet::IpNet::new(peer_addr_next, 30)?) + )); + assert!(check_ip( + &peer_addr, + &IpPattern::Range(IpAddr::from([0, 0, 0, 0]), IpAddr::from([200, 0, 1, 2])) + )); + assert!(check_ip( + &peer_addr, + &IpPattern::Range(peer_addr, peer_addr) + )); + + // Not success + assert!(!check_ip(&peer_addr, &IpPattern::Single(peer_addr_prev))); + assert!(!check_ip( + &peer_addr, + &IpPattern::Subnet(ipnet::IpNet::new(peer_addr_next, 31)?) + )); + assert!(!check_ip( + &peer_addr, + &IpPattern::Range(IpAddr::from([0, 0, 0, 0]), peer_addr_prev) + )); + assert!(!check_ip( + &peer_addr, + &IpPattern::Range(peer_addr_next, IpAddr::from([128, 0, 0, 0])) + )); + // There is no check that for range start <= end. But it's fine as long as for all this cases the result is false. + assert!(!check_ip( + &peer_addr, + &IpPattern::Range(peer_addr, peer_addr_prev) + )); + Ok(()) + } } diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs index 190abc9b2e0d..efb90733d6fe 100644 --- a/proxy/src/auth/flow.rs +++ b/proxy/src/auth/flow.rs @@ -1,16 +1,21 @@ //! Main authentication flow. use super::{AuthErrorImpl, PasswordHackPayload}; -use crate::{sasl, scram, stream::PqStream}; +use crate::{ + config::TlsServerEndPoint, + sasl, scram, + stream::{PqStream, Stream}, +}; use pq_proto::{BeAuthenticationSaslMessage, BeMessage, BeMessage as Be}; use std::io; use tokio::io::{AsyncRead, AsyncWrite}; +use tracing::info; /// Every authentication selector is supposed to implement this trait. pub trait AuthMethod { /// Any authentication selector should provide initial backend message /// containing auth method name and parameters, e.g. md5 salt. - fn first_message(&self) -> BeMessage<'_>; + fn first_message(&self, channel_binding: bool) -> BeMessage<'_>; } /// Initial state of [`AuthFlow`]. 
@@ -21,8 +26,14 @@ pub struct Scram<'a>(pub &'a scram::ServerSecret); impl AuthMethod for Scram<'_> { #[inline(always)] - fn first_message(&self) -> BeMessage<'_> { - Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(scram::METHODS)) + fn first_message(&self, channel_binding: bool) -> BeMessage<'_> { + if channel_binding { + Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(scram::METHODS)) + } else { + Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods( + scram::METHODS_WITHOUT_PLUS, + )) + } } } @@ -32,7 +43,7 @@ pub struct PasswordHack; impl AuthMethod for PasswordHack { #[inline(always)] - fn first_message(&self) -> BeMessage<'_> { + fn first_message(&self, _channel_binding: bool) -> BeMessage<'_> { Be::AuthenticationCleartextPassword } } @@ -43,37 +54,44 @@ pub struct CleartextPassword; impl AuthMethod for CleartextPassword { #[inline(always)] - fn first_message(&self) -> BeMessage<'_> { + fn first_message(&self, _channel_binding: bool) -> BeMessage<'_> { Be::AuthenticationCleartextPassword } } /// This wrapper for [`PqStream`] performs client authentication. #[must_use] -pub struct AuthFlow<'a, Stream, State> { +pub struct AuthFlow<'a, S, State> { /// The underlying stream which implements libpq's protocol. - stream: &'a mut PqStream, + stream: &'a mut PqStream>, /// State might contain ancillary data (see [`Self::begin`]). state: State, + tls_server_end_point: TlsServerEndPoint, } /// Initial state of the stream wrapper. -impl<'a, S: AsyncWrite + Unpin> AuthFlow<'a, S, Begin> { +impl<'a, S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'a, S, Begin> { /// Create a new wrapper for client authentication. - pub fn new(stream: &'a mut PqStream) -> Self { + pub fn new(stream: &'a mut PqStream>) -> Self { + let tls_server_end_point = stream.get_ref().tls_server_end_point(); + Self { stream, state: Begin, + tls_server_end_point, } } /// Move to the next step by sending auth method's name & params to client. pub async fn begin(self, method: M) -> io::Result> { - self.stream.write_message(&method.first_message()).await?; + self.stream + .write_message(&method.first_message(self.tls_server_end_point.supported())) + .await?; Ok(AuthFlow { stream: self.stream, state: method, + tls_server_end_point: self.tls_server_end_point, }) } } @@ -123,9 +141,15 @@ impl AuthFlow<'_, S, Scram<'_>> { return Err(super::AuthError::bad_auth_method(sasl.method)); } + info!("client chooses {}", sasl.method); + let secret = self.state.0; let outcome = sasl::SaslStream::new(self.stream, sasl.message) - .authenticate(scram::Exchange::new(secret, rand::random, None)) + .authenticate(scram::Exchange::new( + secret, + rand::random, + self.tls_server_end_point, + )) .await?; Ok(outcome) diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index 42aecdb6fe89..bedbdbcc8358 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -6,6 +6,8 @@ use std::{net::SocketAddr, sync::Arc}; use futures::future::Either; +use itertools::Itertools; +use proxy::config::TlsServerEndPoint; use tokio::net::TcpListener; use anyhow::{anyhow, bail, ensure, Context}; @@ -65,7 +67,7 @@ async fn main() -> anyhow::Result<()> { let destination: String = args.get_one::("dest").unwrap().parse()?; // Configure TLS - let tls_config: Arc = match ( + let (tls_config, tls_server_end_point): (Arc, TlsServerEndPoint) = match ( args.get_one::("tls-key"), args.get_one::("tls-cert"), ) { @@ -89,16 +91,22 @@ async fn main() -> anyhow::Result<()> { ))? 
.into_iter() .map(rustls::Certificate) - .collect() + .collect_vec() }; - rustls::ServerConfig::builder() + // needed for channel bindings + let first_cert = cert_chain.first().context("missing certificate")?; + let tls_server_end_point = TlsServerEndPoint::new(first_cert)?; + + let tls_config = rustls::ServerConfig::builder() .with_safe_default_cipher_suites() .with_safe_default_kx_groups() .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])? .with_no_client_auth() .with_single_cert(cert_chain, key)? - .into() + .into(); + + (tls_config, tls_server_end_point) } _ => bail!("tls-key and tls-cert must be specified"), }; @@ -113,6 +121,7 @@ async fn main() -> anyhow::Result<()> { let main = tokio::spawn(task_main( Arc::new(destination), tls_config, + tls_server_end_point, proxy_listener, cancellation_token.clone(), )); @@ -134,6 +143,7 @@ async fn main() -> anyhow::Result<()> { async fn task_main( dest_suffix: Arc, tls_config: Arc, + tls_server_end_point: TlsServerEndPoint, listener: tokio::net::TcpListener, cancellation_token: CancellationToken, ) -> anyhow::Result<()> { @@ -159,7 +169,7 @@ async fn task_main( .context("failed to set socket option")?; info!(%peer_addr, "serving"); - handle_client(dest_suffix, tls_config, socket).await + handle_client(dest_suffix, tls_config, tls_server_end_point, socket).await } .unwrap_or_else(|e| { // Acknowledge that the task has finished with an error. @@ -207,6 +217,7 @@ const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmod async fn ssl_handshake( raw_stream: S, tls_config: Arc, + tls_server_end_point: TlsServerEndPoint, ) -> anyhow::Result> { let mut stream = PqStream::new(Stream::from_raw(raw_stream)); @@ -231,7 +242,11 @@ async fn ssl_handshake( if !read_buf.is_empty() { bail!("data is sent before server replied with EncryptionResponse"); } - Ok(raw.upgrade(tls_config).await?) 
+ + Ok(Stream::Tls { + tls: Box::new(raw.upgrade(tls_config).await?), + tls_server_end_point, + }) } unexpected => { info!( @@ -246,9 +261,10 @@ async fn ssl_handshake( async fn handle_client( dest_suffix: Arc, tls_config: Arc, + tls_server_end_point: TlsServerEndPoint, stream: impl AsyncRead + AsyncWrite + Unpin, ) -> anyhow::Result<()> { - let tls_stream = ssl_handshake(stream, tls_config).await?; + let tls_stream = ssl_handshake(stream, tls_config, tls_server_end_point).await?; // Cut off first part of the SNI domain // We receive required destination details in the format of @@ -268,5 +284,5 @@ async fn handle_client( let client = tokio::net::TcpStream::connect(destination).await?; let metrics_aux: MetricsAuxInfo = Default::default(); - proxy::proxy::proxy_pass(tls_stream, client, &metrics_aux).await + proxy::proxy::proxy_pass(tls_stream, client, metrics_aux).await } diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index 570cf0943a0e..7457e268679d 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -1,8 +1,11 @@ use futures::future::Either; use proxy::auth; use proxy::config::AuthenticationConfig; +use proxy::config::CacheOptions; use proxy::config::HttpConfig; use proxy::console; +use proxy::console::provider::AllowedIpsCache; +use proxy::console::provider::NodeInfoCache; use proxy::http; use proxy::rate_limiter::RateLimiterConfig; use proxy::usage_metrics; @@ -90,6 +93,9 @@ struct ProxyCliArgs { /// timeout for http connections #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] sql_over_http_timeout: tokio::time::Duration, + /// Whether the SQL over http pool is opt-in + #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] + sql_over_http_pool_opt_in: bool, /// timeout for scram authentication protocol #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] scram_protocol_timeout: tokio::time::Duration, @@ -110,6 +116,12 @@ struct ProxyCliArgs { initial_limit: usize, #[clap(flatten)] aimd_config: proxy::rate_limiter::AimdConfig, + /// cache for `allowed_ips` (use `size=0` to disable) + #[clap(long, default_value = config::CacheOptions::DEFAULT_OPTIONS_NODE_INFO)] + allowed_ips_cache: String, + /// disable ip check for http requests. If it is too time consuming, it could be turned off. 
+ #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] + disable_ip_check_for_http: bool, } #[tokio::main] @@ -238,11 +250,24 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { let auth_backend = match &args.auth_backend { AuthBackend::Console => { - let config::CacheOptions { size, ttl } = args.wake_compute_cache.parse()?; + let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; + let allowed_ips_cache_config: CacheOptions = args.allowed_ips_cache.parse()?; - info!("Using NodeInfoCache (wake_compute) with size={size} ttl={ttl:?}"); + info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}"); + info!("Using AllowedIpsCache (wake_compute) with options={allowed_ips_cache_config:?}"); let caches = Box::leak(Box::new(console::caches::ApiCaches { - node_info: console::caches::NodeInfoCache::new("node_info_cache", size, ttl), + node_info: NodeInfoCache::new( + "node_info_cache", + wake_compute_cache_config.size, + wake_compute_cache_config.ttl, + true, + ), + allowed_ips: AllowedIpsCache::new( + "allowed_ips_cache", + allowed_ips_cache_config.size, + allowed_ips_cache_config.ttl, + false, + ), })); let config::WakeComputeLockOptions { @@ -275,7 +300,8 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { } }; let http_config = HttpConfig { - sql_over_http_timeout: args.sql_over_http_timeout, + timeout: args.sql_over_http_timeout, + pool_opt_in: args.sql_over_http_pool_opt_in, }; let authentication_config = AuthenticationConfig { scram_protocol_timeout: args.scram_protocol_timeout, @@ -288,6 +314,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { http_config, authentication_config, require_client_ip: args.require_client_ip, + disable_ip_check_for_http: args.disable_ip_check_for_http, })); Ok(config) diff --git a/proxy/src/cache.rs b/proxy/src/cache.rs index a9d6793bbda9..f54f360b0195 100644 --- a/proxy/src/cache.rs +++ b/proxy/src/cache.rs @@ -55,7 +55,7 @@ pub mod timed_lru { /// * Whenever a new entry is inserted, the least recently accessed one is evicted. /// The cache also keeps track of entry's insertion time (`created_at`) and TTL (`expires_at`). /// - /// * When the entry is about to be retrieved, we check its expiration timestamp. + /// * If `update_ttl_on_retrieval` is `true`. When the entry is about to be retrieved, we check its expiration timestamp. /// If the entry has expired, we remove it from the cache; Otherwise we bump the /// expiration timestamp (e.g. +5mins) and change its place in LRU list to prolong /// its existence. @@ -79,6 +79,8 @@ pub mod timed_lru { /// Default time-to-live of a single entry. ttl: Duration, + + update_ttl_on_retrieval: bool, } impl Cache for TimedLru { @@ -99,11 +101,17 @@ pub mod timed_lru { impl TimedLru { /// Construct a new LRU cache with timed entries. - pub fn new(name: &'static str, capacity: usize, ttl: Duration) -> Self { + pub fn new( + name: &'static str, + capacity: usize, + ttl: Duration, + update_ttl_on_retrieval: bool, + ) -> Self { Self { name, cache: LruCache::new(capacity).into(), ttl, + update_ttl_on_retrieval, } } @@ -165,7 +173,9 @@ pub mod timed_lru { let (created_at, expires_at) = (entry.created_at, entry.expires_at); // Update the deadline and the entry's position in the LRU list. 
- raw_entry.get_mut().expires_at = deadline; + if self.update_ttl_on_retrieval { + raw_entry.get_mut().expires_at = deadline; + } raw_entry.to_back(); drop(cache); // drop lock before logging diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 0741ad06230f..c838c8fc3814 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -1,9 +1,6 @@ use crate::{ - auth::parse_endpoint_param, - cancellation::CancelClosure, - console::errors::WakeComputeError, - error::{io_error, UserFacingError}, - proxy::is_neon_param, + auth::parse_endpoint_param, cancellation::CancelClosure, console::errors::WakeComputeError, + error::UserFacingError, proxy::is_neon_param, }; use futures::{FutureExt, TryFutureExt}; use itertools::Itertools; @@ -28,12 +25,9 @@ pub enum ConnectionError { #[error("{COULD_NOT_CONNECT}: {0}")] TlsError(#[from] native_tls::Error), -} -impl From for ConnectionError { - fn from(value: WakeComputeError) -> Self { - io_error(value).into() - } + #[error("{COULD_NOT_CONNECT}: {0}")] + WakeComputeError(#[from] WakeComputeError), } impl UserFacingError for ConnectionError { @@ -46,6 +40,7 @@ impl UserFacingError for ConnectionError { Some(err) => err.message().to_owned(), None => err.to_string(), }, + WakeComputeError(err) => err.to_string_client(), _ => COULD_NOT_CONNECT.to_owned(), } } diff --git a/proxy/src/config.rs b/proxy/src/config.rs index bd00123905eb..182d71f9be0b 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -1,12 +1,15 @@ use crate::auth; use anyhow::{bail, ensure, Context, Ok}; -use rustls::sign; +use rustls::{sign, Certificate, PrivateKey}; +use sha2::{Digest, Sha256}; use std::{ collections::{HashMap, HashSet}, str::FromStr, sync::Arc, time::Duration, }; +use tracing::{error, info}; +use x509_parser::oid_registry; pub struct ProxyConfig { pub tls_config: Option, @@ -16,6 +19,7 @@ pub struct ProxyConfig { pub http_config: HttpConfig, pub authentication_config: AuthenticationConfig, pub require_client_ip: bool, + pub disable_ip_check_for_http: bool, } #[derive(Debug)] @@ -27,10 +31,12 @@ pub struct MetricCollectionConfig { pub struct TlsConfig { pub config: Arc, pub common_names: Option>, + pub cert_resolver: Arc, } pub struct HttpConfig { - pub sql_over_http_timeout: tokio::time::Duration, + pub timeout: tokio::time::Duration, + pub pool_opt_in: bool, } pub struct AuthenticationConfig { @@ -52,7 +58,7 @@ pub fn configure_tls( let mut cert_resolver = CertResolver::new(); // add default certificate - cert_resolver.add_cert(key_path, cert_path, true)?; + cert_resolver.add_cert_path(key_path, cert_path, true)?; // add extra certificates if let Some(certs_dir) = certs_dir { @@ -64,7 +70,7 @@ pub fn configure_tls( let key_path = path.join("tls.key"); let cert_path = path.join("tls.crt"); if key_path.exists() && cert_path.exists() { - cert_resolver.add_cert( + cert_resolver.add_cert_path( &key_path.to_string_lossy(), &cert_path.to_string_lossy(), false, @@ -76,35 +82,97 @@ pub fn configure_tls( let common_names = cert_resolver.get_common_names(); + let cert_resolver = Arc::new(cert_resolver); + let config = rustls::ServerConfig::builder() .with_safe_default_cipher_suites() .with_safe_default_kx_groups() // allow TLS 1.2 to be compatible with older client libraries .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])? 
.with_no_client_auth() - .with_cert_resolver(Arc::new(cert_resolver)) + .with_cert_resolver(cert_resolver.clone()) .into(); Ok(TlsConfig { config, common_names: Some(common_names), + cert_resolver, }) } -struct CertResolver { - certs: HashMap>, - default: Option>, +/// Channel binding parameter +/// +/// +/// Description: The hash of the TLS server's certificate as it +/// appears, octet for octet, in the server's Certificate message. Note +/// that the Certificate message contains a certificate_list, in which +/// the first element is the server's certificate. +/// +/// The hash function is to be selected as follows: +/// +/// * if the certificate's signatureAlgorithm uses a single hash +/// function, and that hash function is either MD5 or SHA-1, then use SHA-256; +/// +/// * if the certificate's signatureAlgorithm uses a single hash +/// function and that hash function neither MD5 nor SHA-1, then use +/// the hash function associated with the certificate's +/// signatureAlgorithm; +/// +/// * if the certificate's signatureAlgorithm uses no hash functions or +/// uses multiple hash functions, then this channel binding type's +/// channel bindings are undefined at this time (updates to is channel +/// binding type may occur to address this issue if it ever arises). +#[derive(Debug, Clone, Copy)] +pub enum TlsServerEndPoint { + Sha256([u8; 32]), + Undefined, } -impl CertResolver { - fn new() -> Self { - Self { - certs: HashMap::new(), - default: None, +impl TlsServerEndPoint { + pub fn new(cert: &Certificate) -> anyhow::Result { + let sha256_oids = [ + // I'm explicitly not adding MD5 or SHA1 here... They're bad. + oid_registry::OID_SIG_ECDSA_WITH_SHA256, + oid_registry::OID_PKCS1_SHA256WITHRSA, + ]; + + let pem = x509_parser::parse_x509_certificate(&cert.0) + .context("Failed to parse PEM object from cerficiate")? + .1; + + info!(subject = %pem.subject, "parsing TLS certificate"); + + let reg = oid_registry::OidRegistry::default().with_all_crypto(); + let oid = pem.signature_algorithm.oid(); + let alg = reg.get(oid); + if sha256_oids.contains(oid) { + let tls_server_end_point: [u8; 32] = + Sha256::new().chain_update(&cert.0).finalize().into(); + info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding"); + Ok(Self::Sha256(tls_server_end_point)) + } else { + error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding"); + Ok(Self::Undefined) } } - fn add_cert( + pub fn supported(&self) -> bool { + !matches!(self, TlsServerEndPoint::Undefined) + } +} + +#[derive(Default)] +pub struct CertResolver { + certs: HashMap, TlsServerEndPoint)>, + default: Option<(Arc, TlsServerEndPoint)>, +} + +impl CertResolver { + pub fn new() -> Self { + Self::default() + } + + fn add_cert_path( &mut self, key_path: &str, cert_path: &str, @@ -120,57 +188,65 @@ impl CertResolver { keys.pop().map(rustls::PrivateKey).unwrap() }; - let key = sign::any_supported_type(&priv_key).context("invalid private key")?; - let cert_chain_bytes = std::fs::read(cert_path) .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?; let cert_chain = { rustls_pemfile::certs(&mut &cert_chain_bytes[..]) - .context(format!( + .with_context(|| { + format!( "Failed to read TLS certificate chain from bytes from file at '{cert_path}'." - ))? + ) + })? 
.into_iter() .map(rustls::Certificate) .collect() }; - let common_name = { - let pem = x509_parser::pem::parse_x509_pem(&cert_chain_bytes) - .context(format!( - "Failed to parse PEM object from bytes from file at '{cert_path}'." - ))? - .1; - let common_name = pem.parse_x509()?.subject().to_string(); - - // We only use non-wildcard certificates in link proxy so it seems okay to treat them the same as - // wildcard ones as we don't use SNI there. That treatment only affects certificate selection, so - // verify-full will still check wildcard match. Old coding here just ignored non-wildcard common names - // and passed None instead, which blows up number of cases downstream code should handle. Proper coding - // here should better avoid Option for common_names, and do wildcard-based certificate selection instead - // of cutting off '*.' parts. - if common_name.starts_with("CN=*.") { - common_name.strip_prefix("CN=*.").map(|s| s.to_string()) - } else { - common_name.strip_prefix("CN=").map(|s| s.to_string()) - } + self.add_cert(priv_key, cert_chain, is_default) + } + + pub fn add_cert( + &mut self, + priv_key: PrivateKey, + cert_chain: Vec, + is_default: bool, + ) -> anyhow::Result<()> { + let key = sign::any_supported_type(&priv_key).context("invalid private key")?; + + let first_cert = &cert_chain[0]; + let tls_server_end_point = TlsServerEndPoint::new(first_cert)?; + let pem = x509_parser::parse_x509_certificate(&first_cert.0) + .context("Failed to parse PEM object from cerficiate")? + .1; + + let common_name = pem.subject().to_string(); + + // We only use non-wildcard certificates in link proxy so it seems okay to treat them the same as + // wildcard ones as we don't use SNI there. That treatment only affects certificate selection, so + // verify-full will still check wildcard match. Old coding here just ignored non-wildcard common names + // and passed None instead, which blows up number of cases downstream code should handle. Proper coding + // here should better avoid Option for common_names, and do wildcard-based certificate selection instead + // of cutting off '*.' parts. + let common_name = if common_name.starts_with("CN=*.") { + common_name.strip_prefix("CN=*.").map(|s| s.to_string()) + } else { + common_name.strip_prefix("CN=").map(|s| s.to_string()) } - .context(format!( - "Failed to parse common name from certificate at '{cert_path}'." - ))?; + .context("Failed to parse common name from certificate")?; let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key)); if is_default { - self.default = Some(cert.clone()); + self.default = Some((cert.clone(), tls_server_end_point)); } - self.certs.insert(common_name, cert); + self.certs.insert(common_name, (cert, tls_server_end_point)); Ok(()) } - fn get_common_names(&self) -> HashSet { + pub fn get_common_names(&self) -> HashSet { self.certs.keys().map(|s| s.to_string()).collect() } } @@ -178,15 +254,24 @@ impl CertResolver { impl rustls::server::ResolvesServerCert for CertResolver { fn resolve( &self, - _client_hello: rustls::server::ClientHello, + client_hello: rustls::server::ClientHello, ) -> Option> { + self.resolve(client_hello.server_name()).map(|x| x.0) + } +} + +impl CertResolver { + pub fn resolve( + &self, + server_name: Option<&str>, + ) -> Option<(Arc, TlsServerEndPoint)> { // loop here and cut off more and more subdomains until we find // a match to get a proper wildcard support. OTOH, we now do not // use nested domains, so keep this simple for now. 
// // With the current coding foo.com will match *.foo.com and that // repeats behavior of the old code. - if let Some(mut sni_name) = _client_hello.server_name() { + if let Some(mut sni_name) = server_name { loop { if let Some(cert) = self.certs.get(sni_name) { return Some(cert.clone()); @@ -214,6 +299,7 @@ impl rustls::server::ResolvesServerCert for CertResolver { } /// Helper for cmdline cache options parsing. +#[derive(Debug)] pub struct CacheOptions { /// Max number of entries. pub size: usize, diff --git a/proxy/src/console.rs b/proxy/src/console.rs index 6da627389e9f..07bc807950cc 100644 --- a/proxy/src/console.rs +++ b/proxy/src/console.rs @@ -6,7 +6,7 @@ pub mod messages; /// Wrappers for console APIs and their mocks. pub mod provider; -pub use provider::{errors, Api, AuthInfo, CachedNodeInfo, ConsoleReqExtra, NodeInfo}; +pub use provider::{errors, Api, AuthSecret, CachedNodeInfo, ConsoleReqExtra, NodeInfo}; /// Various cache-related types. pub mod caches { diff --git a/proxy/src/console/messages.rs b/proxy/src/console/messages.rs index e5f1615b149e..837379b21ff4 100644 --- a/proxy/src/console/messages.rs +++ b/proxy/src/console/messages.rs @@ -1,4 +1,5 @@ use serde::Deserialize; +use smol_str::SmolStr; use std::fmt; /// Generic error response with human-readable description. @@ -88,11 +89,11 @@ impl fmt::Debug for DatabaseInfo { /// Various labels for prometheus metrics. /// Also known as `ProxyMetricsAuxInfo` in the console. -#[derive(Debug, Deserialize, Default)] +#[derive(Debug, Deserialize, Clone, Default)] pub struct MetricsAuxInfo { - pub endpoint_id: Box, - pub project_id: Box, - pub branch_id: Box, + pub endpoint_id: SmolStr, + pub project_id: SmolStr, + pub branch_id: SmolStr, } impl MetricsAuxInfo { diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs index 54bcd1f081c2..e735b9f66c21 100644 --- a/proxy/src/console/provider.rs +++ b/proxy/src/console/provider.rs @@ -204,7 +204,7 @@ pub struct ConsoleReqExtra<'a> { } /// Auth secret which is managed by the cloud. -pub enum AuthInfo { +pub enum AuthSecret { /// Md5 hash of user's password. Md5([u8; 16]), @@ -212,6 +212,13 @@ pub enum AuthInfo { Scram(scram::ServerSecret), } +#[derive(Default)] +pub struct AuthInfo { + pub secret: Option, + /// List of IP addresses allowed for the autorization. + pub allowed_ips: Vec, +} + /// Info for establishing a connection to a compute node. /// This is what we get after auth succeeded, but not before! #[derive(Clone)] @@ -222,7 +229,7 @@ pub struct NodeInfo { pub config: compute::ConnCfg, /// Labels for proxy's metrics. - pub aux: Arc, + pub aux: MetricsAuxInfo, /// Whether we should accept self-signed certificates (for testing) pub allow_self_signed_compute: bool, @@ -230,6 +237,7 @@ pub struct NodeInfo { pub type NodeInfoCache = TimedLru, NodeInfo>; pub type CachedNodeInfo = timed_lru::Cached<&'static NodeInfoCache>; +pub type AllowedIpsCache = TimedLru, Arc>>; /// This will allocate per each call, but the http requests alone /// already require a few allocations, so it should be fine. @@ -240,7 +248,13 @@ pub trait Api { &self, extra: &ConsoleReqExtra<'_>, creds: &ClientCredentials, - ) -> Result, errors::GetAuthInfoError>; + ) -> Result; + + async fn get_allowed_ips( + &self, + extra: &ConsoleReqExtra<'_>, + creds: &ClientCredentials, + ) -> Result>, errors::GetAuthInfoError>; /// Wake up the compute node and return the corresponding connection info. 
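// A sketch of the SNI fallback loop in CertResolver::resolve above, assuming
// certificates are keyed by their wildcard-stripped common name: the lookup drops
// leading labels until a key matches, so "foo.bar.com" can be served by a
// "*.bar.com" certificate stored under "bar.com".
use std::collections::HashMap;

fn resolve_by_sni<'a, V>(certs: &'a HashMap<String, V>, mut sni: &str) -> Option<&'a V> {
    loop {
        if let Some(v) = certs.get(sni) {
            return Some(v);
        }
        // Cut off one subdomain level ("a.b.c" -> "b.c") and retry.
        match sni.split_once('.') {
            Some((_, rest)) => sni = rest,
            None => return None,
        }
    }
}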
async fn wake_compute( @@ -254,6 +268,8 @@ pub trait Api { pub struct ApiCaches { /// Cache for the `wake_compute` API method. pub node_info: NodeInfoCache, + /// Cache for the `get_allowed_ips`. TODO(anna): use notifications listener instead. + pub allowed_ips: TimedLru, Arc>>, } /// Various caches for [`console`](super). diff --git a/proxy/src/console/provider/mock.rs b/proxy/src/console/provider/mock.rs index 750a2d141ea3..4cc68f0ac143 100644 --- a/proxy/src/console/provider/mock.rs +++ b/proxy/src/console/provider/mock.rs @@ -1,14 +1,16 @@ //! Mock console backend which relies on a user-provided postgres instance. +use std::sync::Arc; + use super::{ errors::{ApiError, GetAuthInfoError, WakeComputeError}, - AuthInfo, CachedNodeInfo, ConsoleReqExtra, NodeInfo, + AuthInfo, AuthSecret, CachedNodeInfo, ConsoleReqExtra, NodeInfo, }; use crate::{auth::ClientCredentials, compute, error::io_error, scram, url::ApiUrl}; use async_trait::async_trait; use futures::TryFutureExt; use thiserror::Error; -use tokio_postgres::config::SslMode; +use tokio_postgres::{config::SslMode, Client}; use tracing::{error, info, info_span, warn, Instrument}; #[derive(Debug, Error)] @@ -46,8 +48,8 @@ impl Api { async fn do_get_auth_info( &self, creds: &ClientCredentials<'_>, - ) -> Result, GetAuthInfoError> { - async { + ) -> Result { + let (secret, allowed_ips) = async { // Perhaps we could persist this connection, but then we'd have to // write more code for reopening it if it got closed, which doesn't // seem worth it. @@ -55,32 +57,48 @@ impl Api { tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?; tokio::spawn(connection); - let query = "select rolpassword from pg_catalog.pg_authid where rolname = $1"; - let rows = client.query(query, &[&creds.user]).await?; - - // We can get at most one row, because `rolname` is unique. - let row = match rows.first() { - Some(row) => row, - // This means that the user doesn't exist, so there can be no secret. - // However, this is still a *valid* outcome which is very similar - // to getting `404 Not found` from the Neon console. + let secret = match get_execute_postgres_query( + &client, + "select rolpassword from pg_catalog.pg_authid where rolname = $1", + &[&creds.user], + "rolpassword", + ) + .await? + { + Some(entry) => { + info!("got a secret: {entry}"); // safe since it's not a prod scenario + let secret = scram::ServerSecret::parse(&entry).map(AuthSecret::Scram); + secret.or_else(|| parse_md5(&entry).map(AuthSecret::Md5)) + } None => { warn!("user '{}' does not exist", creds.user); - return Ok(None); + None } }; + let allowed_ips = match get_execute_postgres_query( + &client, + "select allowed_ips from neon_control_plane.endpoints where endpoint_id = $1", + &[&creds.project.clone().unwrap_or_default().as_str()], + "allowed_ips", + ) + .await? 
+ { + Some(s) => { + info!("got allowed_ips: {s}"); + s.split(',').map(String::from).collect() + } + None => vec![], + }; - let entry = row - .try_get("rolpassword") - .map_err(MockApiError::PasswordNotSet)?; - - info!("got a secret: {entry}"); // safe since it's not a prod scenario - let secret = scram::ServerSecret::parse(entry).map(AuthInfo::Scram); - Ok(secret.or_else(|| parse_md5(entry).map(AuthInfo::Md5))) + Ok((secret, allowed_ips)) } - .map_err(crate::error::log_error) + .map_err(crate::error::log_error::) .instrument(info_span!("postgres", url = self.endpoint.as_str())) - .await + .await?; + Ok(AuthInfo { + secret, + allowed_ips, + }) } async fn do_wake_compute(&self) -> Result { @@ -100,6 +118,27 @@ impl Api { } } +async fn get_execute_postgres_query( + client: &Client, + query: &str, + params: &[&(dyn tokio_postgres::types::ToSql + Sync)], + idx: &str, +) -> Result, GetAuthInfoError> { + let rows = client.query(query, params).await?; + + // We can get at most one row, because `rolname` is unique. + let row = match rows.first() { + Some(row) => row, + // This means that the user doesn't exist, so there can be no secret. + // However, this is still a *valid* outcome which is very similar + // to getting `404 Not found` from the Neon console. + None => return Ok(None), + }; + + let entry = row.try_get(idx).map_err(MockApiError::PasswordNotSet)?; + Ok(Some(entry)) +} + #[async_trait] impl super::Api for Api { #[tracing::instrument(skip_all)] @@ -107,10 +146,18 @@ impl super::Api for Api { &self, _extra: &ConsoleReqExtra<'_>, creds: &ClientCredentials, - ) -> Result, GetAuthInfoError> { + ) -> Result { self.do_get_auth_info(creds).await } + async fn get_allowed_ips( + &self, + _extra: &ConsoleReqExtra<'_>, + creds: &ClientCredentials, + ) -> Result>, GetAuthInfoError> { + Ok(Arc::new(self.do_get_auth_info(creds).await?.allowed_ips)) + } + #[tracing::instrument(skip_all)] async fn wake_compute( &self, diff --git a/proxy/src/console/provider/neon.rs b/proxy/src/console/provider/neon.rs index 0dc7c7153433..7828a7d7e43b 100644 --- a/proxy/src/console/provider/neon.rs +++ b/proxy/src/console/provider/neon.rs @@ -3,11 +3,17 @@ use super::{ super::messages::{ConsoleError, GetRoleSecret, WakeCompute}, errors::{ApiError, GetAuthInfoError, WakeComputeError}, - ApiCaches, ApiLocks, AuthInfo, CachedNodeInfo, ConsoleReqExtra, NodeInfo, + ApiCaches, ApiLocks, AuthInfo, AuthSecret, CachedNodeInfo, ConsoleReqExtra, NodeInfo, +}; +use crate::{ + auth::ClientCredentials, + compute, http, + proxy::{ALLOWED_IPS_BY_CACHE_OUTCOME, ALLOWED_IPS_NUMBER}, + scram, }; -use crate::{auth::ClientCredentials, compute, http, scram}; use async_trait::async_trait; use futures::TryFutureExt; +use itertools::Itertools; use std::{net::SocketAddr, sync::Arc}; use tokio::time::Instant; use tokio_postgres::config::SslMode; @@ -48,7 +54,7 @@ impl Api { &self, extra: &ConsoleReqExtra<'_>, creds: &ClientCredentials<'_>, - ) -> Result, GetAuthInfoError> { + ) -> Result { let request_id = uuid::Uuid::new_v4().to_string(); async { let request = self @@ -72,16 +78,25 @@ impl Api { Ok(body) => body, // Error 404 is special: it's ok not to have a secret. 
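// Illustrative only: in the mock backend above, the allowed IPs for an endpoint
// live in a single comma-separated text column, so a row such as
// "10.0.0.1,10.0.0.2" expands into one Vec entry per address. The helper name
// below is assumed, not part of the patch.
fn split_allowed_ips(raw: &str) -> Vec<String> {
    raw.split(',').map(String::from).collect()
}

#[test]
fn split_allowed_ips_example() {
    assert_eq!(split_allowed_ips("10.0.0.1,10.0.0.2"), vec!["10.0.0.1", "10.0.0.2"]);
}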
Err(e) => match e.http_status_code() { - Some(http::StatusCode::NOT_FOUND) => return Ok(None), + Some(http::StatusCode::NOT_FOUND) => return Ok(AuthInfo::default()), _otherwise => return Err(e.into()), }, }; let secret = scram::ServerSecret::parse(&body.role_secret) - .map(AuthInfo::Scram) + .map(AuthSecret::Scram) .ok_or(GetAuthInfoError::BadSecret)?; - - Ok(Some(secret)) + let allowed_ips = body + .allowed_ips + .into_iter() + .flatten() + .map(String::from) + .collect_vec(); + ALLOWED_IPS_NUMBER.observe(allowed_ips.len() as f64); + Ok(AuthInfo { + secret: Some(secret), + allowed_ips, + }) } .map_err(crate::error::log_error) .instrument(info_span!("http", id = request_id)) @@ -129,7 +144,7 @@ impl Api { let node = NodeInfo { config, - aux: body.aux.into(), + aux: body.aux, allow_self_signed_compute: false, }; @@ -148,10 +163,32 @@ impl super::Api for Api { &self, extra: &ConsoleReqExtra<'_>, creds: &ClientCredentials, - ) -> Result, GetAuthInfoError> { + ) -> Result { self.do_get_auth_info(extra, creds).await } + async fn get_allowed_ips( + &self, + extra: &ConsoleReqExtra<'_>, + creds: &ClientCredentials, + ) -> Result>, GetAuthInfoError> { + let key: &str = creds.project().expect("impossible"); + if let Some(allowed_ips) = self.caches.allowed_ips.get(key) { + ALLOWED_IPS_BY_CACHE_OUTCOME + .with_label_values(&["hit"]) + .inc(); + return Ok(Arc::new(allowed_ips.to_vec())); + } + ALLOWED_IPS_BY_CACHE_OUTCOME + .with_label_values(&["miss"]) + .inc(); + let allowed_ips = Arc::new(self.do_get_auth_info(extra, creds).await?.allowed_ips); + self.caches + .allowed_ips + .insert(key.into(), allowed_ips.clone()); + Ok(allowed_ips) + } + #[tracing::instrument(skip_all)] async fn wake_compute( &self, diff --git a/proxy/src/http.rs b/proxy/src/http.rs index 159b949da345..09423eca77c9 100644 --- a/proxy/src/http.rs +++ b/proxy/src/http.rs @@ -13,7 +13,7 @@ pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware}; use tokio::time::Instant; use tracing::trace; -use crate::{rate_limiter, url::ApiUrl}; +use crate::{proxy::CONSOLE_REQUEST_LATENCY, rate_limiter, url::ApiUrl}; use reqwest_middleware::RequestBuilder; /// This is the preferred way to create new http clients, @@ -90,7 +90,13 @@ impl Endpoint { /// Execute a [request](reqwest::Request). 
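// The get_allowed_ips caching above follows a plain read-through pattern: serve
// from the per-project cache on a hit, otherwise fetch from the console, insert,
// and return. Sketched here with a HashMap standing in for the project's TimedLru;
// the hit/miss counters are omitted.
use std::collections::HashMap;
use std::sync::Arc;

fn get_or_fetch(
    cache: &mut HashMap<String, Arc<Vec<String>>>,
    key: &str,
    fetch: impl FnOnce() -> Vec<String>,
) -> Arc<Vec<String>> {
    if let Some(hit) = cache.get(key) {
        return hit.clone(); // cache hit: no console round-trip
    }
    let fresh = Arc::new(fetch()); // cache miss: ask the console
    cache.insert(key.to_string(), fresh.clone());
    fresh
}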
pub async fn execute(&self, request: Request) -> Result { - self.client.execute(request).await + let path = request.url().path().to_string(); + let start = Instant::now(); + let res = self.client.execute(request).await; + CONSOLE_REQUEST_LATENCY + .with_label_values(&[&path]) + .observe(start.elapsed().as_secs_f64()); + res } } diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs index adcb1bffaf9e..36d01f9acc36 100644 --- a/proxy/src/proxy.rs +++ b/proxy/src/proxy.rs @@ -24,7 +24,7 @@ use prometheus::{ IntGaugeVec, }; use regex::Regex; -use std::{error::Error, io, ops::ControlFlow, sync::Arc, time::Instant}; +use std::{error::Error, io, net::SocketAddr, ops::ControlFlow, sync::Arc, time::Instant}; use tokio::{ io::{AsyncRead, AsyncWrite, AsyncWriteExt}, time, @@ -110,12 +110,34 @@ static COMPUTE_CONNECTION_LATENCY: Lazy = Lazy::new(|| { .unwrap() }); +pub static CONSOLE_REQUEST_LATENCY: Lazy = Lazy::new(|| { + register_histogram_vec!( + "proxy_console_request_latency", + "Time it took for proxy to establish a connection to the compute endpoint", + // proxy_wake_compute/proxy_get_role_info + &["request"], + // largest bucket = 2^16 * 0.2ms = 13s + exponential_buckets(0.0002, 2.0, 16).unwrap(), + ) + .unwrap() +}); + +pub static ALLOWED_IPS_BY_CACHE_OUTCOME: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "proxy_allowed_ips_cache_misses", + "Number of cache hits/misses for allowed ips", + // hit/miss + &["outcome"], + ) + .unwrap() +}); + pub static RATE_LIMITER_ACQUIRE_LATENCY: Lazy = Lazy::new(|| { register_histogram!( "semaphore_control_plane_token_acquire_seconds", "Time it took for proxy to establish a connection to the compute endpoint", - // largest bucket = 2^16 * 0.5ms = 32s - exponential_buckets(0.0005, 2.0, 16).unwrap(), + // largest bucket = 3^16 * 0.00005ms = 2.15s + exponential_buckets(0.00005, 3.0, 16).unwrap(), ) .unwrap() }); @@ -138,6 +160,15 @@ pub static NUM_CONNECTION_ACCEPTED_BY_SNI: Lazy = Lazy::new(|| { .unwrap() }); +pub static ALLOWED_IPS_NUMBER: Lazy = Lazy::new(|| { + register_histogram!( + "proxy_allowed_ips_number", + "Number of allowed ips", + vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0], + ) + .unwrap() +}); + pub struct LatencyTimer { // time since the stopwatch was started start: Option, @@ -265,7 +296,7 @@ pub async fn task_main( loop { tokio::select! { accept_result = listener.accept() => { - let (socket, _) = accept_result?; + let (socket, peer_addr) = accept_result?; let session_id = uuid::Uuid::new_v4(); let cancel_map = Arc::clone(&cancel_map); @@ -274,7 +305,9 @@ pub async fn task_main( info!("accepted postgres client connection"); let mut socket = WithClientIp::new(socket); + let mut peer_addr = peer_addr; if let Some(ip) = socket.wait_for_addr().await? 
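// The Endpoint::execute change above amounts to wrapping the request in a
// stopwatch and recording elapsed seconds into a histogram labeled by request
// path. A self-contained sketch of that pattern; the metric name is illustrative.
use once_cell::sync::Lazy;
use prometheus::{exponential_buckets, register_histogram_vec, HistogramVec};
use std::time::Instant;

static REQUEST_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "example_request_latency",
        "Time spent per request path",
        &["request"],
        // 16 buckets starting at 0.2ms, doubling each step
        exponential_buckets(0.0002, 2.0, 16).unwrap(),
    )
    .unwrap()
});

fn timed<T>(path: &str, run: impl FnOnce() -> T) -> T {
    let start = Instant::now();
    let result = run();
    REQUEST_LATENCY
        .with_label_values(&[path])
        .observe(start.elapsed().as_secs_f64());
    result
}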
{ + peer_addr = ip; tracing::Span::current().record("peer_addr", &tracing::field::display(ip)); } else if config.require_client_ip { bail!("missing required client IP"); @@ -285,7 +318,7 @@ pub async fn task_main( .set_nodelay(true) .context("failed to set socket option")?; - handle_client(config, &cancel_map, session_id, socket, ClientMode::Tcp).await + handle_client(config, &cancel_map, session_id, socket, ClientMode::Tcp, peer_addr).await } .instrument(info_span!("handle_client", ?session_id, peer_addr = tracing::field::Empty)) .unwrap_or_else(move |e| { @@ -375,6 +408,7 @@ pub async fn handle_client( session_id: uuid::Uuid, stream: S, mode: ClientMode, + peer_addr: SocketAddr, ) -> anyhow::Result<()> { info!( protocol = mode.protocol_label(), @@ -408,7 +442,7 @@ pub async fn handle_client( let result = config .auth_backend .as_ref() - .map(|_| auth::ClientCredentials::parse(¶ms, hostname, common_names)) + .map(|_| auth::ClientCredentials::parse(¶ms, hostname, common_names, peer_addr)) .transpose(); match result { @@ -470,7 +504,17 @@ async fn handshake( if !read_buf.is_empty() { bail!("data is sent before server replied with EncryptionResponse"); } - stream = PqStream::new(raw.upgrade(tls.to_server_config()).await?); + let tls_stream = raw.upgrade(tls.to_server_config()).await?; + + let (_, tls_server_end_point) = tls + .cert_resolver + .resolve(tls_stream.get_ref().1.server_name()) + .context("missing certificate")?; + + stream = PqStream::new(Stream::Tls { + tls: Box::new(tls_stream), + tls_server_end_point, + }); } } _ => bail!(ERR_PROTO_VIOLATION), @@ -833,11 +877,11 @@ async fn prepare_client_connection( pub async fn proxy_pass( client: impl AsyncRead + AsyncWrite + Unpin, compute: impl AsyncRead + AsyncWrite + Unpin, - aux: &MetricsAuxInfo, + aux: MetricsAuxInfo, ) -> anyhow::Result<()> { let usage = USAGE_METRICS.register(Ids { - endpoint_id: aux.endpoint_id.to_string(), - branch_id: aux.branch_id.to_string(), + endpoint_id: aux.endpoint_id.clone(), + branch_id: aux.branch_id.clone(), }); let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx"]); @@ -875,7 +919,7 @@ pub async fn proxy_pass( /// Thin connection context. struct Client<'a, S> { /// The underlying libpq protocol stream. - stream: PqStream, + stream: PqStream>, /// Client credentials that we care about. creds: auth::BackendType<'a, auth::ClientCredentials<'a>>, /// KV-dictionary with PostgreSQL connection params. @@ -889,7 +933,7 @@ struct Client<'a, S> { impl<'a, S> Client<'a, S> { /// Construct a new connection context. fn new( - stream: PqStream, + stream: PqStream>, creds: auth::BackendType<'a, auth::ClientCredentials<'a>>, params: &'a StartupMessageParams, session_id: uuid::Uuid, @@ -988,7 +1032,7 @@ impl Client<'_, S> { // immediately after opening the connection. let (stream, read_buf) = stream.into_inner(); node.stream.write_all(&read_buf).await?; - proxy_pass(stream, node.stream, &aux).await + proxy_pass(stream, node.stream, aux).await } } diff --git a/proxy/src/proxy/tests.rs b/proxy/src/proxy/tests.rs index 3ae4df46ef83..b97c0efce47f 100644 --- a/proxy/src/proxy/tests.rs +++ b/proxy/src/proxy/tests.rs @@ -1,19 +1,23 @@ //! A group of high-level tests for connection establishing logic and auth. -//! 
+ +mod mitm; + use super::*; use crate::auth::backend::TestBackend; use crate::auth::ClientCredentials; +use crate::config::CertResolver; use crate::console::{CachedNodeInfo, NodeInfo}; use crate::{auth, http, sasl, scram}; use async_trait::async_trait; use rstest::rstest; use tokio_postgres::config::SslMode; use tokio_postgres::tls::{MakeTlsConnect, NoTls}; -use tokio_postgres_rustls::MakeRustlsConnect; +use tokio_postgres_rustls::{MakeRustlsConnect, RustlsStream}; /// Generate a set of TLS certificates: CA + server. fn generate_certs( hostname: &str, + common_name: &str, ) -> anyhow::Result<(rustls::Certificate, rustls::Certificate, rustls::PrivateKey)> { let ca = rcgen::Certificate::from_params({ let mut params = rcgen::CertificateParams::default(); @@ -21,7 +25,15 @@ fn generate_certs( params })?; - let cert = rcgen::generate_simple_self_signed(vec![hostname.into()])?; + let cert = rcgen::Certificate::from_params({ + let mut params = rcgen::CertificateParams::new(vec![hostname.into()]); + params.distinguished_name = rcgen::DistinguishedName::new(); + params + .distinguished_name + .push(rcgen::DnType::CommonName, common_name); + params + })?; + Ok(( rustls::Certificate(ca.serialize_der()?), rustls::Certificate(cert.serialize_der_with_signer(&ca)?), @@ -37,7 +49,14 @@ struct ClientConfig<'a> { impl ClientConfig<'_> { fn make_tls_connect( self, - ) -> anyhow::Result> { + ) -> anyhow::Result< + impl tokio_postgres::tls::TlsConnect< + S, + Error = impl std::fmt::Debug, + Future = impl Send, + Stream = RustlsStream, + >, + > { let mut mk = MakeRustlsConnect::new(self.config); let tls = MakeTlsConnect::::make_tls_connect(&mut mk, self.hostname)?; Ok(tls) @@ -49,20 +68,24 @@ fn generate_tls_config<'a>( hostname: &'a str, common_name: &'a str, ) -> anyhow::Result<(ClientConfig<'a>, TlsConfig)> { - let (ca, cert, key) = generate_certs(hostname)?; + let (ca, cert, key) = generate_certs(hostname, common_name)?; let tls_config = { let config = rustls::ServerConfig::builder() .with_safe_defaults() .with_no_client_auth() - .with_single_cert(vec![cert], key)? + .with_single_cert(vec![cert.clone()], key.clone())? .into(); - let common_names = Some([common_name.to_owned()].iter().cloned().collect()); + let mut cert_resolver = CertResolver::new(); + cert_resolver.add_cert(key, vec![cert], true)?; + + let common_names = Some(cert_resolver.get_common_names()); TlsConfig { config, common_names, + cert_resolver: Arc::new(cert_resolver), } }; @@ -253,6 +276,7 @@ async fn scram_auth_good(#[case] password: &str) -> anyhow::Result<()> { )); let (_client, _conn) = tokio_postgres::Config::new() + .channel_binding(tokio_postgres::config::ChannelBinding::Require) .user("user") .dbname("db") .password(password) @@ -263,6 +287,30 @@ async fn scram_auth_good(#[case] password: &str) -> anyhow::Result<()> { proxy.await? } +#[tokio::test] +async fn scram_auth_disable_channel_binding() -> anyhow::Result<()> { + let (client, server) = tokio::io::duplex(1024); + + let (client_config, server_config) = + generate_tls_config("generic-project-name.localhost", "localhost")?; + let proxy = tokio::spawn(dummy_proxy( + client, + Some(server_config), + Scram::new("password")?, + )); + + let (_client, _conn) = tokio_postgres::Config::new() + .channel_binding(tokio_postgres::config::ChannelBinding::Disable) + .user("user") + .dbname("db") + .password("password") + .ssl_mode(SslMode::Require) + .connect_raw(server, client_config.make_tls_connect()?) + .await?; + + proxy.await? 
+} + #[tokio::test] async fn scram_auth_mock() -> anyhow::Result<()> { let (client, server) = tokio::io::duplex(1024); @@ -418,6 +466,10 @@ impl TestBackend for TestConnectMechanism { x => panic!("expecting action {:?}, wake_compute is called instead", x), } } + + fn get_allowed_ips(&self) -> Result>, console::errors::GetAuthInfoError> { + unimplemented!("not used in tests") + } } fn helper_create_cached_node_info() -> CachedNodeInfo { diff --git a/proxy/src/proxy/tests/mitm.rs b/proxy/src/proxy/tests/mitm.rs new file mode 100644 index 000000000000..50b3034936e8 --- /dev/null +++ b/proxy/src/proxy/tests/mitm.rs @@ -0,0 +1,257 @@ +//! Man-in-the-middle tests +//! +//! Channel binding should prevent a proxy server +//! - that has access to create valid certificates - +//! from controlling the TLS connection. + +use std::fmt::Debug; + +use super::*; +use bytes::{Bytes, BytesMut}; +use futures::{SinkExt, StreamExt}; +use postgres_protocol::message::frontend; +use tokio::io::{AsyncReadExt, DuplexStream}; +use tokio_postgres::config::SslMode; +use tokio_postgres::tls::TlsConnect; +use tokio_util::codec::{Decoder, Encoder}; + +enum Intercept { + None, + Methods, + SASLResponse, +} + +async fn proxy_mitm( + intercept: Intercept, +) -> (DuplexStream, DuplexStream, ClientConfig<'static>, TlsConfig) { + let (end_server1, client1) = tokio::io::duplex(1024); + let (server2, end_client2) = tokio::io::duplex(1024); + + let (client_config1, server_config1) = + generate_tls_config("generic-project-name.localhost", "localhost").unwrap(); + let (client_config2, server_config2) = + generate_tls_config("generic-project-name.localhost", "localhost").unwrap(); + + tokio::spawn(async move { + // begin handshake with end_server + let end_server = connect_tls(server2, client_config2.make_tls_connect().unwrap()).await; + // process handshake with end_client + let (end_client, startup) = + handshake(client1, Some(&server_config1), &CancelMap::default()) + .await + .unwrap() + .unwrap(); + + let mut end_server = tokio_util::codec::Framed::new(end_server, PgFrame); + let (end_client, buf) = end_client.framed.into_inner(); + assert!(buf.is_empty()); + let mut end_client = tokio_util::codec::Framed::new(end_client, PgFrame); + + // give the end_server the startup parameters + let mut buf = BytesMut::new(); + frontend::startup_message(startup.iter(), &mut buf).unwrap(); + end_server.send(buf.freeze()).await.unwrap(); + + // proxy messages between end_client and end_server + loop { + tokio::select! 
{ + message = end_server.next() => { + match message { + Some(Ok(message)) => { + // intercept SASL and return only SCRAM-SHA-256 ;) + if matches!(intercept, Intercept::Methods) && message.starts_with(b"R") && message[5..].starts_with(&[0,0,0,10]) { + end_client.send(Bytes::from_static(b"R\0\0\0\x17\0\0\0\x0aSCRAM-SHA-256\0\0")).await.unwrap(); + continue; + } + end_client.send(message).await.unwrap() + } + _ => break, + } + } + message = end_client.next() => { + match message { + Some(Ok(message)) => { + // intercept SASL response and return SCRAM-SHA-256 with no channel binding ;) + if matches!(intercept, Intercept::SASLResponse) && message.starts_with(b"p") && message[5..].starts_with(b"SCRAM-SHA-256-PLUS\0") { + let sasl_message = &message[1+4+19+4..]; + let mut new_message = b"n,,".to_vec(); + new_message.extend_from_slice(sasl_message.strip_prefix(b"p=tls-server-end-point,,").unwrap()); + + let mut buf = BytesMut::new(); + frontend::sasl_initial_response("SCRAM-SHA-256", &new_message, &mut buf).unwrap(); + + end_server.send(buf.freeze()).await.unwrap(); + continue; + } + end_server.send(message).await.unwrap() + } + _ => break, + } + } + else => { break } + } + } + }); + + (end_server1, end_client2, client_config1, server_config2) +} + +/// taken from tokio-postgres +pub async fn connect_tls(mut stream: S, tls: T) -> T::Stream +where + S: AsyncRead + AsyncWrite + Unpin, + T: TlsConnect, + T::Error: Debug, +{ + let mut buf = BytesMut::new(); + frontend::ssl_request(&mut buf); + stream.write_all(&buf).await.unwrap(); + + let mut buf = [0]; + stream.read_exact(&mut buf).await.unwrap(); + + if buf[0] != b'S' { + panic!("ssl not supported by server"); + } + + tls.connect(stream).await.unwrap() +} + +struct PgFrame; +impl Decoder for PgFrame { + type Item = Bytes; + type Error = io::Error; + + fn decode(&mut self, src: &mut BytesMut) -> Result, Self::Error> { + if src.len() < 5 { + src.reserve(5 - src.len()); + return Ok(None); + } + let len = u32::from_be_bytes(src[1..5].try_into().unwrap()) as usize + 1; + if src.len() < len { + src.reserve(len - src.len()); + return Ok(None); + } + Ok(Some(src.split_to(len).freeze())) + } +} +impl Encoder for PgFrame { + type Error = io::Error; + + fn encode(&mut self, item: Bytes, dst: &mut BytesMut) -> Result<(), Self::Error> { + dst.extend_from_slice(&item); + Ok(()) + } +} + +/// If the client doesn't support channel bindings, it can be exploited. +#[tokio::test] +async fn scram_auth_disable_channel_binding() -> anyhow::Result<()> { + let (server, client, client_config, server_config) = proxy_mitm(Intercept::None).await; + let proxy = tokio::spawn(dummy_proxy( + client, + Some(server_config), + Scram::new("password")?, + )); + + let _client_err = tokio_postgres::Config::new() + .channel_binding(tokio_postgres::config::ChannelBinding::Disable) + .user("user") + .dbname("db") + .password("password") + .ssl_mode(SslMode::Require) + .connect_raw(server, client_config.make_tls_connect()?) + .await?; + + proxy.await? 
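// A worked example of the PgFrame length arithmetic above: every regular Postgres
// message is a 1-byte tag followed by a 4-byte big-endian length that counts
// itself and the payload but not the tag, so a whole frame occupies
// `length + 1` bytes.
#[test]
fn pg_frame_length_math() {
    // AuthenticationOk: tag 'R', length 8 (4 length bytes + 4 payload bytes), code 0.
    let frame: &[u8] = &[b'R', 0, 0, 0, 8, 0, 0, 0, 0];
    let len = u32::from_be_bytes(frame[1..5].try_into().unwrap()) as usize + 1;
    assert_eq!(len, frame.len());
}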
+} + +/// If the client chooses SCRAM-PLUS, it will fail +#[tokio::test] +async fn scram_auth_prefer_channel_binding() -> anyhow::Result<()> { + connect_failure( + Intercept::None, + tokio_postgres::config::ChannelBinding::Prefer, + ) + .await +} + +/// If the MITM pretends like SCRAM-PLUS isn't available, but the client supports it, it will fail +#[tokio::test] +async fn scram_auth_prefer_channel_binding_intercept() -> anyhow::Result<()> { + connect_failure( + Intercept::Methods, + tokio_postgres::config::ChannelBinding::Prefer, + ) + .await +} + +/// If the MITM pretends like the client doesn't support channel bindings, it will fail +#[tokio::test] +async fn scram_auth_prefer_channel_binding_intercept_response() -> anyhow::Result<()> { + connect_failure( + Intercept::SASLResponse, + tokio_postgres::config::ChannelBinding::Prefer, + ) + .await +} + +/// If the client chooses SCRAM-PLUS, it will fail +#[tokio::test] +async fn scram_auth_require_channel_binding() -> anyhow::Result<()> { + connect_failure( + Intercept::None, + tokio_postgres::config::ChannelBinding::Require, + ) + .await +} + +/// If the client requires SCRAM-PLUS, and it is spoofed to remove SCRAM-PLUS, it will fail +#[tokio::test] +async fn scram_auth_require_channel_binding_intercept() -> anyhow::Result<()> { + connect_failure( + Intercept::Methods, + tokio_postgres::config::ChannelBinding::Require, + ) + .await +} + +/// If the client requires SCRAM-PLUS, and it is spoofed to remove SCRAM-PLUS, it will fail +#[tokio::test] +async fn scram_auth_require_channel_binding_intercept_response() -> anyhow::Result<()> { + connect_failure( + Intercept::SASLResponse, + tokio_postgres::config::ChannelBinding::Require, + ) + .await +} + +async fn connect_failure( + intercept: Intercept, + channel_binding: tokio_postgres::config::ChannelBinding, +) -> anyhow::Result<()> { + let (server, client, client_config, server_config) = proxy_mitm(intercept).await; + let proxy = tokio::spawn(dummy_proxy( + client, + Some(server_config), + Scram::new("password")?, + )); + + let _client_err = tokio_postgres::Config::new() + .channel_binding(channel_binding) + .user("user") + .dbname("db") + .password("password") + .ssl_mode(SslMode::Require) + .connect_raw(server, client_config.make_tls_connect()?) + .await + .err() + .context("client shouldn't be able to connect")?; + + let _server_err = proxy + .await? + .err() + .context("server shouldn't accept client")?; + + Ok(()) +} diff --git a/proxy/src/sasl/channel_binding.rs b/proxy/src/sasl/channel_binding.rs index 776adabe5587..13d681de6dc9 100644 --- a/proxy/src/sasl/channel_binding.rs +++ b/proxy/src/sasl/channel_binding.rs @@ -36,9 +36,9 @@ impl<'a> ChannelBinding<&'a str> { impl ChannelBinding { /// Encode channel binding data as base64 for subsequent checks. - pub fn encode( + pub fn encode<'a, E>( &self, - get_cbind_data: impl FnOnce(&T) -> Result, + get_cbind_data: impl FnOnce(&T) -> Result<&'a [u8], E>, ) -> Result, E> { use ChannelBinding::*; Ok(match self { @@ -51,12 +51,11 @@ impl ChannelBinding { "eSws".into() } Required(mode) => { - let msg = format!( - "p={mode},,{data}", - mode = mode, - data = get_cbind_data(mode)? 
- ); - base64::encode(msg).into() + use std::io::Write; + let mut cbind_input = vec![]; + write!(&mut cbind_input, "p={mode},,",).unwrap(); + cbind_input.extend_from_slice(get_cbind_data(mode)?); + base64::encode(&cbind_input).into() } }) } @@ -77,7 +76,7 @@ mod tests { ]; for (cb, input) in cases { - assert_eq!(cb.encode(|_| anyhow::Ok("bar".to_owned()))?, input); + assert_eq!(cb.encode(|_| anyhow::Ok(b"bar"))?, input); } Ok(()) diff --git a/proxy/src/scram.rs b/proxy/src/scram.rs index 2de26af96b3b..63271309e17f 100644 --- a/proxy/src/scram.rs +++ b/proxy/src/scram.rs @@ -22,9 +22,12 @@ pub use secret::ServerSecret; use hmac::{Hmac, Mac}; use sha2::{Digest, Sha256}; -// TODO: add SCRAM-SHA-256-PLUS +const SCRAM_SHA_256: &str = "SCRAM-SHA-256"; +const SCRAM_SHA_256_PLUS: &str = "SCRAM-SHA-256-PLUS"; + /// A list of supported SCRAM methods. -pub const METHODS: &[&str] = &["SCRAM-SHA-256"]; +pub const METHODS: &[&str] = &[SCRAM_SHA_256_PLUS, SCRAM_SHA_256]; +pub const METHODS_WITHOUT_PLUS: &[&str] = &[SCRAM_SHA_256]; /// Decode base64 into array without any heap allocations fn base64_decode_array(input: impl AsRef<[u8]>) -> Option<[u8; N]> { @@ -80,7 +83,11 @@ mod tests { const NONCE: [u8; 18] = [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ]; - let mut exchange = Exchange::new(&secret, || NONCE, None); + let mut exchange = Exchange::new( + &secret, + || NONCE, + crate::config::TlsServerEndPoint::Undefined, + ); let client_first = "n,,n=user,r=rOprNGfwEbeRWgbNEkqO"; let client_final = "c=biws,r=rOprNGfwEbeRWgbNEkqOAQIDBAUGBwgJCgsMDQ4PEBES,p=rw1r5Kph5ThxmaUBC2GAQ6MfXbPnNkFiTIvdb/Rear0="; diff --git a/proxy/src/scram/exchange.rs b/proxy/src/scram/exchange.rs index 882769a70d91..319d9b101484 100644 --- a/proxy/src/scram/exchange.rs +++ b/proxy/src/scram/exchange.rs @@ -5,9 +5,11 @@ use super::messages::{ }; use super::secret::ServerSecret; use super::signature::SignatureBuilder; +use crate::config; use crate::sasl::{self, ChannelBinding, Error as SaslError}; /// The only channel binding mode we currently support. 
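// What the encode() change above computes for the Required case, written out as a
// standalone sketch: cbind-input is the gs2-header ("p=<mode>,,") followed by the
// raw channel-binding data, and its base64 encoding must equal the `c=` attribute
// from the client-final-message. Function names are illustrative.
fn expected_cbind_attr(mode: &str, cbind_data: &[u8]) -> String {
    let mut cbind_input = format!("p={mode},,").into_bytes();
    cbind_input.extend_from_slice(cbind_data);
    base64::encode(&cbind_input)
}

fn channel_binding_matches(client_c_attr: &str, mode: &str, cbind_data: &[u8]) -> bool {
    expected_cbind_attr(mode, cbind_data) == client_c_attr
}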
+#[derive(Debug)] struct TlsServerEndPoint; impl std::fmt::Display for TlsServerEndPoint { @@ -43,20 +45,20 @@ pub struct Exchange<'a> { state: ExchangeState, secret: &'a ServerSecret, nonce: fn() -> [u8; SCRAM_RAW_NONCE_LEN], - cert_digest: Option<&'a [u8]>, + tls_server_end_point: config::TlsServerEndPoint, } impl<'a> Exchange<'a> { pub fn new( secret: &'a ServerSecret, nonce: fn() -> [u8; SCRAM_RAW_NONCE_LEN], - cert_digest: Option<&'a [u8]>, + tls_server_end_point: config::TlsServerEndPoint, ) -> Self { Self { state: ExchangeState::Initial, secret, nonce, - cert_digest, + tls_server_end_point, } } } @@ -71,6 +73,14 @@ impl sasl::Mechanism for Exchange<'_> { let client_first_message = ClientFirstMessage::parse(input) .ok_or(SaslError::BadClientMessage("invalid client-first-message"))?; + // If the flag is set to "y" and the server supports channel + // binding, the server MUST fail authentication + if client_first_message.cbind_flag == ChannelBinding::NotSupportedServer + && self.tls_server_end_point.supported() + { + return Err(SaslError::ChannelBindingFailed("SCRAM-PLUS not used")); + } + let server_first_message = client_first_message.build_server_first_message( &(self.nonce)(), &self.secret.salt_base64, @@ -94,10 +104,11 @@ impl sasl::Mechanism for Exchange<'_> { let client_final_message = ClientFinalMessage::parse(input) .ok_or(SaslError::BadClientMessage("invalid client-final-message"))?; - let channel_binding = cbind_flag.encode(|_| { - self.cert_digest - .map(base64::encode) - .ok_or(SaslError::ChannelBindingFailed("no cert digest provided")) + let channel_binding = cbind_flag.encode(|_| match &self.tls_server_end_point { + config::TlsServerEndPoint::Sha256(x) => Ok(x), + config::TlsServerEndPoint::Undefined => { + Err(SaslError::ChannelBindingFailed("no cert digest provided")) + } })?; // This might've been caused by a MITM attack diff --git a/proxy/src/serverless.rs b/proxy/src/serverless.rs index 23deda3ae67e..45f8132393e8 100644 --- a/proxy/src/serverless.rs +++ b/proxy/src/serverless.rs @@ -23,6 +23,7 @@ use hyper::{ Body, Method, Request, Response, }; +use std::net::SocketAddr; use std::task::Poll; use std::{future::ready, sync::Arc}; use tls_listener::TlsListener; @@ -102,7 +103,7 @@ pub async fn task_main( let session_id = uuid::Uuid::new_v4(); request_handler( - req, config, conn_pool, cancel_map, session_id, sni_name, + req, config, conn_pool, cancel_map, session_id, sni_name, peer_addr, ) .instrument(info_span!( "serverless", @@ -170,6 +171,7 @@ async fn request_handler( cancel_map: Arc, session_id: uuid::Uuid, sni_hostname: Option, + peer_addr: SocketAddr, ) -> Result, ApiError> { let host = request .headers() @@ -187,9 +189,15 @@ async fn request_handler( tokio::spawn( async move { - if let Err(e) = - websocket::serve_websocket(websocket, config, &cancel_map, session_id, host) - .await + if let Err(e) = websocket::serve_websocket( + websocket, + config, + &cancel_map, + session_id, + host, + peer_addr, + ) + .await { error!(session_id = ?session_id, "error in websocket connection: {e:#}"); } @@ -205,6 +213,7 @@ async fn request_handler( sni_hostname, conn_pool, session_id, + peer_addr, &config.http_config, ) .await diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index b753bc8918bc..ca7a9ad0a0c7 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -8,7 +8,8 @@ use pbkdf2::{ Params, Pbkdf2, }; use pq_proto::StartupMessageParams; -use std::{collections::HashMap, sync::Arc}; +use smol_str::SmolStr; 
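// The new check in Exchange above enforces the RFC 5802 downgrade rule, shown here
// as a simple predicate: a gs2 cbind flag of "y" means "client supports channel
// binding but believes the server does not"; if the server did advertise
// SCRAM-SHA-256-PLUS (i.e. tls-server-end-point is available), that combination
// indicates a stripped offer and must be rejected. The types below are simplified
// stand-ins for the proxy's ChannelBinding.
#[derive(PartialEq)]
enum CbindFlag {
    NotSupportedClient, // "n"
    NotSupportedServer, // "y"
    Required,           // "p=<type>"
}

fn must_reject(flag: &CbindFlag, server_supports_binding: bool) -> bool {
    *flag == CbindFlag::NotSupportedServer && server_supports_binding
}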
+use std::{collections::HashMap, net::SocketAddr, sync::Arc}; use std::{ fmt, task::{ready, Poll}, @@ -21,7 +22,8 @@ use tokio::time; use tokio_postgres::{AsyncMessage, ReadyForQueryStatus}; use crate::{ - auth, console, + auth::{self, check_peer_addr_is_in_list}, + console, proxy::{ neon_options, LatencyTimer, NUM_DB_CONNECTIONS_CLOSED_COUNTER, NUM_DB_CONNECTIONS_OPENED_COUNTER, @@ -40,16 +42,16 @@ const MAX_CONNS_PER_ENDPOINT: usize = 20; #[derive(Debug, Clone)] pub struct ConnInfo { - pub username: String, - pub dbname: String, - pub hostname: String, - pub password: String, - pub options: Option, + pub username: SmolStr, + pub dbname: SmolStr, + pub hostname: SmolStr, + pub password: SmolStr, + pub options: Option, } impl ConnInfo { // hm, change to hasher to avoid cloning? - pub fn db_and_user(&self) -> (String, String) { + pub fn db_and_user(&self) -> (SmolStr, SmolStr) { (self.dbname.clone(), self.username.clone()) } } @@ -69,7 +71,7 @@ struct ConnPoolEntry { // Per-endpoint connection pool, (dbname, username) -> DbUserConnPool // Number of open connections is limited by the `max_conns_per_endpoint`. pub struct EndpointConnPool { - pools: HashMap<(String, String), DbUserConnPool>, + pools: HashMap<(SmolStr, SmolStr), DbUserConnPool>, total_conns: usize, } @@ -94,7 +96,7 @@ pub struct GlobalConnPool { // // That should be a fairly conteded map, so return reference to the per-endpoint // pool as early as possible and release the lock. - global_pool: DashMap>>, + global_pool: DashMap>>, /// [`DashMap::len`] iterates over all inner pools and acquires a read lock on each. /// That seems like far too much effort, so we're using a relaxed increment counter instead. @@ -144,6 +146,7 @@ impl GlobalConnPool { conn_info: &ConnInfo, force_new: bool, session_id: uuid::Uuid, + peer_addr: SocketAddr, ) -> anyhow::Result { let mut client: Option = None; let mut latency_timer = LatencyTimer::new("http"); @@ -203,6 +206,7 @@ impl GlobalConnPool { conn_id, session_id, latency_timer, + peer_addr, ) .await } else { @@ -225,6 +229,7 @@ impl GlobalConnPool { conn_id, session_id, latency_timer, + peer_addr, ) .await }; @@ -323,7 +328,7 @@ impl GlobalConnPool { Ok(()) } - fn get_or_create_endpoint_pool(&self, endpoint: &String) -> Arc> { + fn get_or_create_endpoint_pool(&self, endpoint: &SmolStr) -> Arc> { // fast path if let Some(pool) = self.global_pool.get(endpoint) { return pool.clone(); @@ -401,6 +406,7 @@ async fn connect_to_compute( conn_id: uuid::Uuid, session_id: uuid::Uuid, latency_timer: LatencyTimer, + peer_addr: SocketAddr, ) -> anyhow::Result { let tls = config.tls_config.as_ref(); let common_names = tls.and_then(|tls| tls.common_names.clone()); @@ -411,12 +417,13 @@ async fn connect_to_compute( ("application_name", APP_NAME), ("options", conn_info.options.as_deref().unwrap_or("")), ]); - - let creds = config - .auth_backend - .as_ref() - .map(|_| auth::ClientCredentials::parse(¶ms, Some(&conn_info.hostname), common_names)) - .transpose()?; + let creds = auth::ClientCredentials::parse( + ¶ms, + Some(&conn_info.hostname), + common_names, + peer_addr, + )?; + let backend = config.auth_backend.as_ref().map(|_| creds); let console_options = neon_options(¶ms); @@ -425,8 +432,14 @@ async fn connect_to_compute( application_name: Some(APP_NAME), options: console_options.as_deref(), }; - - let node_info = creds + // TODO(anna): this is a bit hacky way, consider using console notification listener. 
+ if !config.disable_ip_check_for_http { + let allowed_ips = backend.get_allowed_ips(&extra).await?; + if !check_peer_addr_is_in_list(&peer_addr.ip(), &allowed_ips) { + return Err(auth::AuthError::ip_address_not_allowed().into()); + } + } + let node_info = backend .wake_compute(&extra) .await? .context("missing cache entry from wake_compute")?; @@ -439,7 +452,7 @@ async fn connect_to_compute( }, node_info, &extra, - &creds, + &backend, latency_timer, ) .await @@ -456,7 +469,7 @@ async fn connect_to_compute_once( let (client, mut connection) = config .user(&conn_info.username) - .password(&conn_info.password) + .password(&*conn_info.password) .dbname(&conn_info.dbname) .connect_timeout(timeout) .connect(tokio_postgres::NoTls) @@ -470,8 +483,8 @@ async fn connect_to_compute_once( info!(%conn_info, %session, "new connection"); }); let ids = Ids { - endpoint_id: node_info.aux.endpoint_id.to_string(), - branch_id: node_info.aux.branch_id.to_string(), + endpoint_id: node_info.aux.endpoint_id.clone(), + branch_id: node_info.aux.branch_id.clone(), }; tokio::spawn( diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 4a9829e360d7..6c337a837cdb 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -1,3 +1,4 @@ +use std::net::SocketAddr; use std::sync::Arc; use anyhow::bail; @@ -13,6 +14,7 @@ use hyper::{Body, HeaderMap, Request}; use serde_json::json; use serde_json::Map; use serde_json::Value; +use tokio_postgres::error::DbError; use tokio_postgres::types::Kind; use tokio_postgres::types::Type; use tokio_postgres::GenericClient; @@ -180,16 +182,16 @@ fn get_conn_info( for (key, value) in pairs { if key == "options" { - options = Some(value.to_string()); + options = Some(value.into()); break; } } Ok(ConnInfo { - username: username.to_owned(), - dbname: dbname.to_owned(), - hostname: hostname.to_owned(), - password: password.to_owned(), + username: username.into(), + dbname: dbname.into(), + hostname: hostname.into(), + password: password.into(), options, }) } @@ -200,11 +202,19 @@ pub async fn handle( sni_hostname: Option, conn_pool: Arc, session_id: uuid::Uuid, + peer_addr: SocketAddr, config: &'static HttpConfig, ) -> Result, ApiError> { let result = tokio::time::timeout( - config.sql_over_http_timeout, - handle_inner(request, sni_hostname, conn_pool, session_id), + config.timeout, + handle_inner( + config, + request, + sni_hostname, + conn_pool, + session_id, + peer_addr, + ), ) .await; let mut response = match result { @@ -212,14 +222,33 @@ pub async fn handle( Ok(r) => r, Err(e) => { let message = format!("{:?}", e); - let code = e.downcast_ref::().and_then(|e| { - e.code() - .map(|s| serde_json::to_value(s.code()).unwrap_or_default()) - }); - let code = match code { - Some(c) => c, - None => Value::Null, - }; + let db_error = e + .downcast_ref::() + .and_then(|e| e.as_db_error()); + fn get<'a, T: serde::Serialize>( + db: Option<&'a DbError>, + x: impl FnOnce(&'a DbError) -> T, + ) -> Value { + db.map(x) + .and_then(|t| serde_json::to_value(t).ok()) + .unwrap_or_default() + } + + // TODO(conrad): db_error.position() + let code = get(db_error, |db| db.code().code()); + let severity = get(db_error, |db| db.severity()); + let detail = get(db_error, |db| db.detail()); + let hint = get(db_error, |db| db.hint()); + let where_ = get(db_error, |db| db.where_()); + let table = get(db_error, |db| db.table()); + let column = get(db_error, |db| db.column()); + let schema = get(db_error, |db| db.schema()); + let datatype 
= get(db_error, |db| db.datatype()); + let constraint = get(db_error, |db| db.constraint()); + let file = get(db_error, |db| db.file()); + let line = get(db_error, |db| db.line()); + let routine = get(db_error, |db| db.routine()); + error!( ?code, "sql-over-http per-client task finished with an error: {e:#}" @@ -227,14 +256,29 @@ pub async fn handle( // TODO: this shouldn't always be bad request. json_response( StatusCode::BAD_REQUEST, - json!({ "message": message, "code": code }), + json!({ + "message": message, + "code": code, + "detail": detail, + "hint": hint, + "severity": severity, + "where": where_, + "table": table, + "column": column, + "schema": schema, + "datatype": datatype, + "constraint": constraint, + "file": file, + "line": line, + "routine": routine, + }), )? } }, Err(_) => { let message = format!( "HTTP-Connection timed out, execution time exeeded {} seconds", - config.sql_over_http_timeout.as_secs() + config.timeout.as_secs() ); error!(message); json_response( @@ -252,10 +296,12 @@ pub async fn handle( #[instrument(name = "sql-over-http", fields(pid = tracing::field::Empty), skip_all)] async fn handle_inner( + config: &'static HttpConfig, request: Request, sni_hostname: Option, conn_pool: Arc, session_id: uuid::Uuid, + peer_addr: SocketAddr, ) -> anyhow::Result> { NUM_CONNECTIONS_ACCEPTED_COUNTER .with_label_values(&["http"]) @@ -276,7 +322,8 @@ async fn handle_inner( let array_mode = headers.get(&ARRAY_MODE) == Some(&HEADER_VALUE_TRUE); // Allow connection pooling only if explicitly requested - let allow_pool = headers.get(&ALLOW_POOL) == Some(&HEADER_VALUE_TRUE); + // or if we have decided that http pool is no longer opt-in + let allow_pool = !config.pool_opt_in || headers.get(&ALLOW_POOL) == Some(&HEADER_VALUE_TRUE); // isolation level, read only and deferrable @@ -314,7 +361,9 @@ async fn handle_inner( let body = hyper::body::to_bytes(request.into_body()).await?; let payload: Payload = serde_json::from_slice(&body)?; - let mut client = conn_pool.get(&conn_info, !allow_pool, session_id).await?; + let mut client = conn_pool + .get(&conn_info, !allow_pool, session_id, peer_addr) + .await?; let mut response = Response::builder() .status(StatusCode::OK) diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs index 86141ab64f81..8fb9a3dee4f4 100644 --- a/proxy/src/serverless/websocket.rs +++ b/proxy/src/serverless/websocket.rs @@ -11,6 +11,7 @@ use hyper_tungstenite::{tungstenite::Message, HyperWebsocket, WebSocketStream}; use pin_project_lite::pin_project; use std::{ + net::SocketAddr, pin::Pin, task::{ready, Context, Poll}, }; @@ -132,6 +133,7 @@ pub async fn serve_websocket( cancel_map: &CancelMap, session_id: uuid::Uuid, hostname: Option, + peer_addr: SocketAddr, ) -> anyhow::Result<()> { let websocket = websocket.await?; handle_client( @@ -140,6 +142,7 @@ pub async fn serve_websocket( session_id, WebSocketRw::new(websocket), ClientMode::Websockets { hostname }, + peer_addr, ) .await?; Ok(()) diff --git a/proxy/src/stream.rs b/proxy/src/stream.rs index 6210601a80d4..f48b3fe39ff3 100644 --- a/proxy/src/stream.rs +++ b/proxy/src/stream.rs @@ -1,7 +1,8 @@ +use crate::config::TlsServerEndPoint; use crate::error::UserFacingError; use anyhow::bail; use bytes::BytesMut; -use pin_project_lite::pin_project; + use pq_proto::framed::{ConnectionError, Framed}; use pq_proto::{BeMessage, FeMessage, FeStartupPacket, ProtocolError}; use rustls::ServerConfig; @@ -17,7 +18,7 @@ use tokio_rustls::server::TlsStream; /// or [`AsyncWrite`] to prevent subtle errors 
(e.g. trying /// to pass random malformed bytes through the connection). pub struct PqStream { - framed: Framed, + pub(crate) framed: Framed, } impl PqStream { @@ -118,19 +119,21 @@ impl PqStream { } } -pin_project! { - /// Wrapper for upgrading raw streams into secure streams. - /// NOTE: it should be possible to decompose this object as necessary. - #[project = StreamProj] - pub enum Stream { - /// We always begin with a raw stream, - /// which may then be upgraded into a secure stream. - Raw { #[pin] raw: S }, +/// Wrapper for upgrading raw streams into secure streams. +pub enum Stream { + /// We always begin with a raw stream, + /// which may then be upgraded into a secure stream. + Raw { raw: S }, + Tls { /// We box [`TlsStream`] since it can be quite large. - Tls { #[pin] tls: Box> }, - } + tls: Box>, + /// Channel binding parameter + tls_server_end_point: TlsServerEndPoint, + }, } +impl Unpin for Stream {} + impl Stream { /// Construct a new instance from a raw stream. pub fn from_raw(raw: S) -> Self { @@ -141,7 +144,17 @@ impl Stream { pub fn sni_hostname(&self) -> Option<&str> { match self { Stream::Raw { .. } => None, - Stream::Tls { tls } => tls.get_ref().1.server_name(), + Stream::Tls { tls, .. } => tls.get_ref().1.server_name(), + } + } + + pub fn tls_server_end_point(&self) -> TlsServerEndPoint { + match self { + Stream::Raw { .. } => TlsServerEndPoint::Undefined, + Stream::Tls { + tls_server_end_point, + .. + } => *tls_server_end_point, } } } @@ -158,12 +171,9 @@ pub enum StreamUpgradeError { impl Stream { /// If possible, upgrade raw stream into a secure TLS-based stream. - pub async fn upgrade(self, cfg: Arc) -> Result { + pub async fn upgrade(self, cfg: Arc) -> Result, StreamUpgradeError> { match self { - Stream::Raw { raw } => { - let tls = Box::new(tokio_rustls::TlsAcceptor::from(cfg).accept(raw).await?); - Ok(Stream::Tls { tls }) - } + Stream::Raw { raw } => Ok(tokio_rustls::TlsAcceptor::from(cfg).accept(raw).await?), Stream::Tls { .. } => Err(StreamUpgradeError::AlreadyTls), } } @@ -171,50 +181,46 @@ impl Stream { impl AsyncRead for Stream { fn poll_read( - self: Pin<&mut Self>, + mut self: Pin<&mut Self>, context: &mut task::Context<'_>, buf: &mut ReadBuf<'_>, ) -> task::Poll> { - use StreamProj::*; - match self.project() { - Raw { raw } => raw.poll_read(context, buf), - Tls { tls } => tls.poll_read(context, buf), + match &mut *self { + Self::Raw { raw } => Pin::new(raw).poll_read(context, buf), + Self::Tls { tls, .. } => Pin::new(tls).poll_read(context, buf), } } } impl AsyncWrite for Stream { fn poll_write( - self: Pin<&mut Self>, + mut self: Pin<&mut Self>, context: &mut task::Context<'_>, buf: &[u8], ) -> task::Poll> { - use StreamProj::*; - match self.project() { - Raw { raw } => raw.poll_write(context, buf), - Tls { tls } => tls.poll_write(context, buf), + match &mut *self { + Self::Raw { raw } => Pin::new(raw).poll_write(context, buf), + Self::Tls { tls, .. } => Pin::new(tls).poll_write(context, buf), } } fn poll_flush( - self: Pin<&mut Self>, + mut self: Pin<&mut Self>, context: &mut task::Context<'_>, ) -> task::Poll> { - use StreamProj::*; - match self.project() { - Raw { raw } => raw.poll_flush(context), - Tls { tls } => tls.poll_flush(context), + match &mut *self { + Self::Raw { raw } => Pin::new(raw).poll_flush(context), + Self::Tls { tls, .. 
} => Pin::new(tls).poll_flush(context), } } fn poll_shutdown( - self: Pin<&mut Self>, + mut self: Pin<&mut Self>, context: &mut task::Context<'_>, ) -> task::Poll> { - use StreamProj::*; - match self.project() { - Raw { raw } => raw.poll_shutdown(context), - Tls { tls } => tls.poll_shutdown(context), + match &mut *self { + Self::Raw { raw } => Pin::new(raw).poll_shutdown(context), + Self::Tls { tls, .. } => Pin::new(tls).poll_shutdown(context), } } } diff --git a/proxy/src/usage_metrics.rs b/proxy/src/usage_metrics.rs index 180b5f7199b4..789a4c680ce7 100644 --- a/proxy/src/usage_metrics.rs +++ b/proxy/src/usage_metrics.rs @@ -6,6 +6,7 @@ use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_S use dashmap::{mapref::entry::Entry, DashMap}; use once_cell::sync::Lazy; use serde::{Deserialize, Serialize}; +use smol_str::SmolStr; use std::{ convert::Infallible, sync::{ @@ -29,8 +30,8 @@ const DEFAULT_HTTP_REPORTING_TIMEOUT: Duration = Duration::from_secs(60); /// because we enrich the event with project_id in the control-plane endpoint. #[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug, Clone)] pub struct Ids { - pub endpoint_id: String, - pub branch_id: String, + pub endpoint_id: SmolStr, + pub branch_id: SmolStr, } #[derive(Debug)] @@ -290,8 +291,8 @@ mod tests { // register a new counter let counter = metrics.register(Ids { - endpoint_id: "e1".to_string(), - branch_id: "b1".to_string(), + endpoint_id: "e1".into(), + branch_id: "b1".into(), }); // the counter should be observed despite 0 egress diff --git a/pyproject.toml b/pyproject.toml index 396edabe1006..536efeab5683 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,8 +33,8 @@ psutil = "^5.9.4" types-psutil = "^5.9.5.12" types-toml = "^0.10.8.6" pytest-httpserver = "^1.0.8" -aiohttp = "3.8.6" -pytest-rerunfailures = "^11.1.2" +aiohttp = "3.9.0" +pytest-rerunfailures = "^13.0" types-pytest-lazy-fixture = "^0.6.3.3" pytest-split = "^0.8.1" zstandard = "^0.21.0" diff --git a/s3_scrubber/Cargo.toml b/s3_scrubber/Cargo.toml index 0f3e5630e85e..e26f2c6d6b34 100644 --- a/s3_scrubber/Cargo.toml +++ b/s3_scrubber/Cargo.toml @@ -6,8 +6,6 @@ license.workspace = true [dependencies] aws-sdk-s3.workspace = true -aws-smithy-http.workspace = true -aws-types.workspace = true either.workspace = true tokio-rustls.workspace = true anyhow.workspace = true @@ -30,7 +28,7 @@ itertools.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } chrono = { workspace = true, default-features = false, features = ["clock", "serde"] } reqwest = { workspace = true, default-features = false, features = ["rustls-tls", "json"] } -aws-config = { workspace = true, default-features = false, features = ["rustls", "credentials-sso"] } +aws-config = { workspace = true, default-features = false, features = ["rustls", "sso"] } pageserver = { path = "../pageserver" } remote_storage = { path = "../libs/remote_storage" } diff --git a/s3_scrubber/src/checks.rs b/s3_scrubber/src/checks.rs index 64702fca3d15..510a12866366 100644 --- a/s3_scrubber/src/checks.rs +++ b/s3_scrubber/src/checks.rs @@ -94,11 +94,10 @@ pub(crate) async fn branch_cleanup_and_check_errors( != index_part.get_disk_consistent_lsn() { result.errors.push(format!( - "Mismatching disk_consistent_lsn in TimelineMetadata ({}) and in the index_part ({})", - index_part.metadata.disk_consistent_lsn(), - index_part.get_disk_consistent_lsn(), - - )) + "Mismatching disk_consistent_lsn in TimelineMetadata ({}) and in the index_part ({})", + 
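// Why the stream.rs change above can drop pin_project: once the inner stream is
// Unpin, `Pin::new(&mut inner)` is safe and each poll_* method simply delegates to
// the active variant. A minimal sketch over a generic two-variant enum (the real
// Stream also carries tls_server_end_point in its Tls variant):
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, ReadBuf};

enum EitherStream<A, B> {
    Left(A),
    Right(B),
}

impl<A: AsyncRead + Unpin, B: AsyncRead + Unpin> AsyncRead for EitherStream<A, B> {
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        // Both variants are Unpin, so the enum is Unpin and re-pinning a mutable
        // reference to either side is sound.
        match &mut *self {
            EitherStream::Left(a) => Pin::new(a).poll_read(cx, buf),
            EitherStream::Right(b) => Pin::new(b).poll_read(cx, buf),
        }
    }
}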
index_part.metadata.disk_consistent_lsn(), + index_part.get_disk_consistent_lsn(), + )) } if index_part.layer_metadata.is_empty() { @@ -109,8 +108,8 @@ pub(crate) async fn branch_cleanup_and_check_errors( for (layer, metadata) in index_part.layer_metadata { if metadata.file_size == 0 { result.errors.push(format!( - "index_part.json contains a layer {} that has 0 size in its layer metadata", layer.file_name(), - )) + "index_part.json contains a layer {} that has 0 size in its layer metadata", layer.file_name(), + )) } let layer_map_key = (layer, metadata.generation); @@ -136,7 +135,7 @@ pub(crate) async fn branch_cleanup_and_check_errors( // a new generation that didn't upload an index yet. // // Even so, a layer that is not referenced by the index could just - // be something enqueued for deletion, so while this check is valid + // be something enqueued for deletion, so while this check is valid // for indicating that a layer is garbage, it is not an indicator // of a problem. gen < &index_part_generation) @@ -251,10 +250,7 @@ pub(crate) async fn list_timeline_blobs( pin_mut!(stream); while let Some(obj) = stream.next().await { let obj = obj?; - let key = match obj.key() { - Some(k) => k, - None => continue, - }; + let key = obj.key(); let blob_name = key.strip_prefix(&timeline_dir_target.prefix_in_bucket); match blob_name { @@ -287,7 +283,7 @@ pub(crate) async fn list_timeline_blobs( let (index_part_object, index_part_generation) = match index_parts .iter() .filter_map(|k| { - let key = k.key().unwrap(); + let key = k.key(); // Stripping the index key to the last part, because RemotePath doesn't // like absolute paths, and depending on prefix_in_bucket it's possible // for the keys we read back to start with a slash. @@ -308,8 +304,7 @@ pub(crate) async fn list_timeline_blobs( errors.push("S3 list response got no index_part.json file".to_string()); } - if let Some(index_part_object_key) = index_part_object.as_ref().and_then(|object| object.key()) - { + if let Some(index_part_object_key) = index_part_object.as_ref().map(|object| object.key()) { let index_part_bytes = download_object_with_retries( s3_client, &timeline_dir_target.bucket_name, diff --git a/s3_scrubber/src/garbage.rs b/s3_scrubber/src/garbage.rs index daeb5e97778e..f27e1d7f6594 100644 --- a/s3_scrubber/src/garbage.rs +++ b/s3_scrubber/src/garbage.rs @@ -323,7 +323,7 @@ async fn do_delete( let delete_request = s3_client .delete_objects() .bucket(bucket_name) - .delete(Delete::builder().set_objects(Some(request_keys)).build()); + .delete(Delete::builder().set_objects(Some(request_keys)).build()?); delete_request .send() .await diff --git a/s3_scrubber/src/lib.rs b/s3_scrubber/src/lib.rs index 777276a4d1fb..e5465952fbad 100644 --- a/s3_scrubber/src/lib.rs +++ b/s3_scrubber/src/lib.rs @@ -16,6 +16,7 @@ use aws_config::environment::EnvironmentVariableCredentialsProvider; use aws_config::imds::credentials::ImdsCredentialsProvider; use aws_config::meta::credentials::CredentialsProviderChain; use aws_config::sso::SsoCredentialsProvider; +use aws_config::BehaviorVersion; use aws_sdk_s3::config::Region; use aws_sdk_s3::{Client, Config}; @@ -245,6 +246,7 @@ pub fn init_s3_client(account_id: Option, bucket_region: Region) -> Clie }; let mut builder = Config::builder() + .behavior_version(BehaviorVersion::v2023_11_09()) .region(bucket_region) .credentials_provider(credentials_provider); diff --git a/s3_scrubber/src/metadata_stream.rs b/s3_scrubber/src/metadata_stream.rs index 8095071c1fd1..4cfa77cfc139 100644 --- 
a/s3_scrubber/src/metadata_stream.rs +++ b/s3_scrubber/src/metadata_stream.rs @@ -20,7 +20,6 @@ pub fn stream_tenants<'a>( let new_entry_ids = fetch_response .common_prefixes() - .unwrap_or_default() .iter() .filter_map(|prefix| prefix.prefix()) .filter_map(|prefix| -> Option<&str> { @@ -72,7 +71,6 @@ pub async fn stream_tenant_timelines<'a>( let new_entry_ids = fetch_response .common_prefixes() - .unwrap_or_default() .iter() .filter_map(|prefix| prefix.prefix()) .filter_map(|prefix| -> Option<&str> { @@ -116,15 +114,15 @@ pub(crate) fn stream_listing<'a>( list_objects_with_retries(s3_client, target, continuation_token.clone()).await?; if target.delimiter.is_empty() { - for object_id in fetch_response.contents().unwrap_or_default().iter().filter_map(|object| object.key()).map(|i| - ObjectIdentifier::builder().key(i).build() - ) { + for object_key in fetch_response.contents().iter().filter_map(|object| object.key()) + { + let object_id = ObjectIdentifier::builder().key(object_key).build()?; yield object_id; } } else { - for prefix in fetch_response.common_prefixes().unwrap_or_default() - .iter().filter_map(|p| p.prefix().map(|k| ObjectIdentifier::builder().key(k).build())) { - yield prefix; + for prefix in fetch_response.common_prefixes().iter().filter_map(|p| p.prefix()) { + let object_id = ObjectIdentifier::builder().key(prefix).build()?; + yield object_id; } } diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index fc7e834bd2fd..9545dc2dd5c8 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -434,8 +434,6 @@ def __init__( # Pageserver remote storage self.pageserver_remote_storage = pageserver_remote_storage - # Extensions remote storage - self.ext_remote_storage: Optional[S3Storage] = None # Safekeepers remote storage self.sk_remote_storage: Optional[RemoteStorage] = None @@ -534,24 +532,6 @@ def enable_pageserver_remote_storage( ) self.pageserver_remote_storage = ret - def enable_extensions_remote_storage(self, kind: RemoteStorageKind): - assert self.ext_remote_storage is None, "already configured extensions remote storage" - - # there is an assumption that REAL_S3 for extensions is never - # cleaned up these are also special in that they have a hardcoded - # bucket and region, which is most likely the same as our normal - ext = self._configure_and_create_remote_storage( - kind, - RemoteStorageUser.EXTENSIONS, - bucket_name="neon-dev-extensions-eu-central-1", - bucket_region="eu-central-1", - ) - assert isinstance( - ext, S3Storage - ), "unsure why, but only MOCK_S3 and REAL_S3 are currently supported for extensions" - ext.cleanup = False - self.ext_remote_storage = ext - def enable_safekeeper_remote_storage(self, kind: RemoteStorageKind): assert self.sk_remote_storage is None, "sk_remote_storage already configured" @@ -608,8 +588,7 @@ def cleanup_local_storage(self): directory_to_clean.rmdir() def cleanup_remote_storage(self): - # extensions are currently not cleaned up, disabled when creating - for x in [self.pageserver_remote_storage, self.ext_remote_storage, self.sk_remote_storage]: + for x in [self.pageserver_remote_storage, self.sk_remote_storage]: if isinstance(x, S3Storage): x.do_cleanup() @@ -713,7 +692,6 @@ def __init__(self, config: NeonEnvBuilder): self.pageservers: List[NeonPageserver] = [] self.broker = config.broker self.pageserver_remote_storage = config.pageserver_remote_storage - self.ext_remote_storage = config.ext_remote_storage self.safekeepers_remote_storage = 
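// A sketch of the aws-sdk-s3 1.x construction pattern used by the s3_scrubber
// changes above (explicit BehaviorVersion, slice-returning accessors, fallible
// builders). The credentials provider wiring is omitted here for brevity, so this
// is only a shape illustration, not the scrubber's full init_s3_client.
use aws_config::BehaviorVersion;
use aws_sdk_s3::config::Region;
use aws_sdk_s3::{Client, Config};

fn make_s3_client(region: Region) -> Client {
    let config = Config::builder()
        .behavior_version(BehaviorVersion::v2023_11_09())
        .region(region)
        .build();
    Client::from_conf(config)
}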
config.sk_remote_storage self.pg_version = config.pg_version # Binary path for pageserver, safekeeper, etc @@ -1436,45 +1414,25 @@ def endpoint_create( def endpoint_start( self, endpoint_id: str, - pg_port: int, - http_port: int, safekeepers: Optional[List[int]] = None, - tenant_id: Optional[TenantId] = None, - lsn: Optional[Lsn] = None, - branch_name: Optional[str] = None, remote_ext_config: Optional[str] = None, pageserver_id: Optional[int] = None, ) -> "subprocess.CompletedProcess[str]": args = [ "endpoint", "start", - "--tenant-id", - str(tenant_id or self.env.initial_tenant), - "--pg-version", - self.env.pg_version, ] if remote_ext_config is not None: args.extend(["--remote-ext-config", remote_ext_config]) - if lsn is not None: - args.append(f"--lsn={lsn}") - args.extend(["--pg-port", str(pg_port)]) - args.extend(["--http-port", str(http_port)]) if safekeepers is not None: args.extend(["--safekeepers", (",".join(map(str, safekeepers)))]) - if branch_name is not None: - args.extend(["--branch-name", branch_name]) if endpoint_id is not None: args.append(endpoint_id) if pageserver_id is not None: args.extend(["--pageserver-id", str(pageserver_id)]) - storage = self.env.ext_remote_storage - s3_env_vars = None - if isinstance(storage, S3Storage): - s3_env_vars = storage.access_env_vars() - - res = self.raw_cli(args, extra_env_vars=s3_env_vars) + res = self.raw_cli(args) res.check_returncode() return res @@ -1495,15 +1453,12 @@ def endpoint_reconfigure( def endpoint_stop( self, endpoint_id: str, - tenant_id: Optional[TenantId] = None, destroy=False, check_return_code=True, ) -> "subprocess.CompletedProcess[str]": args = [ "endpoint", "stop", - "--tenant-id", - str(tenant_id or self.env.initial_tenant), ] if destroy: args.append("--destroy") @@ -1599,7 +1554,7 @@ def stop(self, immediate: bool = False) -> "NeonAttachmentService": self.running = False return self - def attach_hook(self, tenant_id: TenantId, pageserver_id: int) -> int: + def attach_hook_issue(self, tenant_id: TenantId, pageserver_id: int) -> int: response = requests.post( f"{self.env.control_plane_api}/attach-hook", json={"tenant_id": str(tenant_id), "node_id": pageserver_id}, @@ -1609,6 +1564,13 @@ def attach_hook(self, tenant_id: TenantId, pageserver_id: int) -> int: assert isinstance(gen, int) return gen + def attach_hook_drop(self, tenant_id: TenantId): + response = requests.post( + f"{self.env.control_plane_api}/attach-hook", + json={"tenant_id": str(tenant_id), "node_id": None}, + ) + response.raise_for_status() + def __enter__(self) -> "NeonAttachmentService": return self @@ -1808,13 +1770,20 @@ def tenant_attach( to call into the pageserver HTTP client. 
""" if self.env.attachment_service is not None: - generation = self.env.attachment_service.attach_hook(tenant_id, self.id) + generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id) else: generation = None client = self.http_client() return client.tenant_attach(tenant_id, config, config_null, generation=generation) + def tenant_detach(self, tenant_id: TenantId): + if self.env.attachment_service is not None: + self.env.attachment_service.attach_hook_drop(tenant_id) + + client = self.http_client() + return client.tenant_detach(tenant_id) + def append_pageserver_param_overrides( params_to_update: List[str], @@ -1889,7 +1858,8 @@ def run_capture( command: List[str], env: Optional[Env] = None, cwd: Optional[str] = None, - **kwargs: Any, + with_command_header=True, + **popen_kwargs: Any, ) -> str: """ Run one of the postgres binaries, with stderr and stdout redirected to a file. @@ -1902,7 +1872,13 @@ def run_capture( log.info(f"Running command '{' '.join(command)}'") env = self._build_env(env) base_path, _, _ = subprocess_capture( - self.log_dir, command, env=env, cwd=cwd, check=True, **kwargs + self.log_dir, + command, + env=env, + cwd=cwd, + check=True, + with_command_header=with_command_header, + **popen_kwargs, ) return base_path @@ -2145,6 +2121,7 @@ def extra_args(self) -> list[str]: # Console auth backend params *["--auth-backend", "console"], *["--auth-endpoint", self.endpoint], + *["--sql-over-http-pool-opt-in", "false"], ] if self.fixed_rate_limit is not None: args += [ @@ -2420,6 +2397,10 @@ def static_proxy( # For simplicity, we use the same user for both `--auth-endpoint` and `safe_psql` vanilla_pg.start() vanilla_pg.safe_psql("create user proxy with login superuser password 'password'") + vanilla_pg.safe_psql("CREATE SCHEMA IF NOT EXISTS neon_control_plane") + vanilla_pg.safe_psql( + "CREATE TABLE neon_control_plane.endpoints (endpoint_id VARCHAR(255) PRIMARY KEY, allowed_ips VARCHAR(255))" + ) proxy_port = port_distributor.get_port() mgmt_port = port_distributor.get_port() @@ -2520,9 +2501,6 @@ def start( self.env.neon_cli.endpoint_start( self.endpoint_id, - pg_port=self.pg_port, - http_port=self.http_port, - tenant_id=self.tenant_id, safekeepers=self.active_safekeepers, remote_ext_config=remote_ext_config, pageserver_id=pageserver_id, @@ -2582,6 +2560,17 @@ def respec(self, **kwargs): with open(config_path, "w") as file: json.dump(dict(data_dict, **kwargs), file, indent=4) + # Mock the extension part of spec passed from control plane for local testing + # endpooint.rs adds content of this file as a part of the spec.json + def create_remote_extension_spec(self, spec: dict[str, Any]): + """Create a remote extension spec file for the endpoint.""" + remote_extensions_spec_path = os.path.join( + self.endpoint_path(), "remote_extensions_spec.json" + ) + + with open(remote_extensions_spec_path, "w") as file: + json.dump(spec, file, indent=4) + def stop(self) -> "Endpoint": """ Stop the Postgres instance if it's running. 
@@ -2591,7 +2580,7 @@ def stop(self) -> "Endpoint": if self.running: assert self.endpoint_id is not None self.env.neon_cli.endpoint_stop( - self.endpoint_id, self.tenant_id, check_return_code=self.check_stop_result + self.endpoint_id, check_return_code=self.check_stop_result ) self.running = False @@ -2605,7 +2594,7 @@ def stop_and_destroy(self) -> "Endpoint": assert self.endpoint_id is not None self.env.neon_cli.endpoint_stop( - self.endpoint_id, self.tenant_id, True, check_return_code=self.check_stop_result + self.endpoint_id, True, check_return_code=self.check_stop_result ) self.endpoint_id = None self.running = False @@ -3040,6 +3029,11 @@ def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path: """Compute the working directory for an individual test.""" test_name = request.node.name test_dir = top_output_dir / test_name.replace("/", "-") + + # We rerun flaky tests multiple times, use a separate directory for each run. + if (suffix := getattr(request.node, "execution_count", None)) is not None: + test_dir = test_dir.parent / f"{test_dir.name}-{suffix}" + log.info(f"get_test_output_dir is {test_dir}") # make mypy happy assert isinstance(test_dir, Path) diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 2f1d68b92c0e..76aa40122f53 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -4,7 +4,7 @@ import time from collections import defaultdict from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Set, Tuple import requests from requests.adapters import HTTPAdapter @@ -100,6 +100,15 @@ def kind_count(self) -> Dict[str, int]: counts[hist_layer.kind] += 1 return counts + def delta_layers(self) -> List[HistoricLayerInfo]: + return [x for x in self.historic_layers if x.kind == "Delta"] + + def image_layers(self) -> List[HistoricLayerInfo]: + return [x for x in self.historic_layers if x.kind == "Image"] + + def historic_by_name(self) -> Set[str]: + return set(x.layer_file_name for x in self.historic_layers) + @dataclass class TenantConfig: @@ -254,6 +263,7 @@ def tenant_detach(self, tenant_id: TenantId, detach_ignored=False): def tenant_delete(self, tenant_id: TenantId): res = self.delete(f"http://localhost:{self.port}/v1/tenant/{tenant_id}") self.verbose_error(res) + return res def tenant_load(self, tenant_id: TenantId): res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/load") @@ -352,12 +362,16 @@ def timeline_create( new_timeline_id: TimelineId, ancestor_timeline_id: Optional[TimelineId] = None, ancestor_start_lsn: Optional[Lsn] = None, + existing_initdb_timeline_id: Optional[TimelineId] = None, **kwargs, ) -> Dict[Any, Any]: body: Dict[str, Any] = { "new_timeline_id": str(new_timeline_id), "ancestor_start_lsn": str(ancestor_start_lsn) if ancestor_start_lsn else None, "ancestor_timeline_id": str(ancestor_timeline_id) if ancestor_timeline_id else None, + "existing_initdb_timeline_id": str(existing_initdb_timeline_id) + if existing_initdb_timeline_id + else None, } if pg_version != PgVersion.NOT_SET: body["pg_version"] = int(pg_version) @@ -416,6 +430,10 @@ def timeline_delete(self, tenant_id: TenantId, timeline_id: TimelineId, **kwargs def timeline_gc( self, tenant_id: TenantId, timeline_id: TimelineId, gc_horizon: Optional[int] ) -> dict[str, Any]: + """ + Unlike most handlers, this will wait for the layers to be actually + complete registering themselves to the deletion 
queue. + """ self.is_testing_enabled_or_skip() log.info( diff --git a/test_runner/fixtures/pageserver/utils.py b/test_runner/fixtures/pageserver/utils.py index 007ff387f41c..e7b78cfb9a97 100644 --- a/test_runner/fixtures/pageserver/utils.py +++ b/test_runner/fixtures/pageserver/utils.py @@ -1,7 +1,7 @@ import time -from typing import TYPE_CHECKING, Any, Dict, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional -from mypy_boto3_s3.type_defs import ListObjectsV2OutputTypeDef +from mypy_boto3_s3.type_defs import ListObjectsV2OutputTypeDef, ObjectTypeDef from fixtures.log_helper import log from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient @@ -235,10 +235,14 @@ def timeline_delete_wait_completed( from fixtures.neon_fixtures import NeonEnvBuilder -def assert_prefix_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None): +def assert_prefix_empty( + neon_env_builder: "NeonEnvBuilder", + prefix: Optional[str] = None, + allowed_postfix: Optional[str] = None, +): response = list_prefix(neon_env_builder, prefix) keys = response["KeyCount"] - objects = response.get("Contents", []) + objects: List[ObjectTypeDef] = response.get("Contents", []) common_prefixes = response.get("CommonPrefixes", []) remote_storage = neon_env_builder.pageserver_remote_storage @@ -261,7 +265,18 @@ def assert_prefix_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str f"contradicting ListObjectsV2 response with KeyCount={keys} and Contents={objects}, CommonPrefixes={common_prefixes}" ) - assert keys == 0, f"remote dir with prefix {prefix} is not empty after deletion: {objects}" + filtered_count = 0 + if allowed_postfix is None: + filtered_count = len(objects) + else: + for _obj in objects: + key: str = str(response.get("Key", [])) + if not (allowed_postfix.endswith(key)): + filtered_count += 1 + + assert ( + filtered_count == 0 + ), f"remote dir with prefix {prefix} is not empty after deletion: {objects}" def assert_prefix_not_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None): diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 6e857766e55f..cda788b2a480 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -49,7 +49,8 @@ def subprocess_capture( echo_stdout=False, capture_stdout=False, timeout=None, - **kwargs: Any, + with_command_header=True, + **popen_kwargs: Any, ) -> Tuple[str, Optional[str], int]: """Run a process and bifurcate its output to files and the `log` logger @@ -86,13 +87,23 @@ def __init__(self, in_file, out_file, echo: bool, capture: bool): self.captured = "" def run(self): + first = with_command_header for line in self.in_file: + if first: + # do this only after receiving any input so that we can + # keep deleting empty files, or leave it out completly if + # it was unwanted (using the file as input later for example) + first = False + # prefix the files with the command line so that we can + # later understand which file is for what command + self.out_file.write((f"# {' '.join(cmd)}\n\n").encode("utf-8")) + # Only bother decoding if we are going to do something more than stream to a file if self.echo or self.capture: string = line.decode(encoding="utf-8", errors="replace") if self.echo: - log.info(string) + log.info(string.strip()) if self.capture: self.captured += string @@ -107,7 +118,7 @@ def run(self): p = subprocess.Popen( cmd, - **kwargs, + **popen_kwargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) @@ -138,17 +149,19 @@ def run(self): 
_global_counter = 0 +_global_counter_lock = threading.Lock() def global_counter() -> int: - """A really dumb global counter. + """A really dumb but thread-safe global counter. This is useful for giving output files a unique number, so if we run the same command multiple times we can keep their output separate. """ - global _global_counter - _global_counter += 1 - return _global_counter + global _global_counter, _global_counter_lock + with _global_counter_lock: + _global_counter += 1 + return _global_counter def print_gc_result(row: Dict[str, Any]): diff --git a/test_runner/performance/README.md b/test_runner/performance/README.md index d113e9e6375e..7ad65821d45b 100644 --- a/test_runner/performance/README.md +++ b/test_runner/performance/README.md @@ -14,7 +14,7 @@ Some handy pytest flags for local development: - `-s` shows test output - `-k` selects a test to run - `--timeout=0` disables our default timeout of 300s (see `setup.cfg`) -- `--cleanup-test-ouput` cleans up after each test +- `--preserve-database-files` to skip cleanup # What performance tests do we have and how we run them diff --git a/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock b/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock index 4ea21eb378a1..3ac0f16e4bac 100644 --- a/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock +++ b/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock @@ -437,9 +437,9 @@ checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "openssl" -version = "0.10.57" +version = "0.10.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800" dependencies = [ "bitflags 2.4.1", "cfg-if", @@ -469,9 +469,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.93" +version = "0.9.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f" dependencies = [ "cc", "libc", diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py index 4911fc09d63c..84a322039a66 100644 --- a/test_runner/regress/test_broken_timeline.py +++ b/test_runner/regress/test_broken_timeline.py @@ -114,7 +114,6 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder) [ ".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*", ".*Timeline got dropped without initializing, cleaning its files.*", - ".*Failed to load index_part from remote storage, failed creation?.*", ] ) @@ -144,8 +143,13 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder) ), "pageserver should clean its temp timeline files on timeline creation failure" -def test_timeline_init_break_before_checkpoint_recreate(neon_env_builder: NeonEnvBuilder): - env = neon_env_builder.init_start() +# The "exit" case is for a reproducer of issue 6007: an unclean shutdown where we can't do local fs cleanups +@pytest.mark.parametrize("exit_or_return", ["return", "exit"]) +def test_timeline_init_break_before_checkpoint_recreate( + neon_env_builder: NeonEnvBuilder, exit_or_return: str +): + env = neon_env_builder.init_configs() + env.start() pageserver_http = env.pageserver.http_client() 
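A usage note on the with_command_header flag introduced in subprocess_capture/run_capture above: once a command produces any output, its capture file now starts with a comment naming the command, which helps when many *.stdout files accumulate in the test output directory, but the header has to be disabled whenever the file is consumed byte-for-byte. A small sketch (the command and data directory are illustrative):

    from pathlib import Path

    # default: the capture file is prefixed with "# <command line>"
    base = pg_bin.run_capture(["pg_controldata", str(pgdata_dir)])
    header = Path(base + ".stdout").read_text().splitlines()[0]
    # header == "# pg_controldata /path/to/pgdata"

    # opt out when the output must stay pristine, e.g. to be parsed or reused as input,
    # as the --sync-safekeepers call later in this patch does
    base = pg_bin.run_capture(["pg_controldata", str(pgdata_dir)], with_command_header=False)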
env.pageserver.allowed_errors.extend( @@ -156,6 +160,7 @@ def test_timeline_init_break_before_checkpoint_recreate(neon_env_builder: NeonEn ] ) + pageserver_http.tenant_create(env.initial_tenant) tenant_id = env.initial_tenant timelines_dir = env.pageserver.timeline_dir(tenant_id) @@ -166,13 +171,17 @@ def test_timeline_init_break_before_checkpoint_recreate(neon_env_builder: NeonEn timeline_id = TimelineId("1080243c1f76fe3c5147266663c9860b") # Introduce failpoint during timeline init (some intermediate files are on disk), before it's checkpointed. - pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "return")) - with pytest.raises(Exception, match="before-checkpoint-new-timeline"): - _ = env.neon_cli.create_timeline( - "test_timeline_init_break_before_checkpoint", tenant_id, timeline_id - ) + failpoint = "before-checkpoint-new-timeline" + pattern = failpoint + if exit_or_return == "exit": + # in reality a read error happens, but there are automatic retries which now fail because pageserver is dead + pattern = "Connection aborted." - # Restart the page server + pageserver_http.configure_failpoints((failpoint, exit_or_return)) + with pytest.raises(Exception, match=pattern): + _ = pageserver_http.timeline_create(env.pg_version, tenant_id, timeline_id) + + # Restart the page server (with the failpoint disabled) env.pageserver.restart(immediate=True) # Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally. @@ -186,11 +195,9 @@ def test_timeline_init_break_before_checkpoint_recreate(neon_env_builder: NeonEn timeline_dirs == initial_timeline_dirs ), "pageserver should clean its temp timeline files on timeline creation failure" - # Disable the failpoint again - pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "off")) # creating the branch should have worked now - new_timeline_id = env.neon_cli.create_timeline( - "test_timeline_init_break_before_checkpoint", tenant_id, timeline_id + new_timeline_id = TimelineId( + pageserver_http.timeline_create(env.pg_version, tenant_id, timeline_id)["timeline_id"] ) assert timeline_id == new_timeline_id diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 98f6677c00a5..f3c6af442739 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -411,7 +411,6 @@ def check_neon_works( config.initial_tenant = snapshot_config["default_tenant_id"] config.pg_distrib_dir = pg_distrib_dir config.remote_storage = None - config.ext_remote_storage = None config.sk_remote_storage = None # Use the "target" binaries to launch the storage nodes @@ -435,8 +434,11 @@ def check_neon_works( pg_port = port_distributor.get_port() http_port = port_distributor.get_port() - cli_current.endpoint_start("main", pg_port=pg_port, http_port=http_port) - request.addfinalizer(lambda: cli_current.endpoint_stop("main")) + cli_current.endpoint_create( + branch_name="main", pg_port=pg_port, http_port=http_port, endpoint_id="ep-main" + ) + cli_current.endpoint_start("ep-main") + request.addfinalizer(lambda: cli_current.endpoint_stop("ep-main")) connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres" pg_bin.run_capture( diff --git a/test_runner/regress/test_download_extensions.py b/test_runner/regress/test_download_extensions.py index 775ad102416a..27eb05ac0912 100644 --- a/test_runner/regress/test_download_extensions.py +++ b/test_runner/regress/test_download_extensions.py @@ -1,316 +1,137 
@@ import os import shutil -import threading from contextlib import closing from pathlib import Path +from typing import Any, Dict import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( NeonEnvBuilder, ) -from fixtures.pg_version import PgVersion, skip_on_postgres -from fixtures.remote_storage import ( - RemoteStorageKind, - S3Storage, - available_s3_storages, -) - - -# Cleaning up downloaded files is important for local tests -# or else one test could reuse the files from another test or another test run -def cleanup(pg_version): - PGDIR = Path(f"pg_install/v{pg_version}") - - LIB_DIR = PGDIR / Path("lib/postgresql") - cleanup_lib_globs = ["anon*", "postgis*", "pg_buffercache*"] - cleanup_lib_glob_paths = [LIB_DIR.glob(x) for x in cleanup_lib_globs] - - SHARE_DIR = PGDIR / Path("share/postgresql/extension") - cleanup_ext_globs = [ - "anon*", - "address_standardizer*", - "postgis*", - "pageinspect*", - "pg_buffercache*", - "pgrouting*", - ] - cleanup_ext_glob_paths = [SHARE_DIR.glob(x) for x in cleanup_ext_globs] - - all_glob_paths = cleanup_lib_glob_paths + cleanup_ext_glob_paths - all_cleanup_files = [] - for file_glob in all_glob_paths: - for file in file_glob: - all_cleanup_files.append(file) - - for file in all_cleanup_files: - try: - os.remove(file) - log.info(f"removed file {file}") - except Exception as err: - log.info( - f"skipping remove of file {file} because it doesn't exist.\ - this may be expected or unexpected depending on the test {err}" - ) - - cleanup_folders = [SHARE_DIR / Path("anon"), PGDIR / Path("download_extensions")] - for folder in cleanup_folders: - try: - shutil.rmtree(folder) - log.info(f"removed folder {folder}") - except Exception as err: - log.info( - f"skipping remove of folder {folder} because it doesn't exist.\ - this may be expected or unexpected depending on the test {err}" - ) +from fixtures.pg_version import PgVersion +from pytest_httpserver import HTTPServer +from werkzeug.wrappers.request import Request +from werkzeug.wrappers.response import Response -def upload_files(env): - log.info("Uploading test files to mock bucket") - os.chdir("test_runner/regress/data/extension_test") - for path in os.walk("."): - prefix, _, files = path - for file in files: - # the [2:] is to remove the leading "./" - full_path = os.path.join(prefix, file)[2:] - - with open(full_path, "rb") as f: - log.info(f"UPLOAD {full_path} to ext/{full_path}") - assert isinstance(env.pageserver_remote_storage, S3Storage) - env.pageserver_remote_storage.client.upload_fileobj( - f, - env.ext_remote_storage.bucket_name, - f"ext/{full_path}", - ) - os.chdir("../../../..") - - -# Test downloading remote extension. 
-@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building") -@pytest.mark.parametrize("remote_storage_kind", available_s3_storages()) -@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949") -def test_remote_extensions( +# use neon_env_builder_local fixture to override the default neon_env_builder fixture +# and use a test-specific pg_install instead of shared one +@pytest.fixture(scope="function") +def neon_env_builder_local( neon_env_builder: NeonEnvBuilder, - remote_storage_kind: RemoteStorageKind, + test_output_dir: Path, + pg_distrib_dir: Path, pg_version: PgVersion, -): - neon_env_builder.enable_extensions_remote_storage(remote_storage_kind) - env = neon_env_builder.init_start() - tenant_id, _ = env.neon_cli.create_tenant() - env.neon_cli.create_timeline("test_remote_extensions", tenant_id=tenant_id) - - assert env.ext_remote_storage is not None # satisfy mypy - - # For MOCK_S3 we upload test files. - # For REAL_S3 we use the files already in the bucket - if remote_storage_kind == RemoteStorageKind.MOCK_S3: - upload_files(env) - - # Start a compute node and check that it can download the extensions - # and use them to CREATE EXTENSION and LOAD - endpoint = env.endpoints.create_start( - "test_remote_extensions", - tenant_id=tenant_id, - remote_ext_config=env.ext_remote_storage.to_string(), - # config_lines=["log_min_messages=debug3"], +) -> NeonEnvBuilder: + test_local_pginstall = test_output_dir / "pg_install" + log.info(f"copy {pg_distrib_dir} to {test_local_pginstall}") + shutil.copytree( + pg_distrib_dir / pg_version.v_prefixed, test_local_pginstall / pg_version.v_prefixed ) - try: - with closing(endpoint.connect()) as conn: - with conn.cursor() as cur: - # Check that appropriate control files were downloaded - cur.execute("SELECT * FROM pg_available_extensions") - all_extensions = [x[0] for x in cur.fetchall()] - log.info(all_extensions) - assert "anon" in all_extensions - # postgis is on real s3 but not mock s3. - # it's kind of a big file, would rather not upload to github - if remote_storage_kind == RemoteStorageKind.REAL_S3: - assert "postgis" in all_extensions - # this may fail locally if dependency is missing - # we don't really care about the error, - # we just want to make sure it downloaded - try: - cur.execute("CREATE EXTENSION postgis") - except Exception as err: - log.info(f"(expected) error creating postgis extension: {err}") - # we do not check the error, so this is basically a NO-OP - # however checking the log you can make sure that it worked - # and also get valuable information about how long loading the extension took + neon_env_builder.pg_distrib_dir = test_local_pginstall + log.info(f"local neon_env_builder.pg_distrib_dir: {neon_env_builder.pg_distrib_dir}") - # this is expected to fail on my computer because I don't have the pgcrypto extension - try: - cur.execute("CREATE EXTENSION anon") - except Exception as err: - log.info("error creating anon extension") - assert "pgcrypto" in str(err), "unexpected error creating anon extension" - finally: - cleanup(pg_version) + return neon_env_builder -# Test downloading remote library. 
-@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building") -@pytest.mark.parametrize("remote_storage_kind", available_s3_storages()) -@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949") -def test_remote_library( - neon_env_builder: NeonEnvBuilder, - remote_storage_kind: RemoteStorageKind, - pg_version: PgVersion, +def test_remote_extensions( + httpserver: HTTPServer, + neon_env_builder_local: NeonEnvBuilder, + httpserver_listen_address, + pg_version, ): - neon_env_builder.enable_extensions_remote_storage(remote_storage_kind) - env = neon_env_builder.init_start() - tenant_id, _ = env.neon_cli.create_tenant() - env.neon_cli.create_timeline("test_remote_library", tenant_id=tenant_id) - - assert env.ext_remote_storage is not None # satisfy mypy - - # For MOCK_S3 we upload test files. - # For REAL_S3 we use the files already in the bucket - if remote_storage_kind == RemoteStorageKind.MOCK_S3: - upload_files(env) + if pg_version == PgVersion.V16: + pytest.skip("TODO: PG16 extension building") + + # setup mock http server + # that expects request for anon.tar.zst + # and returns the requested file + (host, port) = httpserver_listen_address + extensions_endpoint = f"http://{host}:{port}/pg-ext-s3-gateway" + + build_tag = os.environ.get("BUILD_TAG", "latest") + archive_path = f"{build_tag}/v{pg_version}/extensions/anon.tar.zst" + + def endpoint_handler_build_tag(request: Request) -> Response: + log.info(f"request: {request}") + + file_name = "anon.tar.zst" + file_path = f"test_runner/regress/data/extension_test/5670669815/v{pg_version}/extensions/anon.tar.zst" + file_size = os.path.getsize(file_path) + fh = open(file_path, "rb") + + return Response( + fh, + mimetype="application/octet-stream", + headers=[ + ("Content-Length", str(file_size)), + ("Content-Disposition", 'attachment; filename="%s"' % file_name), + ], + direct_passthrough=True, + ) + + httpserver.expect_request( + f"/pg-ext-s3-gateway/{archive_path}", method="GET" + ).respond_with_handler(endpoint_handler_build_tag) + + # Start a compute node with remote_extension spec + # and check that it can download the extensions and use them to CREATE EXTENSION. 
+ env = neon_env_builder_local.init_start() + env.neon_cli.create_branch("test_remote_extensions") + endpoint = env.endpoints.create( + "test_remote_extensions", + config_lines=["log_min_messages=debug3"], + ) - # and use them to run LOAD library - endpoint = env.endpoints.create_start( - "test_remote_library", - tenant_id=tenant_id, - remote_ext_config=env.ext_remote_storage.to_string(), - # config_lines=["log_min_messages=debug3"], + # mock remote_extensions spec + spec: Dict[str, Any] = { + "library_index": { + "anon": "anon", + }, + "extension_data": { + "anon": { + "archive_path": "", + "control_data": { + "anon.control": "# PostgreSQL Anonymizer (anon) extension\ncomment = 'Data anonymization tools'\ndefault_version = '1.1.0'\ndirectory='extension/anon'\nrelocatable = false\nrequires = 'pgcrypto'\nsuperuser = false\nmodule_pathname = '$libdir/anon'\ntrusted = true\n" + }, + }, + }, + } + spec["extension_data"]["anon"]["archive_path"] = archive_path + + endpoint.create_remote_extension_spec(spec) + + endpoint.start( + remote_ext_config=extensions_endpoint, ) + + # this is expected to fail if there's no pgcrypto extension, that's ok + # we just want to check that the extension was downloaded try: with closing(endpoint.connect()) as conn: with conn.cursor() as cur: - # try to load library - try: - cur.execute("LOAD 'anon'") - except Exception as err: - log.info(f"error loading anon library: {err}") - raise AssertionError("unexpected error loading anon library") from err + # Check that appropriate files were downloaded + cur.execute("CREATE EXTENSION anon") + res = [x[0] for x in cur.fetchall()] + log.info(res) + except Exception as err: + assert "pgcrypto" in str(err), f"unexpected error creating anon extension {err}" - # test library which name is different from extension name - # this may fail locally if dependency is missing - # however, it does successfully download the postgis archive - if remote_storage_kind == RemoteStorageKind.REAL_S3: - try: - cur.execute("LOAD 'postgis_topology-3'") - except Exception as err: - log.info("error loading postgis_topology-3") - assert "No such file or directory" in str( - err - ), "unexpected error loading postgis_topology-3" - finally: - cleanup(pg_version) + httpserver.check() -# Here we test a complex extension -# which has multiple extensions in one archive +# TODO +# 1. Test downloading remote library. +# +# 2. 
Test a complex extension, which has multiple extensions in one archive # using postgis as an example -# @pytest.mark.skipif( -# RemoteStorageKind.REAL_S3 not in available_s3_storages(), -# reason="skipping test because real s3 not enabled", -# ) -@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building") -@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949") -def test_multiple_extensions_one_archive( - neon_env_builder: NeonEnvBuilder, - pg_version: PgVersion, -): - neon_env_builder.enable_extensions_remote_storage(RemoteStorageKind.REAL_S3) - env = neon_env_builder.init_start() - tenant_id, _ = env.neon_cli.create_tenant() - env.neon_cli.create_timeline("test_multiple_extensions_one_archive", tenant_id=tenant_id) - - assert env.ext_remote_storage is not None # satisfy mypy - - endpoint = env.endpoints.create_start( - "test_multiple_extensions_one_archive", - tenant_id=tenant_id, - remote_ext_config=env.ext_remote_storage.to_string(), - ) - with closing(endpoint.connect()) as conn: - with conn.cursor() as cur: - cur.execute("CREATE EXTENSION address_standardizer;") - cur.execute("CREATE EXTENSION address_standardizer_data_us;") - # execute query to ensure that it works - cur.execute( - "SELECT house_num, name, suftype, city, country, state, unit \ - FROM standardize_address('us_lex', 'us_gaz', 'us_rules', \ - 'One Rust Place, Boston, MA 02109');" - ) - res = cur.fetchall() - log.info(res) - assert len(res) > 0 - - cleanup(pg_version) - - -# Test that extension is downloaded after endpoint restart, -# when the library is used in the query. # +# 3.Test that extension is downloaded after endpoint restart, +# when the library is used in the query. # Run the test with mutliple simultaneous connections to an endpoint. # to ensure that the extension is downloaded only once. # -@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949") -def test_extension_download_after_restart( - neon_env_builder: NeonEnvBuilder, - pg_version: PgVersion, -): - # TODO: PG15 + PG16 extension building - if "v14" not in pg_version: # test set only has extension built for v14 - return None - - neon_env_builder.enable_extensions_remote_storage(RemoteStorageKind.MOCK_S3) - env = neon_env_builder.init_start() - tenant_id, _ = env.neon_cli.create_tenant() - env.neon_cli.create_timeline("test_extension_download_after_restart", tenant_id=tenant_id) - - assert env.ext_remote_storage is not None # satisfy mypy - - # For MOCK_S3 we upload test files. 
- upload_files(env) - - endpoint = env.endpoints.create_start( - "test_extension_download_after_restart", - tenant_id=tenant_id, - remote_ext_config=env.ext_remote_storage.to_string(), - config_lines=["log_min_messages=debug3"], - ) - with closing(endpoint.connect()) as conn: - with conn.cursor() as cur: - cur.execute("CREATE extension pg_buffercache;") - cur.execute("SELECT * from pg_buffercache;") - res = cur.fetchall() - assert len(res) > 0 - log.info(res) - - # shutdown compute node - endpoint.stop() - # remove extension files locally - cleanup(pg_version) - - # spin up compute node again (there are no extension files available, because compute is stateless) - endpoint = env.endpoints.create_start( - "test_extension_download_after_restart", - tenant_id=tenant_id, - remote_ext_config=env.ext_remote_storage.to_string(), - config_lines=["log_min_messages=debug3"], - ) - - # connect to compute node and run the query - # that will trigger the download of the extension - def run_query(endpoint, thread_id: int): - log.info("thread_id {%d} starting", thread_id) - with closing(endpoint.connect()) as conn: - with conn.cursor() as cur: - cur.execute("SELECT * from pg_buffercache;") - res = cur.fetchall() - assert len(res) > 0 - log.info("thread_id {%d}, res = %s", thread_id, res) - - threads = [threading.Thread(target=run_query, args=(endpoint, i)) for i in range(2)] - - for thread in threads: - thread.start() - for thread in threads: - thread.join() - - cleanup(pg_version) +# 4. Test that private extensions are only downloaded when they are present in the spec. +# diff --git a/test_runner/regress/test_fullbackup.py b/test_runner/regress/test_fullbackup.py index 214f1f33a836..a456c0686267 100644 --- a/test_runner/regress/test_fullbackup.py +++ b/test_runner/regress/test_fullbackup.py @@ -20,6 +20,7 @@ def test_fullbackup( pg_bin: PgBin, port_distributor: PortDistributor, pg_distrib_dir: Path, + test_output_dir: Path, ): env = neon_env_builder.init_start() @@ -49,10 +50,12 @@ def test_fullbackup( restored_dir_path = env.repo_dir / "restored_datadir" os.mkdir(restored_dir_path, 0o750) query = f"fullbackup {env.initial_tenant} {timeline} {lsn}" - cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query] - result_basepath = pg_bin.run_capture(cmd, env=psql_env) - tar_output_file = result_basepath + ".stdout" - subprocess_capture(env.repo_dir, ["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)]) + tar_output_file = test_output_dir / "fullbackup.tar" + cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query, "-o", str(tar_output_file)] + pg_bin.run_capture(cmd, env=psql_env) + subprocess_capture( + env.repo_dir, ["tar", "-xf", str(tar_output_file), "-C", str(restored_dir_path)] + ) # HACK # fullbackup returns neon specific pg_control and first WAL segment diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py index d357bd0ee451..8da5f1eec2ea 100644 --- a/test_runner/regress/test_import.py +++ b/test_runner/regress/test_import.py @@ -163,7 +163,9 @@ def import_tar(base, wal): assert endpoint.safe_psql("select count(*) from t") == [(300000,)] -def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder): +def test_import_from_pageserver_small( + pg_bin: PgBin, neon_env_builder: NeonEnvBuilder, test_output_dir: Path +): neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) env = neon_env_builder.init_start() @@ -177,7 +179,7 @@ def test_import_from_pageserver_small(pg_bin: PgBin, 
neon_env_builder: NeonEnvBu num_rows = 3000 lsn = _generate_data(num_rows, endpoint) - _import(num_rows, lsn, env, pg_bin, timeline, env.pg_distrib_dir) + _import(num_rows, lsn, env, pg_bin, timeline, env.pg_distrib_dir, test_output_dir) @pytest.mark.timeout(1800) @@ -185,7 +187,9 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu # the test back after finding the failure cause. # @pytest.mark.skipif(os.environ.get('BUILD_TYPE') == "debug", reason="only run with release build") @pytest.mark.skip("See https://github.com/neondatabase/neon/issues/2255") -def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder): +def test_import_from_pageserver_multisegment( + pg_bin: PgBin, neon_env_builder: NeonEnvBuilder, test_output_dir: Path +): neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) env = neon_env_builder.init_start() @@ -205,7 +209,9 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne log.info(f"timeline logical size = {logical_size / (1024 ** 2)}MB") assert logical_size > 1024**3 # = 1GB - tar_output_file = _import(num_rows, lsn, env, pg_bin, timeline, env.pg_distrib_dir) + tar_output_file = _import( + num_rows, lsn, env, pg_bin, timeline, env.pg_distrib_dir, test_output_dir + ) # Check if the backup data contains multiple segment files cnt_seg_files = 0 @@ -246,7 +252,8 @@ def _import( pg_bin: PgBin, timeline: TimelineId, pg_distrib_dir: Path, -) -> str: + test_output_dir: Path, +) -> Path: """Test importing backup data to the pageserver. Args: @@ -263,9 +270,9 @@ def _import( # Get a fullbackup from pageserver query = f"fullbackup { env.initial_tenant} {timeline} {lsn}" - cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query] - result_basepath = pg_bin.run_capture(cmd, env=psql_env) - tar_output_file = result_basepath + ".stdout" + tar_output_file = test_output_dir / "fullbackup.tar" + cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query, "-o", str(tar_output_file)] + pg_bin.run_capture(cmd, env=psql_env) # Stop the first pageserver instance, erase all its data env.endpoints.stop_all() @@ -299,7 +306,7 @@ def _import( "--base-lsn", str(lsn), "--base-tarfile", - os.path.join(tar_output_file), + str(tar_output_file), "--pg-version", env.pg_version, ] @@ -315,9 +322,17 @@ def _import( # Take another fullbackup query = f"fullbackup { tenant} {timeline} {lsn}" - cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query] - result_basepath = pg_bin.run_capture(cmd, env=psql_env) - new_tar_output_file = result_basepath + ".stdout" + new_tar_output_file = test_output_dir / "fullbackup-new.tar" + cmd = [ + "psql", + "--no-psqlrc", + env.pageserver.connstr(), + "-c", + query, + "-o", + str(new_tar_output_file), + ] + pg_bin.run_capture(cmd, env=psql_env) # Check it's the same as the first fullbackup # TODO pageserver should be checking checksum diff --git a/test_runner/regress/test_neon_local_cli.py b/test_runner/regress/test_neon_local_cli.py index becdd9ff80ca..46b72fbca500 100644 --- a/test_runner/regress/test_neon_local_cli.py +++ b/test_runner/regress/test_neon_local_cli.py @@ -1,3 +1,4 @@ +import pytest from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.port_distributor import PortDistributor @@ -11,19 +12,50 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por env.neon_cli.start() env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True) + main_branch_name = "main" 
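The fullbackup changes above (test_fullbackup and test_import) now write the tar directly with psql's -o flag instead of recovering it from run_capture's stdout file. Condensed, the pattern the two tests share looks like this (variable names follow the tests):

    # take a fullbackup from the pageserver over its libpq port and unpack it
    tar_output_file = test_output_dir / "fullbackup.tar"
    query = f"fullbackup {env.initial_tenant} {timeline} {lsn}"
    cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query, "-o", str(tar_output_file)]
    pg_bin.run_capture(cmd, env=psql_env)
    subprocess_capture(
        env.repo_dir, ["tar", "-xf", str(tar_output_file), "-C", str(restored_dir_path)]
    )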
pg_port = port_distributor.get_port() http_port = port_distributor.get_port() - env.neon_cli.endpoint_start( - endpoint_id="ep-basic-main", pg_port=pg_port, http_port=http_port + env.neon_cli.endpoint_create( + main_branch_name, pg_port, http_port, endpoint_id="ep-basic-main" ) + env.neon_cli.endpoint_start("ep-basic-main") branch_name = "migration-check" - - env.neon_cli.create_branch(new_branch_name=branch_name) + env.neon_cli.create_branch(branch_name) pg_port = port_distributor.get_port() http_port = port_distributor.get_port() - env.neon_cli.endpoint_start( - f"ep-{branch_name}", pg_port, http_port, branch_name=branch_name + env.neon_cli.endpoint_create( + branch_name, pg_port, http_port, endpoint_id=f"ep-{branch_name}" ) + env.neon_cli.endpoint_start(f"ep-{branch_name}") finally: env.neon_cli.stop() + + +def test_neon_two_primary_endpoints_fail( + neon_env_builder: NeonEnvBuilder, port_distributor: PortDistributor +): + """ + Two primary endpoints with same tenant and timeline will not run together + """ + env = neon_env_builder.init_start() + branch_name = "main" + + pg_port = port_distributor.get_port() + http_port = port_distributor.get_port() + env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep1") + + pg_port = port_distributor.get_port() + http_port = port_distributor.get_port() + # ep1 is not running so create will succeed + env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep2") + + env.neon_cli.endpoint_start("ep1") + + expected_message = f'attempting to create a duplicate primary endpoint on tenant {env.initial_tenant}, timeline {env.initial_timeline}: endpoint "ep1" exists already. please don\'t do this, it is not supported.' + with pytest.raises(RuntimeError): + assert expected_message in env.neon_cli.endpoint_start("ep2").stderr + + env.neon_cli.endpoint_stop("ep1") + # ep1 is stopped so create ep2 will succeed + env.neon_cli.endpoint_start("ep2") diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index c3f4ad476f6a..66cc286aba38 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -282,7 +282,7 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder): # Now advance the generation in the control plane: subsequent validations # from the running pageserver will fail. No more deletions should happen. - env.attachment_service.attach_hook(env.initial_tenant, some_other_pageserver) + env.attachment_service.attach_hook_issue(env.initial_tenant, some_other_pageserver) generate_uploads_and_deletions(env, init=False) assert_deletion_queue(ps_http, lambda n: n > 0) @@ -397,7 +397,7 @@ def assert_header_written(): if keep_attachment == KeepAttachment.LOSE: some_other_pageserver = 101010 assert env.attachment_service is not None - env.attachment_service.attach_hook(env.initial_tenant, some_other_pageserver) + env.attachment_service.attach_hook_issue(env.initial_tenant, some_other_pageserver) env.pageserver.start() diff --git a/test_runner/regress/test_proxy_allowed_ips.py b/test_runner/regress/test_proxy_allowed_ips.py new file mode 100644 index 000000000000..f53357981162 --- /dev/null +++ b/test_runner/regress/test_proxy_allowed_ips.py @@ -0,0 +1,74 @@ +import psycopg2 +import pytest +from fixtures.neon_fixtures import ( + NeonProxy, + VanillaPostgres, +) + +TABLE_NAME = "neon_control_plane.endpoints" + + +# Proxy uses the same logic for psql and websockets. 
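The allowed-IPs tests that follow rely on the proxy consulting the neon_control_plane.endpoints table created in the static_proxy fixture earlier in this patch. Roughly, the behaviour they exercise is the check below, written as a Python sketch rather than the proxy's actual implementation:

    def connection_allowed(client_ip: str, allowed_ips: str) -> bool:
        # allowed_ips is the comma-separated list stored per endpoint_id, e.g.
        # "::1,127.0.0.1" lets the local test client in, "8.8.8.8" locks it out
        allowed = {ip.strip() for ip in allowed_ips.split(",") if ip.strip()}
        return client_ip in allowed

    # connection_allowed("127.0.0.1", "::1,127.0.0.1")  -> True   ("generic-project")
    # connection_allowed("127.0.0.1", "8.8.8.8")        -> False  ("private-project" is rejected
    #                                  with "This IP address is not allowed to connect")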
+@pytest.mark.asyncio +async def test_proxy_psql_allowed_ips(static_proxy: NeonProxy, vanilla_pg: VanillaPostgres): + # Shouldn't be able to connect to this project + vanilla_pg.safe_psql( + f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('private-project', '8.8.8.8')" + ) + # Should be able to connect to this project + vanilla_pg.safe_psql( + f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('generic-project', '::1,127.0.0.1')" + ) + + def check_cannot_connect(**kwargs): + with pytest.raises(psycopg2.Error) as exprinfo: + static_proxy.safe_psql(**kwargs) + text = str(exprinfo.value).strip() + assert "This IP address is not allowed to connect" in text + + # no SNI, deprecated `options=project` syntax (before we had several endpoint in project) + check_cannot_connect(query="select 1", sslsni=0, options="project=private-project") + + # no SNI, new `options=endpoint` syntax + check_cannot_connect(query="select 1", sslsni=0, options="endpoint=private-project") + + # with SNI + check_cannot_connect(query="select 1", host="private-project.localtest.me") + + # no SNI, deprecated `options=project` syntax (before we had several endpoint in project) + out = static_proxy.safe_psql(query="select 1", sslsni=0, options="project=generic-project") + assert out[0][0] == 1 + + # no SNI, new `options=endpoint` syntax + out = static_proxy.safe_psql(query="select 1", sslsni=0, options="endpoint=generic-project") + assert out[0][0] == 1 + + # with SNI + out = static_proxy.safe_psql(query="select 1", host="generic-project.localtest.me") + assert out[0][0] == 1 + + +@pytest.mark.asyncio +async def test_proxy_http_allowed_ips(static_proxy: NeonProxy, vanilla_pg: VanillaPostgres): + static_proxy.safe_psql("create user http_auth with password 'http' superuser") + + # Shouldn't be able to connect to this project + vanilla_pg.safe_psql( + f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('proxy', '8.8.8.8')" + ) + + def query(status: int, query: str, *args): + static_proxy.http_query( + query, + args, + user="http_auth", + password="http", + expected_code=status, + ) + + query(400, "select 1;") # ip address is not allowed + # Should be able to connect to this project + vanilla_pg.safe_psql( + f"UPDATE {TABLE_NAME} SET allowed_ips = '8.8.8.8,127.0.0.1' WHERE endpoint_id = 'proxy'" + ) + query(200, "select 1;") # should work now diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 31bc97703e5f..9c2bb2db115e 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -588,6 +588,7 @@ def assert_compacted_and_uploads_queued(): env.pageserver.allowed_errors.extend( [ ".* ERROR .*Error processing HTTP request: InternalServerError\\(The timeline or pageserver is shutting down", + ".* ERROR .*queue is in state Stopped.*", ".* ERROR .*[Cc]ould not flush frozen layer.*", ] ) @@ -602,7 +603,12 @@ def assert_compacted_and_uploads_queued(): assert isinstance(env.pageserver_remote_storage, LocalFsStorage) remote_timeline_path = env.pageserver_remote_storage.timeline_path(tenant_id, timeline_id) - assert not list(remote_timeline_path.iterdir()) + filtered = [ + path + for path in remote_timeline_path.iterdir() + if not (path.name.endswith("initdb.tar.zst")) + ] + assert len(filtered) == 0 # timeline deletion should kill ongoing uploads, so, the metric will be gone assert get_queued_count(file_kind="index", op_kind="upload") is None @@ -763,9 +769,7 @@ def test_compaction_waits_for_upload( 
neon_env_builder: NeonEnvBuilder, ): """ - Compaction waits for outstanding uploads to complete, so that it avoids deleting layers - files that have not yet been uploaded. This test forces a race between upload and - compaction. + This test forces a race between upload and compaction. """ neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) @@ -784,6 +788,16 @@ def test_compaction_waits_for_upload( timeline_id = env.initial_timeline client = env.pageserver.http_client() + layers_at_creation = client.layer_map_info(tenant_id, timeline_id) + deltas_at_creation = len(layers_at_creation.delta_layers()) + assert ( + deltas_at_creation == 1 + ), "are you fixing #5863? make sure we end up with 2 deltas at the end of endpoint lifecycle" + + # Make new layer uploads get stuck. + # Note that timeline creation waits for the initial layers to reach remote storage. + # So at this point, the `layers_at_creation` are in remote storage. + client.configure_failpoints(("before-upload-layer-pausable", "pause")) with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: # Build two tables with some data inside @@ -791,85 +805,71 @@ def test_compaction_waits_for_upload( wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) client.timeline_checkpoint(tenant_id, timeline_id) + deltas_at_first = len(client.layer_map_info(tenant_id, timeline_id).delta_layers()) + assert ( + deltas_at_first == 2 + ), "are you fixing #5863? just add one more checkpoint after 'CREATE TABLE bar ...' statement." endpoint.safe_psql("CREATE TABLE bar AS SELECT x FROM generate_series(1, 10000) g(x)") + endpoint.safe_psql("UPDATE foo SET x = 0 WHERE x = 1") wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) - # Now make the flushing hang and update one small piece of data - client.configure_failpoints(("before-upload-layer-pausable", "pause")) + layers_before_last_checkpoint = client.layer_map_info(tenant_id, timeline_id).historic_by_name() + upload_stuck_layers = layers_before_last_checkpoint - layers_at_creation.historic_by_name() - endpoint.safe_psql("UPDATE foo SET x = 0 WHERE x = 1") + assert len(upload_stuck_layers) > 0 - wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) + for name in upload_stuck_layers: + path = env.pageserver.timeline_dir(tenant_id, timeline_id) / name + assert path.exists(), "while uploads are stuck the layers should be present on disk" - checkpoint_result: queue.Queue[Optional[PageserverApiException]] = queue.Queue() - compact_result: queue.Queue[Optional[PageserverApiException]] = queue.Queue() - compact_barrier = threading.Barrier(2) + # now this will do the L0 => L1 compaction and want to remove + # upload_stuck_layers and the original initdb L0 + client.timeline_checkpoint(tenant_id, timeline_id) - def checkpoint_in_background(): - try: - log.info("Checkpoint starting") - client.timeline_checkpoint(tenant_id, timeline_id) - log.info("Checkpoint complete") - checkpoint_result.put(None) - except PageserverApiException as e: - log.info("Checkpoint errored: {e}") - checkpoint_result.put(e) + # as uploads are paused, the the upload_stuck_layers should still be with us + for name in upload_stuck_layers: + path = env.pageserver.timeline_dir(tenant_id, timeline_id) / name + assert path.exists(), "uploads are stuck still over compaction" - def compact_in_background(): - compact_barrier.wait() - try: - log.info("Compaction starting") - client.timeline_compact(tenant_id, timeline_id) - log.info("Compaction complete") - compact_result.put(None) - 
except PageserverApiException as e: - log.info("Compaction errored: {e}") - compact_result.put(e) - - checkpoint_thread = threading.Thread(target=checkpoint_in_background) - checkpoint_thread.start() + compacted_layers = client.layer_map_info(tenant_id, timeline_id).historic_by_name() + overlap = compacted_layers.intersection(upload_stuck_layers) + assert len(overlap) == 0, "none of the L0's should remain after L0 => L1 compaction" + assert ( + len(compacted_layers) == 1 + ), "there should be one L1 after L0 => L1 compaction (without #5863 being fixed)" - compact_thread = threading.Thread(target=compact_in_background) - compact_thread.start() + def layer_deletes_completed(): + m = client.get_metric_value("pageserver_layer_gcs_count_total", {"state": "completed"}) + if m is None: + return 0 + return int(m) - try: - # Start the checkpoint, see that it blocks - log.info("Waiting to see checkpoint hang...") - time.sleep(5) - assert checkpoint_result.empty() - - # Start the compaction, see that it finds work to do but blocks - compact_barrier.wait() - log.info("Waiting to see compaction hang...") - time.sleep(5) - assert compact_result.empty() - - # This is logged once compaction is started, but before we wait for operations to complete - assert env.pageserver.log_contains("compact_level0_phase1 stats available.") - - # Once we unblock uploads the compaction should complete successfully - log.info("Disabling failpoint") - client.configure_failpoints(("before-upload-layer-pausable", "off")) - log.info("Awaiting compaction result") - assert compact_result.get(timeout=10) is None - log.info("Awaiting checkpoint result") - assert checkpoint_result.get(timeout=10) is None - - except Exception: - # Log the actual failure's backtrace here, before we proceed to join threads - log.exception("Failure, cleaning up...") - raise - finally: - compact_barrier.abort() + # if initdb created an initial delta layer, it might already be gc'd + # because it was uploaded before the failpoint was enabled. however, the + # deletion is not guaranteed to be complete. + assert layer_deletes_completed() <= 1 - checkpoint_thread.join() - compact_thread.join() + client.configure_failpoints(("before-upload-layer-pausable", "off")) # Ensure that this actually terminates wait_upload_queue_empty(client, tenant_id, timeline_id) - # We should not have hit the error handling path in uploads where the remote file is gone + def until_layer_deletes_completed(): + deletes = layer_deletes_completed() + log.info(f"layer_deletes: {deletes}") + # ensure that initdb delta layer AND the previously stuck are now deleted + assert deletes >= len(upload_stuck_layers) + 1 + + wait_until(10, 1, until_layer_deletes_completed) + + for name in upload_stuck_layers: + path = env.pageserver.timeline_dir(tenant_id, timeline_id) / name + assert ( + not path.exists() + ), "l0 should now be removed because of L0 => L1 compaction and completed uploads" + + # We should not have hit the error handling path in uploads where a uploaded file is gone assert not env.pageserver.log_contains( "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more." 
) diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index 2fdcfca67153..fcc3243e817e 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -336,10 +336,15 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold( ): neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) - env = neon_env_builder.init_start() + env = neon_env_builder.init_start( + initial_tenant_conf={ + # disable compaction so that it will not download the layer for repartitioning + "compaction_period": "0s" + } + ) assert isinstance(env.pageserver_remote_storage, LocalFsStorage) - (tenant_id, timeline_id) = env.neon_cli.create_tenant() + (tenant_id, timeline_id) = env.initial_tenant, env.initial_timeline ps_http = env.pageserver.http_client() def get_metric(): diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py index 0dd1f9a29598..89c474286a03 100644 --- a/test_runner/regress/test_tenant_delete.py +++ b/test_runner/regress/test_tenant_delete.py @@ -1,3 +1,4 @@ +import concurrent.futures import enum import os import shutil @@ -284,6 +285,7 @@ def test_delete_tenant_exercise_crash_safety_failpoints( str(tenant_id), ) ), + allowed_postfix="initdb.tar.zst", ) @@ -474,4 +476,95 @@ def tenant_is_deleted(): deletion.join() -# TODO test concurrent deletions with "hang" failpoint +def test_tenant_delete_concurrent( + neon_env_builder: NeonEnvBuilder, + pg_bin: PgBin, +): + """ + Validate that concurrent delete requests to the same tenant behave correctly: + exactly one should succeed. + + This is a reproducer for https://github.com/neondatabase/neon/issues/5936 + """ + neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3) + env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG) + ps_http = env.pageserver.http_client() + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + # Populate some data + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + run_pg_bench_small(pg_bin, endpoint.connstr()) + last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id) + + CONFLICT_MESSAGE = "Precondition failed: Invalid state Stopping. Expected Active or Broken" + + env.pageserver.allowed_errors.extend( + [ + # lucky race with stopping from flushing a layer we fail to schedule any uploads + ".*layer flush task.+: could not flush frozen layer: update_metadata_file", + # Errors logged from our 4xx requests + f".*{CONFLICT_MESSAGE}.*", + ] + ) + + BEFORE_REMOVE_FAILPOINT = "tenant-delete-before-map-remove" + BEFORE_RUN_FAILPOINT = "tenant-delete-before-run" + + # We will let the initial delete run until right before it would remove + # the tenant's TenantSlot. This pauses it in a state where the tenant + # is visible in Stopping state, and concurrent requests should fail with 4xx. 
+ ps_http.configure_failpoints((BEFORE_REMOVE_FAILPOINT, "pause")) + + def delete_tenant(): + return ps_http.tenant_delete(tenant_id) + + def hit_remove_failpoint(): + assert env.pageserver.log_contains(f"at failpoint {BEFORE_REMOVE_FAILPOINT}") + + def hit_run_failpoint(): + assert env.pageserver.log_contains(f"at failpoint {BEFORE_RUN_FAILPOINT}") + + with concurrent.futures.ThreadPoolExecutor() as executor: + background_200_req = executor.submit(delete_tenant) + assert background_200_req.result(timeout=10).status_code == 202 + + # Wait until the first request completes its work and is blocked on removing + # the TenantSlot from tenant manager. + wait_until(100, 0.1, hit_remove_failpoint) + + # Start another request: this should fail when it sees a tenant in Stopping state + with pytest.raises(PageserverApiException, match=CONFLICT_MESSAGE): + ps_http.tenant_delete(tenant_id) + + # Start another background request, which will pause after acquiring a TenantSlotGuard + # but before completing. + ps_http.configure_failpoints((BEFORE_RUN_FAILPOINT, "pause")) + background_4xx_req = executor.submit(delete_tenant) + wait_until(100, 0.1, hit_run_failpoint) + + # The TenantSlot is still present while the original request is hung before + # final removal + assert ps_http.get_metric_value("pageserver_tenant_manager_slots") == 1 + + # Permit the original request to run to success + ps_http.configure_failpoints((BEFORE_REMOVE_FAILPOINT, "off")) + + # Permit the duplicate background request to run to completion and fail. + ps_http.configure_failpoints((BEFORE_RUN_FAILPOINT, "off")) + with pytest.raises(PageserverApiException, match=CONFLICT_MESSAGE): + background_4xx_req.result(timeout=10) + + # Physical deletion should have happened + assert_prefix_empty( + neon_env_builder, + prefix="/".join( + ( + "tenants", + str(tenant_id), + ) + ), + ) + + # Zero tenants remain (we deleted the default tenant) + assert ps_http.get_metric_value("pageserver_tenant_manager_slots") == 0 diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 090d58672161..c81be4153007 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -290,10 +290,12 @@ def test_pageserver_with_empty_tenants( env = neon_env_builder.init_start() - env.pageserver.allowed_errors.append( - ".*marking .* as locally complete, while it doesnt exist in remote index.*" + env.pageserver.allowed_errors.extend( + [ + ".*marking .* as locally complete, while it doesnt exist in remote index.*", + ".*load failed.*list timelines directory.*", + ] ) - env.pageserver.allowed_errors.append(".*load failed.*list timelines directory.*") client = env.pageserver.http_client() diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py index 2e1fcd38fe07..b1a2755394e3 100644 --- a/test_runner/regress/test_timeline_delete.py +++ b/test_runner/regress/test_timeline_delete.py @@ -308,8 +308,10 @@ def test_delete_timeline_exercise_crash_safety_failpoints( ) timeline_dir = env.pageserver.timeline_dir(env.initial_tenant, timeline_id) + # Check local is empty - assert not timeline_dir.exists() + assert (not timeline_dir.exists()) or len(os.listdir(timeline_dir)) == 0 + # Check no delete mark present assert not (timeline_dir.parent / f"{timeline_id}.___deleted").exists() diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py index eb983488231b..24cbe344572e 100644 --- a/test_runner/regress/test_timeline_size.py +++ 
b/test_runner/regress/test_timeline_size.py @@ -146,6 +146,72 @@ def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, tim time.sleep(polling_interval) +def test_timeline_size_quota_on_startup(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start() + client = env.pageserver.http_client() + new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota_on_startup") + + wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id) + + endpoint_main = env.endpoints.create( + "test_timeline_size_quota_on_startup", + # Set small limit for the test + config_lines=["neon.max_cluster_size=30MB"], + ) + endpoint_main.start() + + log.info("postgres is running on 'test_timeline_size_quota_on_startup' branch") + + with closing(endpoint_main.connect()) as conn: + with conn.cursor() as cur: + cur.execute("CREATE TABLE foo (t text)") + + # Insert many rows. This query must fail because of space limit + try: + for _i in range(5000): + cur.execute( + """ + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100) g + """ + ) + + # If we get here, the timeline size limit failed + log.error("Query unexpectedly succeeded") + raise AssertionError() + + except psycopg2.errors.DiskFull as err: + log.info(f"Query expectedly failed with: {err}") + + # Restart endpoint that reached the limit to ensure that it doesn't fail on startup + # i.e. the size limit is not enforced during startup. + endpoint_main.stop() + # don't skip pg_catalog updates - it runs CREATE EXTENSION neon + # which is needed for neon.pg_cluster_size() to work + endpoint_main.respec(skip_pg_catalog_updates=False) + endpoint_main.start() + + # ensure that the limit is enforced after startup + with closing(endpoint_main.connect()) as conn: + with conn.cursor() as cur: + # This query must fail because of space limit + try: + cur.execute( + """ + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100000) g + """ + ) + # If we get here, the timeline size limit failed + log.error("Query unexpectedly succeeded") + raise AssertionError() + + except psycopg2.errors.DiskFull as err: + log.info(f"Query expectedly failed with: {err}") + + def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() client = env.pageserver.http_client() diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 05c60eb102dd..b7eaaf39bc37 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1,6 +1,5 @@ import filecmp import os -import pathlib import random import shutil import signal @@ -639,7 +638,7 @@ class ProposerPostgres(PgProtocol): def __init__( self, pgdata_dir: str, - pg_bin, + pg_bin: PgBin, tenant_id: TenantId, timeline_id: TimelineId, listen_addr: str, @@ -665,7 +664,7 @@ def config_file_path(self) -> str: def create_dir_config(self, safekeepers: str): """Create dir and config for running --sync-safekeepers""" - pathlib.Path(self.pg_data_dir_path()).mkdir(exist_ok=True) + Path(self.pg_data_dir_path()).mkdir(exist_ok=True) with open(self.config_file_path(), "w") as f: cfg = [ "synchronous_standby_names = 'walproposer'\n", @@ -691,7 +690,7 @@ def sync_safekeepers(self) -> Lsn: "PGDATA": self.pg_data_dir_path(), } - basepath = self.pg_bin.run_capture(command, env) + basepath = self.pg_bin.run_capture(command, env, with_command_header=False) log.info(f"postgres --sync-safekeepers 
output: {basepath}") diff --git a/test_runner/regress/test_wal_restore.py b/test_runner/regress/test_wal_restore.py index b039b3625578..4a9ffeee4b70 100644 --- a/test_runner/regress/test_wal_restore.py +++ b/test_runner/regress/test_wal_restore.py @@ -1,6 +1,7 @@ import sys import tarfile import tempfile +import time from pathlib import Path import pytest @@ -125,3 +126,43 @@ def test_wal_restore_initdb( ) log.info(f"original lsn: {original_lsn}, restored lsn: {restored_lsn}") assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)] + + +def test_wal_restore_http( + neon_env_builder: NeonEnvBuilder, + test_output_dir: Path, +): + env = neon_env_builder.init_start() + endpoint = env.endpoints.create_start("main") + endpoint.safe_psql("create table t as select generate_series(1,300000)") + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + ps_client = env.pageserver.http_client() + + # shut down the endpoint and delete the timeline from the pageserver + endpoint.stop() + + assert isinstance(env.pageserver_remote_storage, LocalFsStorage) + + test_output_dir / "initdb.tar.zst" + + (env.pageserver_remote_storage.timeline_path(tenant_id, timeline_id) / "initdb.tar.zst") + + ps_client.timeline_delete(tenant_id, timeline_id) + time.sleep(2) + + # verify that it is indeed deleted + # TODO + + # issue the restoration command + ps_client.timeline_create( + tenant_id=tenant_id, + new_timeline_id=timeline_id, + existing_initdb_timeline_id=timeline_id, + pg_version=env.pg_version, + ) + + # the table is back now! + restored = env.endpoints.create_start("main") + assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)] diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 763000f1d087..e3a22b729220 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 763000f1d0873b827829c41f2f6f799ffc0de55c +Subproject commit e3a22b72922055f9212eca12700190f118578362 diff --git a/vendor/revisions.json b/vendor/revisions.json index 377357e13178..c4cea208eeb0 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,5 +1,5 @@ { - "postgres-v16": "763000f1d0873b827829c41f2f6f799ffc0de55c", + "postgres-v16": "e3a22b72922055f9212eca12700190f118578362", "postgres-v15": "bc88f539312fcc4bb292ce94ae9db09ab6656e8a", "postgres-v14": "dd067cf656f6810a25aca6025633d32d02c5085a" } diff --git a/vm-image-spec.yaml b/vm-image-spec.yaml index 2aa935fac675..6f0ebe5f665b 100644 --- a/vm-image-spec.yaml +++ b/vm-image-spec.yaml @@ -13,6 +13,10 @@ commands: user: nobody sysvInitAction: respawn shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres" /bin/postgres_exporter' + - name: sql-exporter + user: nobody + sysvInitAction: respawn + shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml' shutdownHook: | su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10' files: @@ -46,6 +50,77 @@ files: } memory {} } + - filename: sql_exporter.yml + content: | + # Configuration for sql_exporter + # Global defaults. + global: + # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s. + scrape_timeout: 10s + # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first. + scrape_timeout_offset: 500ms + # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape. 
+ min_interval: 0s + # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections, + # as will concurrent scrapes. + max_connections: 1 + # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should + # always be the same as max_connections. + max_idle_connections: 1 + # Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse. + # If 0, connections are not closed due to a connection's age. + max_connection_lifetime: 5m + + # The target to monitor and the collectors to execute on it. + target: + # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL) + # the schema gets dropped or replaced to match the driver expected DSN format. + data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable' + + # Collectors (referenced by name) to execute on the target. + # Glob patterns are supported (see for syntax). + collectors: [neon_collector] + + # Collector files specifies a list of globs. One collector definition is read from each matching file. + # Glob patterns are supported (see for syntax). + collector_files: + - "neon_collector.yml" + - filename: neon_collector.yml + content: | + collector_name: neon_collector + metrics: + - metric_name: lfc_misses + type: gauge + help: 'lfc_misses' + key_labels: + values: [lfc_misses] + query: | + select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses'; + + - metric_name: lfc_used + type: gauge + help: 'lfc_used' + key_labels: + values: [lfc_used] + query: | + select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used'; + + - metric_name: lfc_hits + type: gauge + help: 'lfc_hits' + key_labels: + values: [lfc_hits] + query: | + select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits'; + + - metric_name: lfc_writes + type: gauge + help: 'lfc_writes' + key_labels: + values: [lfc_writes] + query: | + select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes'; + build: | # Build cgroup-tools # @@ -82,17 +157,20 @@ build: | FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.0 AS postgres-exporter + FROM burningalchemist/sql_exporter:0.13 AS sql-exporter + # Build pgbouncer # FROM debian:bullseye-slim AS pgbouncer RUN set -e \ && apt-get update \ && apt-get install -y \ - curl \ build-essential \ - pkg-config \ + curl \ libevent-dev \ - libssl-dev + libssl-dev \ + patchutils \ + pkg-config ENV PGBOUNCER_VERSION 1.21.0 ENV PGBOUNCER_GITPATH 1_21_0 @@ -100,6 +178,7 @@ build: | && curl -sfSL https://github.com/pgbouncer/pgbouncer/releases/download/pgbouncer_${PGBOUNCER_GITPATH}/pgbouncer-${PGBOUNCER_VERSION}.tar.gz -o pgbouncer-${PGBOUNCER_VERSION}.tar.gz \ && tar xzvf pgbouncer-${PGBOUNCER_VERSION}.tar.gz \ && cd pgbouncer-${PGBOUNCER_VERSION} \ + && curl https://github.com/pgbouncer/pgbouncer/commit/a7b3c0a5f4caa9dbe92743d04cf1e28c4c05806c.patch | filterdiff --include a/src/server.c | patch -p1 \ && LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \ && make -j $(nproc) \ && make install @@ -114,13 +193,19 @@ merge: | COPY cgconfig.conf /etc/cgconfig.conf COPY pgbouncer.ini /etc/pgbouncer.ini + COPY sql_exporter.yml /etc/sql_exporter.yml + COPY neon_collector.yml /etc/neon_collector.yml + RUN set -e \ && chown postgres:postgres /etc/pgbouncer.ini \ && chmod 0644 
/etc/pgbouncer.ini \ - && chmod 0644 /etc/cgconfig.conf + && chmod 0644 /etc/cgconfig.conf \ + && chmod 0644 /etc/sql_exporter.yml \ + && chmod 0644 /etc/neon_collector.yml COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/ COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/ COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/ COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter + COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 66828fb53d39..3e46731adf8e 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -14,12 +14,16 @@ publish = false ### BEGIN HAKARI SECTION [dependencies] anyhow = { version = "1", features = ["backtrace"] } -aws-config = { version = "0.56", default-features = false, features = ["credentials-sso", "rustls"] } -aws-runtime = { version = "0.56", default-features = false, features = ["event-stream"] } -aws-sigv4 = { version = "0.56", features = ["sign-eventstream"] } -aws-smithy-http = { version = "0.56", default-features = false, features = ["event-stream", "rt-tokio"] } +aws-config = { version = "1", default-features = false, features = ["rustls", "sso"] } +aws-runtime = { version = "1", default-features = false, features = ["event-stream", "sigv4a"] } +aws-sigv4 = { version = "1", features = ["http0-compat", "sign-eventstream", "sigv4a"] } +aws-smithy-async = { version = "1", default-features = false, features = ["rt-tokio"] } +aws-smithy-http = { version = "0.60", default-features = false, features = ["event-stream"] } +aws-smithy-runtime-api = { version = "1", features = ["client", "http-02x", "http-auth"] } +aws-smithy-types = { version = "1", default-features = false, features = ["byte-stream-poll-next", "http-body-0-4-x", "rt-tokio"] } axum = { version = "0.6", features = ["ws"] } base64 = { version = "0.21", features = ["alloc"] } +base64ct = { version = "1", default-features = false, features = ["std"] } bytes = { version = "1", features = ["serde"] } chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "wasmbind"] } clap = { version = "4", features = ["derive", "string"] } @@ -36,6 +40,7 @@ futures-io = { version = "0.3" } futures-sink = { version = "0.3" } futures-util = { version = "0.3", features = ["channel", "io", "sink"] } hex = { version = "0.4", features = ["serde"] } +hmac = { version = "0.12", default-features = false, features = ["reset"] } hyper = { version = "0.14", features = ["full"] } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] } @@ -56,13 +61,14 @@ scopeguard = { version = "1" } serde = { version = "1", features = ["alloc", "derive"] } serde_json = { version = "1", features = ["raw_value"] } smallvec = { version = "1", default-features = false, features = ["write"] } +subtle = { version = "2" } time = { version = "0.3", features = ["local-offset", "macros", "serde-well-known"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] } tokio-rustls = { version = "0.24" } tokio-util = { version = "0.7", features = ["codec", "io"] } toml_datetime = { version = "0.6", default-features = false, features = ["serde"] } toml_edit = { version = "0.19", features = ["serde"] } -tower = { version = "0.4", features = ["balance", "buffer", "limit", "retry", 
"timeout", "util"] } +tower = { version = "0.4", default-features = false, features = ["balance", "buffer", "limit", "log", "timeout", "util"] } tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" } tungstenite = { version = "0.20" }